diff --git .github/new-prs-labeler.yml .github/new-prs-labeler.yml
index 30ff1a4cf486..8789495ec2fe 100644
--- .github/new-prs-labeler.yml
+++ .github/new-prs-labeler.yml
@@ -668,7 +668,7 @@ mlgo:
   - llvm/lib/CodeGen/ML*
   - llvm/unittests/CodeGen/ML*
   - llvm/test/CodeGen/MLRegAlloc/**
-  - llvm/utils/mlgo-utils/*
+  - llvm/utils/mlgo-utils/**
 
 tools:llvm-exegesis:
   - llvm/tools/llvm-exegesis/**
diff --git .github/workflows/libcxx-build-and-test.yaml .github/workflows/libcxx-build-and-test.yaml
index 1a26a699db8e..b5e60781e000 100644
--- .github/workflows/libcxx-build-and-test.yaml
+++ .github/workflows/libcxx-build-and-test.yaml
@@ -242,6 +242,7 @@ jobs:
         - { config: mingw-dll, mingw: true }
         - { config: mingw-static, mingw: true }
         - { config: mingw-dll-i686, mingw: true }
+        - { config: mingw-incomplete-sysroot, mingw: true }
     steps:
       - uses: actions/checkout@v4
       - name: Install dependencies
@@ -260,6 +261,12 @@ jobs:
           del llvm-mingw*.zip
           mv llvm-mingw* c:\llvm-mingw
           echo "c:\llvm-mingw\bin" | Out-File -FilePath $Env:GITHUB_PATH -Encoding utf8 -Append
+      - name: Simulate a from-scratch build of llvm-mingw
+        if: ${{ matrix.config == 'mingw-incomplete-sysroot' }}
+        run: |
+          rm -r c:\llvm-mingw\include\c++
+          rm -r c:\llvm-mingw\*-w64-mingw32\lib\libc++*
+          rm -r c:\llvm-mingw\*-w64-mingw32\lib\libunwind*
       - name: Add Git Bash to the path
         run: |
           echo "c:\Program Files\Git\usr\bin" | Out-File -FilePath $Env:GITHUB_PATH -Encoding utf8 -Append
diff --git bolt/include/bolt/Profile/ProfileYAMLMapping.h bolt/include/bolt/Profile/ProfileYAMLMapping.h
index cae00e37bf27..91955afb186e 100644
--- bolt/include/bolt/Profile/ProfileYAMLMapping.h
+++ bolt/include/bolt/Profile/ProfileYAMLMapping.h
@@ -96,7 +96,7 @@ template <> struct MappingTraits<bolt::SuccessorInfo> {
 namespace bolt {
 struct PseudoProbeInfo {
   uint32_t InlineTreeIndex = 0;
-  uint64_t BlockMask = 0; // bitset with probe indices from 1 to 64
+  uint64_t BlockMask = 0;            // bitset with probe indices from 1 to 64
   std::vector<uint64_t> BlockProbes; // block probes with indices above 64
   std::vector<uint64_t> CallProbes;
   std::vector<uint64_t> IndCallProbes;
diff --git bolt/include/bolt/Profile/YAMLProfileReader.h bolt/include/bolt/Profile/YAMLProfileReader.h
index 98ed4bc710a0..997ab9a04e29 100644
--- bolt/include/bolt/Profile/YAMLProfileReader.h
+++ bolt/include/bolt/Profile/YAMLProfileReader.h
@@ -101,6 +101,42 @@ public:
         YamlBFAdjacencyMap;
   };
 
+  // A class for matching inline tree nodes between profile and binary.
+  class InlineTreeNodeMapTy {
+    DenseMap<uint32_t, const MCDecodedPseudoProbeInlineTree *> Map;
+
+    void mapInlineTreeNode(uint32_t ProfileNode,
+                           const MCDecodedPseudoProbeInlineTree *BinaryNode) {
+      auto Res = Map.try_emplace(ProfileNode, BinaryNode);
+      assert(Res.second &&
+             "Duplicate mapping from profile node index to binary inline tree");
+      (void)Res;
+    }
+
+  public:
+    /// Returns matched InlineTree * for a given profile inline_tree_id.
+    const MCDecodedPseudoProbeInlineTree *
+    getInlineTreeNode(uint32_t ProfileInlineTreeNodeId) const {
+      auto It = Map.find(ProfileInlineTreeNodeId);
+      if (It == Map.end())
+        return nullptr;
+      return It->second;
+    }
+
+    // Match up YAML inline tree with binary inline tree.
+    // \p GetRootCallback is invoked for matching up the first YAML inline tree
+    // node and has the following signature:
+    // const MCDecodedPseudoProbeInlineTree *GetRootCallback(uint64_t RootGUID)
+    void matchInlineTrees(
+        const MCPseudoProbeDecoder &Decoder,
+        const yaml::bolt::PseudoProbeDesc &YamlPD,
+        const std::vector<yaml::bolt::InlineTreeNode> &YamlInlineTree,
+        llvm::function_ref<const MCDecodedPseudoProbeInlineTree *(uint64_t)>
+            GetRootCallback);
+
+    size_t size() const { return Map.size(); }
+  };
+
 private:
   /// Adjustments for basic samples profiles (without LBR).
   bool NormalizeByInsnCount{false};
diff --git bolt/lib/Profile/StaleProfileMatching.cpp bolt/lib/Profile/StaleProfileMatching.cpp
index 346d6183cba5..c3738e9d264a 100644
--- bolt/lib/Profile/StaleProfileMatching.cpp
+++ bolt/lib/Profile/StaleProfileMatching.cpp
@@ -256,28 +256,12 @@ public:
     return Hash1.InstrHash == Hash2.InstrHash;
   }
 
-  /// Returns matched InlineTree * for a given profile inline_tree_id.
-  const MCDecodedPseudoProbeInlineTree *
-  getInlineTreeNode(uint32_t ProfileInlineTreeNodeId) const {
-    auto It = InlineTreeNodeMap.find(ProfileInlineTreeNodeId);
-    if (It == InlineTreeNodeMap.end())
-      return nullptr;
-    return It->second;
-  }
-
-  void mapInlineTreeNode(uint32_t ProfileNode,
-                         const MCDecodedPseudoProbeInlineTree *BinaryNode) {
-    auto Res = InlineTreeNodeMap.try_emplace(ProfileNode, BinaryNode);
-    assert(Res.second &&
-           "Duplicate mapping from profile node index to binary inline tree");
-    (void)Res;
-  }
+  YAMLProfileReader::InlineTreeNodeMapTy InlineTreeNodeMap;
 
 private:
   using HashBlockPairType = std::pair<BlendedBlockHash, FlowBlock *>;
   std::unordered_map<uint16_t, std::vector<HashBlockPairType>> OpHashToBlocks;
   std::unordered_map<uint64_t, std::vector<HashBlockPairType>> CallHashToBlocks;
-  DenseMap<uint32_t, const MCDecodedPseudoProbeInlineTree *> InlineTreeNodeMap;
   DenseMap<const MCDecodedPseudoProbe *, FlowBlock *> BBPseudoProbeToBlock;
 
   // Uses OpcodeHash to find the most similar block for a given hash.
@@ -334,7 +318,8 @@ private:
     DenseMap<const FlowBlock *, uint32_t> FlowBlockMatchCount;
 
     auto match = [&](uint32_t NodeId, uint64_t ProbeId) -> const FlowBlock * {
-      const MCDecodedPseudoProbeInlineTree *Node = getInlineTreeNode(NodeId);
+      const MCDecodedPseudoProbeInlineTree *Node =
+          InlineTreeNodeMap.getInlineTreeNode(NodeId);
       if (!Node)
         return nullptr;
       const MCDecodedPseudoProbe *BinaryProbe = nullptr;
@@ -606,6 +591,13 @@ size_t matchWeightsByHashes(
            "If pseudo probes are in use, pseudo probe decoder should exist");
     const AddressProbesMap &ProbeMap = Decoder->getAddress2ProbesMap();
     const uint64_t FuncAddr = BF.getAddress();
+    auto GetTopLevelNodeByGUID =
+        [&](uint64_t GUID) -> const MCDecodedPseudoProbeInlineTree * {
+      auto It = TopLevelGUIDToInlineTree.find(GUID);
+      if (It != TopLevelGUIDToInlineTree.end())
+        return It->second;
+      return nullptr;
+    };
     for (const MCDecodedPseudoProbe &Probe :
          ProbeMap.find(FuncAddr, FuncAddr + BF.getSize()))
       if (const BinaryBasicBlock *BB =
@@ -613,43 +605,8 @@ size_t matchWeightsByHashes(
         Matcher.mapProbeToBB(&Probe, Blocks[BB->getIndex()]);
 
     // Match inline tree nodes by GUID, checksum, parent, and call site.
-    uint32_t ParentId = 0;
-    uint32_t PrevGUIDIdx = 0;
-    uint32_t Index = 0;
-    for (const yaml::bolt::InlineTreeNode &InlineTreeNode : YamlBF.InlineTree) {
-      uint64_t GUIDIdx = InlineTreeNode.GUIDIndex;
-      if (GUIDIdx != UINT32_MAX)
-        PrevGUIDIdx = GUIDIdx;
-      else
-        GUIDIdx = PrevGUIDIdx;
-      assert(GUIDIdx < YamlPD.GUID.size());
-      assert(GUIDIdx < YamlPD.GUIDHashIdx.size());
-      uint64_t GUID = YamlPD.GUID[GUIDIdx];
-      uint32_t HashIdx = YamlPD.GUIDHashIdx[GUIDIdx];
-      assert(HashIdx < YamlPD.Hash.size());
-      uint64_t Hash = YamlPD.Hash[HashIdx];
-      uint32_t InlineTreeNodeId = Index++;
-      ParentId += InlineTreeNode.ParentIndexDelta;
-      uint32_t CallSiteProbe = InlineTreeNode.CallSiteProbe;
-      const MCDecodedPseudoProbeInlineTree *Cur = nullptr;
-      if (!InlineTreeNodeId) {
-        auto It = TopLevelGUIDToInlineTree.find(GUID);
-        if (It != TopLevelGUIDToInlineTree.end())
-          Cur = It->second;
-      } else if (const MCDecodedPseudoProbeInlineTree *Parent =
-                     Matcher.getInlineTreeNode(ParentId)) {
-        for (const MCDecodedPseudoProbeInlineTree &Child :
-             Parent->getChildren()) {
-          if (Child.Guid == GUID) {
-            if (std::get<1>(Child.getInlineSite()) == CallSiteProbe)
-              Cur = &Child;
-            break;
-          }
-        }
-      }
-      if (Cur && Decoder->getFuncDescForGUID(GUID)->FuncHash == Hash)
-        Matcher.mapInlineTreeNode(InlineTreeNodeId, Cur);
-    }
+    Matcher.InlineTreeNodeMap.matchInlineTrees(
+        *Decoder, YamlPD, YamlBF.InlineTree, GetTopLevelNodeByGUID);
   }
   Matcher.init(Blocks, BlendedHashes, CallHashes);
 
diff --git bolt/lib/Profile/YAMLProfileReader.cpp bolt/lib/Profile/YAMLProfileReader.cpp
index 38a80fc55fd8..b2e4c217333f 100644
--- bolt/lib/Profile/YAMLProfileReader.cpp
+++ bolt/lib/Profile/YAMLProfileReader.cpp
@@ -590,6 +590,51 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) {
   return MatchedWithCallGraph;
 }
 
+void YAMLProfileReader::InlineTreeNodeMapTy::matchInlineTrees(
+    const MCPseudoProbeDecoder &Decoder,
+    const yaml::bolt::PseudoProbeDesc &YamlPD,
+    const std::vector<yaml::bolt::InlineTreeNode> &YamlInlineTree,
+    llvm::function_ref<const MCDecodedPseudoProbeInlineTree *(uint64_t)>
+        GetRootCallback) {
+
+  // Match inline tree nodes by GUID, checksum, parent, and call site.
+  uint32_t ParentId = 0;
+  uint32_t PrevGUIDIdx = 0;
+  uint32_t Index = 0;
+  for (const yaml::bolt::InlineTreeNode &InlineTreeNode : YamlInlineTree) {
+    uint64_t GUIDIdx = InlineTreeNode.GUIDIndex;
+    if (GUIDIdx)
+      PrevGUIDIdx = GUIDIdx;
+    else
+      GUIDIdx = PrevGUIDIdx;
+    assert(GUIDIdx < YamlPD.GUID.size());
+    assert(GUIDIdx < YamlPD.GUIDHashIdx.size());
+    uint64_t GUID = YamlPD.GUID[GUIDIdx];
+    uint32_t HashIdx = YamlPD.GUIDHashIdx[GUIDIdx];
+    assert(HashIdx < YamlPD.Hash.size());
+    uint64_t Hash = YamlPD.Hash[HashIdx];
+    uint32_t InlineTreeNodeId = Index++;
+    ParentId += InlineTreeNode.ParentIndexDelta;
+    uint32_t CallSiteProbe = InlineTreeNode.CallSiteProbe;
+    const MCDecodedPseudoProbeInlineTree *Cur = nullptr;
+    if (!InlineTreeNodeId) {
+      Cur = GetRootCallback(GUID);
+    } else if (const MCDecodedPseudoProbeInlineTree *Parent =
+                   getInlineTreeNode(ParentId)) {
+      for (const MCDecodedPseudoProbeInlineTree &Child :
+           Parent->getChildren()) {
+        if (Child.Guid == GUID) {
+          if (std::get<1>(Child.getInlineSite()) == CallSiteProbe)
+            Cur = &Child;
+          break;
+        }
+      }
+    }
+    if (Cur && Decoder.getFuncDescForGUID(GUID)->FuncHash == Hash)
+      mapInlineTreeNode(InlineTreeNodeId, Cur);
+  }
+}
+
 size_t YAMLProfileReader::matchWithNameSimilarity(BinaryContext &BC) {
   if (opts::NameSimilarityFunctionMatchingThreshold == 0)
     return 0;
diff --git clang-tools-extra/clang-tidy/add_new_check.py clang-tools-extra/clang-tidy/add_new_check.py
index bd69bddcc682..e366f1005353 100755
--- clang-tools-extra/clang-tidy/add_new_check.py
+++ clang-tools-extra/clang-tidy/add_new_check.py
@@ -8,9 +8,6 @@
 #
 # ===-----------------------------------------------------------------------===#
 
-from __future__ import print_function
-from __future__ import unicode_literals
-
 import argparse
 import io
 import itertools
@@ -19,10 +16,13 @@ import re
 import sys
 import textwrap
 
+# FIXME Python 3.9: Replace typing.Tuple with builtins.tuple.
+from typing import Optional, Tuple, Match
+
 
 # Adapts the module's CMakelist file. Returns 'True' if it could add a new
 # entry and 'False' if the entry already existed.
-def adapt_cmake(module_path, check_name_camel):
+def adapt_cmake(module_path: str, check_name_camel: str) -> bool:
     filename = os.path.join(module_path, "CMakeLists.txt")
 
     # The documentation files are encoded using UTF-8, however on Windows the
@@ -57,14 +57,14 @@ def adapt_cmake(module_path, check_name_camel):
 
 # Adds a header for the new check.
 def write_header(
-    module_path,
-    module,
-    namespace,
-    check_name,
-    check_name_camel,
-    description,
-    lang_restrict,
-):
+    module_path: str,
+    module: str,
+    namespace: str,
+    check_name: str,
+    check_name_camel: str,
+    description: str,
+    lang_restrict: str,
+) -> None:
     wrapped_desc = "\n".join(
         textwrap.wrap(
             description, width=80, initial_indent="/// ", subsequent_indent="/// "
@@ -139,7 +139,9 @@ public:
 
 
 # Adds the implementation of the new check.
-def write_implementation(module_path, module, namespace, check_name_camel):
+def write_implementation(
+    module_path: str, module: str, namespace: str, check_name_camel: str
+) -> None:
     filename = os.path.join(module_path, check_name_camel) + ".cpp"
     print("Creating %s..." % filename)
     with io.open(filename, "w", encoding="utf8", newline="\n") as f:
@@ -187,7 +189,7 @@ void %(check_name)s::check(const MatchFinder::MatchResult &Result) {
 
 
 # Returns the source filename that implements the module.
-def get_module_filename(module_path, module):
+def get_module_filename(module_path: str, module: str) -> str:
     modulecpp = list(
         filter(
             lambda p: p.lower() == module.lower() + "tidymodule.cpp",
@@ -198,7 +200,9 @@ def get_module_filename(module_path, module):
 
 
 # Modifies the module to include the new check.
-def adapt_module(module_path, module, check_name, check_name_camel):
+def adapt_module(
+    module_path: str, module: str, check_name: str, check_name_camel: str
+) -> None:
     filename = get_module_filename(module_path, module)
     with io.open(filename, "r", encoding="utf8") as f:
         lines = f.readlines()
@@ -217,10 +221,10 @@ def adapt_module(module_path, module, check_name, check_name_camel):
             + '");\n'
         )
 
-        lines = iter(lines)
+        lines_iter = iter(lines)
         try:
             while True:
-                line = next(lines)
+                line = next(lines_iter)
                 if not header_added:
                     match = re.search('#include "(.*)"', line)
                     if match:
@@ -247,10 +251,11 @@ def adapt_module(module_path, module, check_name, check_name_camel):
                                 # If we didn't find the check name on this line, look on the
                                 # next one.
                                 prev_line = line
-                                line = next(lines)
+                                line = next(lines_iter)
                                 match = re.search(' *"([^"]*)"', line)
                                 if match:
                                     current_check_name = match.group(1)
+                            assert current_check_name
                             if current_check_name > check_fq_name:
                                 check_added = True
                                 f.write(check_decl)
@@ -262,7 +267,9 @@ def adapt_module(module_path, module, check_name, check_name_camel):
 
 
 # Adds a release notes entry.
-def add_release_notes(module_path, module, check_name, description):
+def add_release_notes(
+    module_path: str, module: str, check_name: str, description: str
+) -> None:
     wrapped_desc = "\n".join(
         textwrap.wrap(
             description, width=80, initial_indent="  ", subsequent_indent="  "
@@ -324,9 +331,14 @@ def add_release_notes(module_path, module, check_name, description):
 
 
 # Adds a test for the check.
-def write_test(module_path, module, check_name, test_extension, test_standard):
-    if test_standard:
-        test_standard = f"-std={test_standard}-or-later "
+def write_test(
+    module_path: str,
+    module: str,
+    check_name: str,
+    test_extension: str,
+    test_standard: Optional[str],
+) -> None:
+    test_standard = f"-std={test_standard}-or-later " if test_standard else ""
     check_name_dashes = module + "-" + check_name
     filename = os.path.normpath(
         os.path.join(
@@ -362,7 +374,7 @@ void awesome_f2();
         )
 
 
-def get_actual_filename(dirname, filename):
+def get_actual_filename(dirname: str, filename: str) -> str:
     if not os.path.isdir(dirname):
         return ""
     name = os.path.join(dirname, filename)
@@ -376,7 +388,7 @@ def get_actual_filename(dirname, filename):
 
 
 # Recreates the list of checks in the docs/clang-tidy/checks directory.
-def update_checks_list(clang_tidy_path):
+def update_checks_list(clang_tidy_path: str) -> None:
     docs_dir = os.path.join(clang_tidy_path, "../docs/clang-tidy/checks")
     filename = os.path.normpath(os.path.join(docs_dir, "list.rst"))
     # Read the content of the current list.rst file
@@ -390,12 +402,12 @@ def update_checks_list(clang_tidy_path):
         for file in filter(
             lambda s: s.endswith(".rst"), os.listdir(os.path.join(docs_dir, subdir))
         ):
-            doc_files.append([subdir, file])
+            doc_files.append((subdir, file))
     doc_files.sort()
 
     # We couldn't find the source file from the check name, so try to find the
     # class name that corresponds to the check in the module file.
-    def filename_from_module(module_name, check_name):
+    def filename_from_module(module_name: str, check_name: str) -> str:
         module_path = os.path.join(clang_tidy_path, module_name)
         if not os.path.isdir(module_path):
             return ""
@@ -433,7 +445,7 @@ def update_checks_list(clang_tidy_path):
         return ""
 
     # Examine code looking for a c'tor definition to get the base class name.
-    def get_base_class(code, check_file):
+    def get_base_class(code: str, check_file: str) -> str:
         check_class_name = os.path.splitext(os.path.basename(check_file))[0]
         ctor_pattern = check_class_name + r"\([^:]*\)\s*:\s*([A-Z][A-Za-z0-9]*Check)\("
         matches = re.search(r"\s+" + check_class_name + "::" + ctor_pattern, code)
@@ -452,7 +464,7 @@ def update_checks_list(clang_tidy_path):
         return ""
 
     # Some simple heuristics to figure out if a check has an autofix or not.
-    def has_fixits(code):
+    def has_fixits(code: str) -> bool:
         for needle in [
             "FixItHint",
             "ReplacementText",
@@ -464,7 +476,7 @@ def update_checks_list(clang_tidy_path):
         return False
 
     # Try to figure out of the check supports fixits.
-    def has_auto_fix(check_name):
+    def has_auto_fix(check_name: str) -> str:
         dirname, _, check_name = check_name.partition("-")
 
         check_file = get_actual_filename(
@@ -499,7 +511,7 @@ def update_checks_list(clang_tidy_path):
 
         return ""
 
-    def process_doc(doc_file):
+    def process_doc(doc_file: Tuple[str, str]) -> Tuple[str, Optional[Match[str]]]:
         check_name = doc_file[0] + "-" + doc_file[1].replace(".rst", "")
 
         with io.open(os.path.join(docs_dir, *doc_file), "r", encoding="utf8") as doc:
@@ -508,13 +520,13 @@ def update_checks_list(clang_tidy_path):
 
             if match:
                 # Orphan page, don't list it.
-                return "", ""
+                return "", None
 
             match = re.search(r".*:http-equiv=refresh: \d+;URL=(.*).html(.*)", content)
             # Is it a redirect?
             return check_name, match
 
-    def format_link(doc_file):
+    def format_link(doc_file: Tuple[str, str]) -> str:
         check_name, match = process_doc(doc_file)
         if not match and check_name and not check_name.startswith("clang-analyzer-"):
             return "   :doc:`%(check_name)s <%(module)s/%(check)s>`,%(autofix)s\n" % {
@@ -526,7 +538,7 @@ def update_checks_list(clang_tidy_path):
         else:
             return ""
 
-    def format_link_alias(doc_file):
+    def format_link_alias(doc_file: Tuple[str, str]) -> str:
         check_name, match = process_doc(doc_file)
         if (match or (check_name.startswith("clang-analyzer-"))) and check_name:
             module = doc_file[0]
@@ -543,6 +555,7 @@ def update_checks_list(clang_tidy_path):
                 ref_end = "_"
             else:
                 redirect_parts = re.search(r"^\.\./([^/]*)/([^/]*)$", match.group(1))
+                assert redirect_parts
                 title = redirect_parts[1] + "-" + redirect_parts[2]
                 target = redirect_parts[1] + "/" + redirect_parts[2]
                 autofix = has_auto_fix(title)
@@ -599,7 +612,7 @@ def update_checks_list(clang_tidy_path):
 
 
 # Adds a documentation for the check.
-def write_docs(module_path, module, check_name):
+def write_docs(module_path: str, module: str, check_name: str) -> None:
     check_name_dashes = module + "-" + check_name
     filename = os.path.normpath(
         os.path.join(
@@ -623,15 +636,15 @@ FIXME: Describe what patterns does the check detect and why. Give examples.
         )
 
 
-def get_camel_name(check_name):
+def get_camel_name(check_name: str) -> str:
     return "".join(map(lambda elem: elem.capitalize(), check_name.split("-")))
 
 
-def get_camel_check_name(check_name):
+def get_camel_check_name(check_name: str) -> str:
     return get_camel_name(check_name) + "Check"
 
 
-def main():
+def main() -> None:
     language_to_extension = {
         "c": "c",
         "c++": "cpp",
@@ -756,6 +769,8 @@ def main():
         )
     elif language in ["objc", "objc++"]:
         language_restrict = "%(lang)s.ObjC"
+    else:
+        raise ValueError(f"Unsupported language '{language}' was specified")
 
     write_header(
         module_path,
@@ -769,7 +784,7 @@ def main():
     write_implementation(module_path, module, namespace, check_name_camel)
     adapt_module(module_path, module, check_name, check_name_camel)
     add_release_notes(module_path, module, check_name, description)
-    test_extension = language_to_extension.get(language)
+    test_extension = language_to_extension[language]
     write_test(module_path, module, check_name, test_extension, args.standard)
     write_docs(module_path, module, check_name)
     update_checks_list(clang_tidy_path)
diff --git clang-tools-extra/clang-tidy/bugprone/CastingThroughVoidCheck.cpp clang-tools-extra/clang-tidy/bugprone/CastingThroughVoidCheck.cpp
index 9e714b4be4df..f0a9ace22974 100644
--- clang-tools-extra/clang-tidy/bugprone/CastingThroughVoidCheck.cpp
+++ clang-tools-extra/clang-tidy/bugprone/CastingThroughVoidCheck.cpp
@@ -38,7 +38,9 @@ void CastingThroughVoidCheck::check(const MatchFinder::MatchResult &Result) {
   const auto ST = *Result.Nodes.getNodeAs<QualType>("source_type");
   const auto VT = *Result.Nodes.getNodeAs<QualType>("void_type");
   const auto *CE = Result.Nodes.getNodeAs<ExplicitCastExpr>("cast");
-  diag(CE->getExprLoc(), "do not cast %0 to %1 through %2") << ST << TT << VT;
+  diag(CE->getExprLoc(),
+       "do not cast %0 to %1 through %2; use reinterpret_cast instead")
+      << ST << TT << VT;
 }
 
 } // namespace clang::tidy::bugprone
diff --git clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.cpp clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.cpp
index 5f046c502eb3..e516b7108842 100644
--- clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.cpp
+++ clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.cpp
@@ -67,9 +67,7 @@ static bool canAdvanceAssignment(AssignedLevel Level) {
 static void updateAssignmentLevel(
     const FieldDecl *Field, const Expr *Init, const CXXConstructorDecl *Ctor,
     llvm::DenseMap<const FieldDecl *, AssignedLevel> &AssignedFields) {
-  auto It = AssignedFields.find(Field);
-  if (It == AssignedFields.end())
-    It = AssignedFields.insert({Field, AssignedLevel::None}).first;
+  auto It = AssignedFields.try_emplace(Field, AssignedLevel::None).first;
 
   if (!canAdvanceAssignment(It->second))
     // fast path for already decided field.
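Note: the rewrite above relies on ``try_emplace`` leaving an existing entry untouched while always returning an iterator to the (old or new) entry, so the previous find-then-insert dance collapses into one call. A minimal standalone sketch of that behavior, using ``std::map`` instead of LLVM's ``DenseMap`` and a hypothetical extra enumerator:

.. code-block:: c++

    #include <cassert>
    #include <map>

    enum class AssignedLevel { None, HandledByCheck /* hypothetical */ };

    int main() {
      std::map<int, AssignedLevel> AssignedFields;
      AssignedFields[1] = AssignedLevel::HandledByCheck;

      // Existing key: try_emplace does not overwrite the stored value.
      auto It1 = AssignedFields.try_emplace(1, AssignedLevel::None).first;
      assert(It1->second == AssignedLevel::HandledByCheck);

      // Missing key: the entry is created with the supplied default.
      auto It2 = AssignedFields.try_emplace(2, AssignedLevel::None).first;
      assert(It2->second == AssignedLevel::None);
    }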
diff --git clang-tools-extra/clang-tidy/misc/UnconventionalAssignOperatorCheck.cpp clang-tools-extra/clang-tidy/misc/UnconventionalAssignOperatorCheck.cpp
index 42c4b6edb6d2..afc4897eeb2a 100644
--- clang-tools-extra/clang-tidy/misc/UnconventionalAssignOperatorCheck.cpp
+++ clang-tools-extra/clang-tidy/misc/UnconventionalAssignOperatorCheck.cpp
@@ -14,6 +14,16 @@ using namespace clang::ast_matchers;
 
 namespace clang::tidy::misc {
 
+namespace {
+
+AST_MATCHER_P(CXXMethodDecl, firstParameter,
+              ast_matchers::internal::Matcher<ParmVarDecl>, InnerMatcher) {
+  unsigned N = Node.isExplicitObjectMemberFunction() ? 1 : 0;
+  return (N < Node.parameters().size() &&
+          InnerMatcher.matches(*Node.parameters()[N], Finder, Builder));
+}
+} // namespace
+
 void UnconventionalAssignOperatorCheck::registerMatchers(
     ast_matchers::MatchFinder *Finder) {
   const auto HasGoodReturnType =
@@ -29,7 +39,7 @@ void UnconventionalAssignOperatorCheck::registerMatchers(
                     hasName("operator="), ofClass(recordDecl().bind("class")))
           .bind("method");
   const auto IsSelfAssign =
-      cxxMethodDecl(IsAssign, hasParameter(0, parmVarDecl(hasType(IsSelf))))
+      cxxMethodDecl(IsAssign, firstParameter(parmVarDecl(hasType(IsSelf))))
           .bind("method");
 
   Finder->addMatcher(
@@ -41,8 +51,7 @@ void UnconventionalAssignOperatorCheck::registerMatchers(
             rValueReferenceType(pointee(isConstQualified()))))));
 
   Finder->addMatcher(
-      cxxMethodDecl(IsSelfAssign,
-                    hasParameter(0, parmVarDecl(hasType(BadSelf))))
+      cxxMethodDecl(IsSelfAssign, firstParameter(parmVarDecl(hasType(BadSelf))))
           .bind("ArgumentType"),
       this);
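Note: the new ``firstParameter`` matcher exists because, for a C++23 explicit object member function, the explicit object parameter is part of ``CXXMethodDecl::parameters()`` (which is what the ``isExplicitObjectMemberFunction() ? 1 : 0`` adjustment above accounts for), so ``hasParameter(0)`` would inspect ``self`` rather than the assignment's right-hand side. A small illustration, not part of the patch, of the two declaration shapes the matcher has to treat uniformly:

.. code-block:: c++

    struct Classic {
      // parameters() == { other }: index 0 is the assigned-from value.
      Classic &operator=(const Classic &other);
    };

    struct Deducing {
      // parameters() == { self, other }: the assigned-from value is at index 1.
      Deducing &operator=(this Deducing &self, const Deducing &other);
    };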
 
diff --git clang-tools-extra/clang-tidy/tool/run-clang-tidy.py clang-tools-extra/clang-tidy/tool/run-clang-tidy.py
index b702eece3700..f1b934f7139e 100755
--- clang-tools-extra/clang-tidy/tool/run-clang-tidy.py
+++ clang-tools-extra/clang-tidy/tool/run-clang-tidy.py
@@ -49,7 +49,7 @@ import tempfile
 import time
 import traceback
 from types import ModuleType
-from typing import Any, Awaitable, Callable, List, Optional, Tuple, TypeVar
+from typing import Any, Awaitable, Callable, List, Optional, TypeVar
 
 
 yaml: Optional[ModuleType] = None
@@ -621,4 +621,7 @@ async def main() -> None:
 
 
 if __name__ == "__main__":
-    asyncio.run(main())
+    try:
+        asyncio.run(main())
+    except KeyboardInterrupt:
+        pass
diff --git clang-tools-extra/clangd/DumpAST.cpp clang-tools-extra/clangd/DumpAST.cpp
index 9a525efb938e..e605f82e91fe 100644
--- clang-tools-extra/clangd/DumpAST.cpp
+++ clang-tools-extra/clangd/DumpAST.cpp
@@ -187,6 +187,7 @@ class DumpVisitor : public RecursiveASTVisitor<DumpVisitor> {
       TEMPLATE_KIND(SubstTemplateTemplateParm);
       TEMPLATE_KIND(SubstTemplateTemplateParmPack);
       TEMPLATE_KIND(UsingTemplate);
+      TEMPLATE_KIND(DeducedTemplate);
 #undef TEMPLATE_KIND
     }
     llvm_unreachable("Unhandled NameKind enum");
diff --git clang-tools-extra/clangd/Feature.cpp clang-tools-extra/clangd/Feature.cpp
index 859618a7470a..ec707a33f656 100644
--- clang-tools-extra/clangd/Feature.cpp
+++ clang-tools-extra/clangd/Feature.cpp
@@ -8,6 +8,7 @@
 
 #include "Feature.h"
 #include "clang/Basic/Version.h"
+#include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
 #include "llvm/Support/Compiler.h"
 #include "llvm/TargetParser/Host.h"
 
diff --git clang-tools-extra/clangd/SemanticHighlighting.cpp clang-tools-extra/clangd/SemanticHighlighting.cpp
index a366f1331c2d..e6d16af2495f 100644
--- clang-tools-extra/clangd/SemanticHighlighting.cpp
+++ clang-tools-extra/clangd/SemanticHighlighting.cpp
@@ -1120,6 +1120,7 @@ public:
     case TemplateName::SubstTemplateTemplateParm:
     case TemplateName::SubstTemplateTemplateParmPack:
     case TemplateName::UsingTemplate:
+    case TemplateName::DeducedTemplate:
       // Names that could be resolved to a TemplateDecl are handled elsewhere.
       break;
     }
diff --git clang-tools-extra/clangd/unittests/ClangdTests.cpp clang-tools-extra/clangd/unittests/ClangdTests.cpp
index c324643498d9..643b8e9f12d7 100644
--- clang-tools-extra/clangd/unittests/ClangdTests.cpp
+++ clang-tools-extra/clangd/unittests/ClangdTests.cpp
@@ -29,6 +29,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/Path.h"
diff --git clang-tools-extra/clangd/unittests/CompileCommandsTests.cpp clang-tools-extra/clangd/unittests/CompileCommandsTests.cpp
index b64dd4acad4c..2ce2975bd962 100644
--- clang-tools-extra/clangd/unittests/CompileCommandsTests.cpp
+++ clang-tools-extra/clangd/unittests/CompileCommandsTests.cpp
@@ -18,6 +18,7 @@
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Process.h"
diff --git clang-tools-extra/clangd/unittests/SerializationTests.cpp clang-tools-extra/clangd/unittests/SerializationTests.cpp
index 35a2e2ba77a6..2a7a6c36d3d1 100644
--- clang-tools-extra/clangd/unittests/SerializationTests.cpp
+++ clang-tools-extra/clangd/unittests/SerializationTests.cpp
@@ -12,6 +12,7 @@
 #include "support/Logger.h"
 #include "clang/Tooling/CompilationDatabase.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
 #include "llvm/Support/Compression.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/ScopedPrinter.h"
diff --git clang-tools-extra/docs/ReleaseNotes.rst clang-tools-extra/docs/ReleaseNotes.rst
index b001a6ad4466..1ad8cedc902c 100644
--- clang-tools-extra/docs/ReleaseNotes.rst
+++ clang-tools-extra/docs/ReleaseNotes.rst
@@ -95,6 +95,9 @@ The improvements are...
 Improvements to clang-tidy
 --------------------------
 
+- Improved :program:`run-clang-tidy.py` script. Fixed minor shutdown noise
+  happening on certain platforms when interrupting the script.
+
 New checks
 ^^^^^^^^^^
 
@@ -104,10 +107,18 @@ New check aliases
 Changes in existing checks
 ^^^^^^^^^^^^^^^^^^^^^^^^^^
 
+- Improved :doc:`bugprone-casting-through-void
+  <clang-tidy/checks/bugprone/casting-through-void>` check to suggest replacing
+  the offending code with ``reinterpret_cast``, to more clearly express intent.
+
 - Improved :doc:`modernize-use-std-format
   <clang-tidy/checks/modernize/use-std-format>` check to support replacing
   member function calls too.
 
+- Improved :doc:`misc-unconventional-assign-operator
+  <clang-tidy/checks/misc/unconventional-assign-operator>` check to avoid
+  false positives for C++23 deducing this.
+
 - Improved :doc:`modernize-use-std-print
   <clang-tidy/checks/modernize/use-std-print>` check to support replacing
   member function calls too.
diff --git clang-tools-extra/docs/clang-tidy/Contributing.rst clang-tools-extra/docs/clang-tidy/Contributing.rst
index d5303418b859..ff8b05ff263c 100644
--- clang-tools-extra/docs/clang-tidy/Contributing.rst
+++ clang-tools-extra/docs/clang-tidy/Contributing.rst
@@ -344,18 +344,20 @@ matching expressions to simplify your matcher.
   clang-query> let c1 cxxRecordDecl()
   clang-query> match c1
 
-Alternatively, pressing the tab key after a previous matcher's open parentheses would also 
-show which matchers can be chained with the previous matcher, though some matchers that work 
-may not be listed.
-
-Just like breaking up a huge function into smaller chunks with intention-revealing names 
-can help you understand a complex algorithm, breaking up a matcher into smaller matchers 
-with intention-revealing names can help you understand a complicated matcher.  
-
-Once you have a working clang-query matcher, the C++ API matchers will be the same or similar 
-to your interactively constructed matcher (there can be cases where they differ slightly). 
-You can use local variables to preserve your intention-revealing names that you applied 
-to nested matchers.
+Alternatively, pressing the tab key after a previous matcher's open parentheses 
+would also show which matchers can be chained with the previous matcher, 
+though some matchers that work may not be listed. Note that tab completion 
+does not currently work on Windows.
+
+Just like breaking up a huge function into smaller chunks with 
+intention-revealing names can help you understand a complex algorithm, breaking 
+up a matcher into smaller matchers with intention-revealing names can help 
+you understand a complicated matcher.  
+
+Once you have a working :program:`clang-query` matcher, the C++ API matchers 
+will be the same or similar to your interactively constructed matcher (there 
+can be cases where they differ slightly). You can use local variables to preserve 
+your intention-revealing names that you applied to nested matchers.
 
 Creating private matchers
 ^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git clang-tools-extra/docs/clang-tidy/checks/bugprone/casting-through-void.rst clang-tools-extra/docs/clang-tidy/checks/bugprone/casting-through-void.rst
index a9ab478b9a82..d9f94b6a3f20 100644
--- clang-tools-extra/docs/clang-tidy/checks/bugprone/casting-through-void.rst
+++ clang-tools-extra/docs/clang-tidy/checks/bugprone/casting-through-void.rst
@@ -3,7 +3,9 @@
 bugprone-casting-through-void
 =============================
 
-Detects unsafe or redundant two-step casting operations involving ``void*``.
+Detects unsafe or redundant two-step casting operations involving ``void*``,
+which are equivalent to a single ``reinterpret_cast`` as per the
+`C++ Standard <https://eel.is/c++draft/expr.reinterpret.cast#7>`_.
 
 Two-step type conversions via ``void*`` are discouraged for several reasons.
 
@@ -16,7 +18,17 @@ Two-step type conversions via ``void*`` are discouraged for several reasons.
 
 In summary, avoiding two-step type conversions through ``void*`` ensures clearer code,
 maintains essential compiler warnings, and prevents ambiguity and potential runtime
-errors, particularly in complex inheritance scenarios.
+errors, particularly in complex inheritance scenarios. If such a cast is wanted,
+it should be done via ``reinterpret_cast`` to express the intent more clearly.
+
+Note: it is expected that, after applying the suggested fix and using
+``reinterpret_cast``, the check :doc:`cppcoreguidelines-pro-type-reinterpret-cast
+<../cppcoreguidelines/pro-type-reinterpret-cast>` will emit a warning.
+This is intentional: ``reinterpret_cast`` is a dangerous operation that can
+easily break the strict aliasing rules when dereferencing the casted pointer,
+invoking Undefined Behavior. The warning is there to prompt users to carefully
+analyze whether the usage of ``reinterpret_cast`` is safe, in which case the
+warning may be suppressed.
 
 Examples:
 
@@ -29,3 +41,8 @@ Examples:
    reinterpret_cast<IntegerPointer>(reinterpret_cast<void *>(ptr)); // WRONG
    (IntegerPointer)(void *)ptr; // WRONG
    IntegerPointer(static_cast<void *>(ptr)); // WRONG
+
+   reinterpret_cast<IntegerPointer>(ptr); // OK, clearly expresses intent.
+                                          // NOTE: dereferencing this pointer violates
+                                          // the strict aliasing rules, invoking
+                                          // Undefined Behavior.
diff --git clang-tools-extra/docs/clang-tidy/checks/misc/const-correctness.rst clang-tools-extra/docs/clang-tidy/checks/misc/const-correctness.rst
index 86fba6c7e4f7..8ac1ad56bc8c 100644
--- clang-tools-extra/docs/clang-tidy/checks/misc/const-correctness.rst
+++ clang-tools-extra/docs/clang-tidy/checks/misc/const-correctness.rst
@@ -7,8 +7,7 @@ This check implements detection of local variables which could be declared as
 ``const`` but are not. Declaring variables as ``const`` is required or recommended by many
 coding guidelines, such as:
 `ES.25 <https://isocpp.github.io/CppCoreGuidelines/CppCoreGuidelines#es25-declare-an-object-const-or-constexpr-unless-you-want-to-modify-its-value-later-on>`_
-from the C++ Core Guidelines and `AUTOSAR C++14 Rule A7-1-1 (6.7.1 Specifiers)
-<https://www.autosar.org/fileadmin/standards/R22-11/AP/AUTOSAR_RS_CPP14Guidelines.pdf>`_.
+from the C++ Core Guidelines.
 
 Please note that this check's analysis is type-based only. Variables that are not modified
 but used to create a non-const handle that might escape the scope are not diagnosed
diff --git clang-tools-extra/docs/clang-tidy/checks/misc/unconventional-assign-operator.rst clang-tools-extra/docs/clang-tidy/checks/misc/unconventional-assign-operator.rst
index 3b4b65a5cb68..49e3fd5b6ee4 100644
--- clang-tools-extra/docs/clang-tidy/checks/misc/unconventional-assign-operator.rst
+++ clang-tools-extra/docs/clang-tidy/checks/misc/unconventional-assign-operator.rst
@@ -13,6 +13,3 @@ types and definitions with good return type but wrong ``return`` statements.
     type (e.g. ``int``).
   * Private and deleted operators are ignored.
   * The operator must always return ``*this``.
-
-This check implements `AUTOSAR C++14 Rule A13-2-1
-<https://www.autosar.org/fileadmin/standards/R22-11/AP/AUTOSAR_RS_CPP14Guidelines.pdf>`_.
diff --git clang-tools-extra/docs/clang-tidy/checks/readability/avoid-nested-conditional-operator.rst clang-tools-extra/docs/clang-tidy/checks/readability/avoid-nested-conditional-operator.rst
index 44b74283292c..cd3906855d49 100644
--- clang-tools-extra/docs/clang-tidy/checks/readability/avoid-nested-conditional-operator.rst
+++ clang-tools-extra/docs/clang-tidy/checks/readability/avoid-nested-conditional-operator.rst
@@ -16,6 +16,3 @@ Examples:
   int NestInConditional = (condition1 ? true1 : false1) ? true2 : false2;
   int NestInTrue = condition1 ? (condition2 ? true1 : false1) : false2;
   int NestInFalse = condition1 ? true1 : condition2 ? true2 : false1;
-
-This check implements part of `AUTOSAR C++14 Rule A5-16-1
-<https://www.autosar.org/fileadmin/standards/R22-11/AP/AUTOSAR_RS_CPP14Guidelines.pdf>`_.
diff --git clang-tools-extra/test/clang-tidy/checkers/bugprone/casting-through-void.cpp clang-tools-extra/test/clang-tidy/checkers/bugprone/casting-through-void.cpp
index a784e4988587..68172212904f 100644
--- clang-tools-extra/test/clang-tidy/checkers/bugprone/casting-through-void.cpp
+++ clang-tools-extra/test/clang-tidy/checkers/bugprone/casting-through-void.cpp
@@ -10,42 +10,42 @@ const double cd = 100;
 
 void normal_test() {
   static_cast<int *>(static_cast<void *>(&d));
-  // CHECK-MESSAGES: :[[@LINE-1]]:22: warning: do not cast 'double *' to 'int *' through 'void *' [bugprone-casting-through-void]
+  // CHECK-MESSAGES: :[[@LINE-1]]:22: warning: do not cast 'double *' to 'int *' through 'void *'; use reinterpret_cast instead [bugprone-casting-through-void]
   static_cast<int *>(static_cast<V>(&d));
-  // CHECK-MESSAGES: :[[@LINE-1]]:22: warning: do not cast 'double *' to 'int *' through 'V' (aka 'void *') [bugprone-casting-through-void]
+  // CHECK-MESSAGES: :[[@LINE-1]]:22: warning: do not cast 'double *' to 'int *' through 'V' (aka 'void *'); use reinterpret_cast instead [bugprone-casting-through-void]
   static_cast<int *>(static_cast<void *>(&i));
-  // CHECK-MESSAGES: :[[@LINE-1]]:22: warning: do not cast 'int *' to 'int *' through 'void *' [bugprone-casting-through-void]
+  // CHECK-MESSAGES: :[[@LINE-1]]:22: warning: do not cast 'int *' to 'int *' through 'void *'; use reinterpret_cast instead [bugprone-casting-through-void]
 
   static_cast<void *>(static_cast<void *>(&i));
 }
 
 void const_pointer_test() {
   static_cast<int *const>(static_cast<void *>(&d));
-  // CHECK-MESSAGES: :[[@LINE-1]]:27: warning: do not cast 'double *' to 'int *const' through 'void *' [bugprone-casting-through-void]
+  // CHECK-MESSAGES: :[[@LINE-1]]:27: warning: do not cast 'double *' to 'int *const' through 'void *'; use reinterpret_cast instead [bugprone-casting-through-void]
   static_cast<int *const>(static_cast<V>(&d));
-  // CHECK-MESSAGES: :[[@LINE-1]]:27: warning: do not cast 'double *' to 'int *const' through 'V' (aka 'void *') [bugprone-casting-through-void]
+  // CHECK-MESSAGES: :[[@LINE-1]]:27: warning: do not cast 'double *' to 'int *const' through 'V' (aka 'void *'); use reinterpret_cast instead [bugprone-casting-through-void]
   static_cast<int *const>(static_cast<void *>(&i));
-  // CHECK-MESSAGES: :[[@LINE-1]]:27: warning: do not cast 'int *' to 'int *const' through 'void *' [bugprone-casting-through-void]
+  // CHECK-MESSAGES: :[[@LINE-1]]:27: warning: do not cast 'int *' to 'int *const' through 'void *'; use reinterpret_cast instead [bugprone-casting-through-void]
 
   static_cast<void *const>(static_cast<void *>(&i));
 }
 
 void const_test() {
   static_cast<const int *>(static_cast<const void *>(&d));
-  // CHECK-MESSAGES: :[[@LINE-1]]:28: warning: do not cast 'double *' to 'const int *' through 'const void *' [bugprone-casting-through-void]
+  // CHECK-MESSAGES: :[[@LINE-1]]:28: warning: do not cast 'double *' to 'const int *' through 'const void *'; use reinterpret_cast instead [bugprone-casting-through-void]
   static_cast<const int *>(static_cast<const V>(&d));
-  // CHECK-MESSAGES: :[[@LINE-1]]:28: warning: do not cast 'double *' to 'const int *' through 'const V' (aka 'void *const') [bugprone-casting-through-void]
+  // CHECK-MESSAGES: :[[@LINE-1]]:28: warning: do not cast 'double *' to 'const int *' through 'const V' (aka 'void *const'); use reinterpret_cast instead [bugprone-casting-through-void]
   static_cast<const int *>(static_cast<const void *>(&i));
-  // CHECK-MESSAGES: :[[@LINE-1]]:28: warning: do not cast 'int *' to 'const int *' through 'const void *' [bugprone-casting-through-void]
+  // CHECK-MESSAGES: :[[@LINE-1]]:28: warning: do not cast 'int *' to 'const int *' through 'const void *'; use reinterpret_cast instead [bugprone-casting-through-void]
 
   static_cast<const void *>(static_cast<const void *>(&i));
 
   static_cast<const int *>(static_cast<const void *>(&cd));
-  // CHECK-MESSAGES: :[[@LINE-1]]:28: warning: do not cast 'const double *' to 'const int *' through 'const void *' [bugprone-casting-through-void]
+  // CHECK-MESSAGES: :[[@LINE-1]]:28: warning: do not cast 'const double *' to 'const int *' through 'const void *'; use reinterpret_cast instead [bugprone-casting-through-void]
   static_cast<const int *>(static_cast<const CV>(&cd));
-  // CHECK-MESSAGES: :[[@LINE-1]]:28: warning: do not cast 'const double *' to 'const int *' through 'const CV' (aka 'const void *const') [bugprone-casting-through-void]
+  // CHECK-MESSAGES: :[[@LINE-1]]:28: warning: do not cast 'const double *' to 'const int *' through 'const CV' (aka 'const void *const'); use reinterpret_cast instead [bugprone-casting-through-void]
   static_cast<const int *>(static_cast<const void *>(&ci));
-  // CHECK-MESSAGES: :[[@LINE-1]]:28: warning: do not cast 'const int *' to 'const int *' through 'const void *' [bugprone-casting-through-void]
+  // CHECK-MESSAGES: :[[@LINE-1]]:28: warning: do not cast 'const int *' to 'const int *' through 'const void *'; use reinterpret_cast instead [bugprone-casting-through-void]
 
   static_cast<const void *>(static_cast<const void *>(&ci));
 }
@@ -53,11 +53,11 @@ void const_test() {
 
 void reinterpret_cast_test() {
   static_cast<int *>(reinterpret_cast<void *>(&d));
-  // CHECK-MESSAGES: :[[@LINE-1]]:22: warning: do not cast 'double *' to 'int *' through 'void *' [bugprone-casting-through-void]
+  // CHECK-MESSAGES: :[[@LINE-1]]:22: warning: do not cast 'double *' to 'int *' through 'void *'; use reinterpret_cast instead [bugprone-casting-through-void]
   reinterpret_cast<int *>(static_cast<void *>(&d));
-  // CHECK-MESSAGES: :[[@LINE-1]]:27: warning: do not cast 'double *' to 'int *' through 'void *' [bugprone-casting-through-void]
+  // CHECK-MESSAGES: :[[@LINE-1]]:27: warning: do not cast 'double *' to 'int *' through 'void *'; use reinterpret_cast instead [bugprone-casting-through-void]
   reinterpret_cast<int *>(reinterpret_cast<void *>(&d));
-  // CHECK-MESSAGES: :[[@LINE-1]]:27: warning: do not cast 'double *' to 'int *' through 'void *' [bugprone-casting-through-void]
+  // CHECK-MESSAGES: :[[@LINE-1]]:27: warning: do not cast 'double *' to 'int *' through 'void *'; use reinterpret_cast instead [bugprone-casting-through-void]
 
   static_cast<void *>(reinterpret_cast<void *>(&i));
   reinterpret_cast<void *>(reinterpret_cast<void *>(&i));
@@ -66,11 +66,11 @@ void reinterpret_cast_test() {
 
 void c_style_cast_test() {
   static_cast<int *>((void *)&d);
-  // CHECK-MESSAGES: :[[@LINE-1]]:22: warning: do not cast 'double *' to 'int *' through 'void *' [bugprone-casting-through-void]
+  // CHECK-MESSAGES: :[[@LINE-1]]:22: warning: do not cast 'double *' to 'int *' through 'void *'; use reinterpret_cast instead [bugprone-casting-through-void]
   (int *)(void *)&d;
-  // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: do not cast 'double *' to 'int *' through 'void *' [bugprone-casting-through-void]
+  // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: do not cast 'double *' to 'int *' through 'void *'; use reinterpret_cast instead [bugprone-casting-through-void]
   static_cast<int *>((void *)&d);
-  // CHECK-MESSAGES: :[[@LINE-1]]:22: warning: do not cast 'double *' to 'int *' through 'void *' [bugprone-casting-through-void]
+  // CHECK-MESSAGES: :[[@LINE-1]]:22: warning: do not cast 'double *' to 'int *' through 'void *'; use reinterpret_cast instead [bugprone-casting-through-void]
 
   static_cast<void *>((void *)&i);
 }
@@ -82,12 +82,12 @@ using I = int *;
 void cxx_functional_cast() {
   A(static_cast<void*>(&d));
   I(static_cast<void*>(&d));
-  // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: do not cast 'double *' to 'I' (aka 'int *') through 'void *' [bugprone-casting-through-void]
+  // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: do not cast 'double *' to 'I' (aka 'int *') through 'void *'; use reinterpret_cast instead [bugprone-casting-through-void]
 }
 
 void bit_cast() {
   __builtin_bit_cast(int *, static_cast<void *>(&d));
-  // CHECK-MESSAGES: :[[@LINE-1]]:29: warning: do not cast 'double *' to 'int *' through 'void *' [bugprone-casting-through-void]
+  // CHECK-MESSAGES: :[[@LINE-1]]:29: warning: do not cast 'double *' to 'int *' through 'void *'; use reinterpret_cast instead [bugprone-casting-through-void]
 }
 
 namespace PR87069 {
diff --git clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines/missing-std-forward.cpp clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines/missing-std-forward.cpp
index 8116db58c937..98c592db7ce2 100644
--- clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines/missing-std-forward.cpp
+++ clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines/missing-std-forward.cpp
@@ -187,14 +187,14 @@ void lambda_value_reference_auxiliary_var(T&& t) {
 namespace deleted_functions {
 
 template <typename T>
-void f(T &&) = delete;
+void f(T &&t) = delete;
 
 struct S {
     template <typename T>
-    S(T &&) = delete;
+    S(T &&t) = delete;
 
     template <typename T>
-    void operator&(T &&) = delete;
+    void operator&(T &&t) = delete;
 };
 
 } // namespace deleted_functions
diff --git clang-tools-extra/test/clang-tidy/checkers/misc/unconventional-assign-operator-cxx23.cpp clang-tools-extra/test/clang-tidy/checkers/misc/unconventional-assign-operator-cxx23.cpp
new file mode 100644
index 000000000000..d947df164be8
--- /dev/null
+++ clang-tools-extra/test/clang-tidy/checkers/misc/unconventional-assign-operator-cxx23.cpp
@@ -0,0 +1,10 @@
+// RUN: %check_clang_tidy -std=c++23 %s misc-unconventional-assign-operator %t
+
+struct BadArgument {
+  BadArgument &operator=(this BadArgument& self, BadArgument &);
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: operator=() should take 'BadArgument const&', 'BadArgument&&' or 'BadArgument'
+};
+
+struct GoodArgument {
+  GoodArgument &operator=(this GoodArgument& self, GoodArgument const &);
+};
diff --git clang/cmake/caches/Release.cmake clang/cmake/caches/Release.cmake
index 6d5f75ca0074..c93ff40ff3ee 100644
--- clang/cmake/caches/Release.cmake
+++ clang/cmake/caches/Release.cmake
@@ -55,14 +55,22 @@ set(STAGE1_RUNTIMES "compiler-rt")
 
 if (LLVM_RELEASE_ENABLE_PGO)
   list(APPEND STAGE1_PROJECTS "lld")
-  set(CLANG_BOOTSTRAP_TARGETS
+  set(tmp_targets
     generate-profdata
     stage2-package
     stage2-clang
+    stage2
     stage2-install
     stage2-check-all
     stage2-check-llvm
-    stage2-check-clang CACHE STRING "")
+    stage2-check-clang)
+
+  foreach(X IN LISTS LLVM_RELEASE_FINAL_STAGE_TARGETS)
+    list(APPEND tmp_targets "stage2-${X}")
+  endforeach()
+  list(REMOVE_DUPLICATES tmp_targets)
+
+  set(CLANG_BOOTSTRAP_TARGETS "${tmp_targets}" CACHE STRING "")
 
   # Configuration for stage2-instrumented
   set(BOOTSTRAP_CLANG_ENABLE_BOOTSTRAP ON CACHE STRING "")
diff --git clang/docs/ExternalClangExamples.rst clang/docs/ExternalClangExamples.rst
index 8e986b83fd06..ec95106b4697 100644
--- clang/docs/ExternalClangExamples.rst
+++ clang/docs/ExternalClangExamples.rst
@@ -34,7 +34,7 @@ List of projects and tools
    etc."
 
 `<https://rprichard.github.io/CxxCodeBrowser/>`_
-   "A C/C++ source code indexer and navigator"
+   "A C/C++ source code indexer and navigator."
 
 `<https://github.com/etaoins/qconnectlint>`_
    "qconnectlint is a Clang tool for statically verifying the consistency
@@ -98,3 +98,6 @@ List of projects and tools
    uses of reserved identifiers to ensuring that code adheres to lifecycle
    protocols for certain LibreOffice-specific classes.  They may serve as
    examples for writing RecursiveASTVisitor-based plugins."
+
+`<https://github.com/banach-space/clang-tutor>`_
+   "A collection of out-of-tree Clang plugins for teaching and learning."
diff --git clang/docs/LanguageExtensions.rst clang/docs/LanguageExtensions.rst
index 62903fc3744c..c08697282cbf 100644
--- clang/docs/LanguageExtensions.rst
+++ clang/docs/LanguageExtensions.rst
@@ -1483,6 +1483,7 @@ Generic lambda expressions                   __cpp_generic_lambdas            C+
 variable templates                           __cpp_variable_templates         C++14         C++03
 Binary literals                              __cpp_binary_literals            C++14         C++03
 Relaxed constexpr                            __cpp_constexpr                  C++14         C++11
+Static assert with no message                __cpp_static_assert >= 201411L   C++17         C++11
 Pack expansion in generalized lambda-capture __cpp_init_captures              C++17         C++03
 ``if constexpr``                             __cpp_if_constexpr               C++17         C++11
 fold expressions                             __cpp_fold_expressions           C++17         C++03
@@ -1503,6 +1504,7 @@ Conditional ``explicit``                     __cpp_conditional_explicit       C+
 ``static operator()``                        __cpp_static_call_operator       C++23         C++03
 Attributes on Lambda-Expressions                                              C++23         C++11
 Attributes on Structured Bindings            __cpp_structured_bindings        C++26         C++03
+Static assert with user-generated message    __cpp_static_assert >= 202306L   C++26         C++11
 Pack Indexing                                __cpp_pack_indexing              C++26         C++03
 ``= delete ("should have a reason");``       __cpp_deleted_function           C++26         C++03
 Variadic Friends                             __cpp_variadic_friend            C++26         C++03
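A sketch of how the two ``__cpp_static_assert`` rows added above can be used together, keying on the macro's value rather than its mere presence (the ``Msg`` type is illustrative only; the C++26 form requires a constant expression providing ``size()`` and ``data()``):

.. code-block:: c++

    #include <cstddef>

    // Illustrative message object for the C++26 user-generated form.
    struct Msg {
      constexpr std::size_t size() const { return 13; }
      constexpr const char *data() const { return "int too small"; }
    };

    #if defined(__cpp_static_assert) && __cpp_static_assert >= 202306L
    static_assert(sizeof(int) >= 4, Msg{});   // user-generated message (C++26)
    #elif defined(__cpp_static_assert) && __cpp_static_assert >= 201411L
    static_assert(sizeof(int) >= 4);          // message omitted (C++17)
    #else
    static_assert(sizeof(int) >= 4, "int too small");
    #endif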
diff --git clang/docs/OpenMPSupport.rst clang/docs/OpenMPSupport.rst
index cdbd69520e5b..72f13856a056 100644
--- clang/docs/OpenMPSupport.rst
+++ clang/docs/OpenMPSupport.rst
@@ -306,7 +306,7 @@ implementation.
 +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+
 | misc                         | OMP_NUM_TEAMS and OMP_TEAMS_THREAD_LIMIT env vars            | :good:`done`             | D138769                                                               |
 +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+
-| misc                         | 'target_device' selector in context specifier                | :none:`unclaimed`        |                                                                       |
+| misc                         | 'target_device' selector in context specifier                | :none:`worked on`        |                                                                       |
 +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+
 | misc                         | begin/end declare variant                                    | :good:`done`             | D71179                                                                |
 +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+
diff --git clang/docs/RealtimeSanitizer.rst clang/docs/RealtimeSanitizer.rst
index 799cd43509c6..5e281a2a3579 100644
--- clang/docs/RealtimeSanitizer.rst
+++ clang/docs/RealtimeSanitizer.rst
@@ -83,3 +83,53 @@ non-zero exit code.
     #13 0x00010230dd64 in main main.cpp:9
     #14 0x0001958960dc  (<unknown module>)
     #15 0x2f557ffffffffffc  (<unknown module>)
+
+Disabling
+---------
+
+In some circumstances, you may want to suppress error reporting in a specific scope.
+
+In C++, this is achieved via ``__rtsan::ScopedDisabler``. Within the scope where the ``ScopedDisabler`` object is instantiated, all sanitizer error reports are suppressed. This suppression applies to the current scope as well as all invoked functions, including any functions called transitively.
+
+.. code-block:: c++
+
+    #include <sanitizer/rtsan_interface.h>
+
+    void process(const std::vector<float>& buffer) [[clang::nonblocking]] {
+        {
+            __rtsan::ScopedDisabler d;
+            ...
+        }
+    }
+
+If RealtimeSanitizer is not enabled at compile time (i.e., the code is not compiled with the ``-fsanitize=realtime`` flag), the ``ScopedDisabler`` is compiled as a no-op.
+
+In C, you can use the ``__rtsan_disable()`` and ``__rtsan_enable()`` functions to manually disable and re-enable RealtimeSanitizer checks.
+
+.. code-block:: c++
+
+    #include <sanitizer/rtsan_interface.h>
+
+    int process(const float* buffer) [[clang::nonblocking]]
+    {
+        {
+            __rtsan_disable();
+
+            ...
+
+            __rtsan_enable();
+        }
+    }
+
+Each call to ``__rtsan_disable()`` must be paired with a subsequent call to ``__rtsan_enable()`` to restore normal sanitizer functionality. If a corresponding ``__rtsan_enable()`` call is not made, the behavior is undefined.
+
+Compile-time sanitizer detection
+--------------------------------
+
+Clang provides the preprocessor macro ``__has_feature``, which may be used to detect whether RealtimeSanitizer is enabled at compile time.
+
+.. code-block:: c++
+
+    #if defined(__has_feature) && __has_feature(realtime_sanitizer)
+    ...
+    #endif
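One way the two new sections fit together: a project can use the compile-time detection to pull in the disabler only when the sanitizer is active. The ``RTSAN_SCOPED_DISABLER`` macro below is illustrative, not part of the runtime's API:

.. code-block:: c++

    #if defined(__has_feature) && __has_feature(realtime_sanitizer)
    #include <sanitizer/rtsan_interface.h>
    // Suppress RealtimeSanitizer reports for the rest of the enclosing scope.
    #define RTSAN_SCOPED_DISABLER() __rtsan::ScopedDisabler rtsanScopedDisabler
    #else
    #define RTSAN_SCOPED_DISABLER()
    #endif

    void process() [[clang::nonblocking]] {
      RTSAN_SCOPED_DISABLER();
      // ... calls that should not be reported in this scope ...
    }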
diff --git clang/docs/ReleaseNotes.rst clang/docs/ReleaseNotes.rst
index 511724c73015..c4fa017b982b 100644
--- clang/docs/ReleaseNotes.rst
+++ clang/docs/ReleaseNotes.rst
@@ -108,6 +108,12 @@ C++ Language Changes
 - Allow single element access of GCC vector/ext_vector_type object to be
   constant expression. Supports the `V.xyzw` syntax and other tidbits
   as seen in OpenCL. Selecting multiple elements is left as a future work.
+- Implement `CWG1815 <https://wg21.link/CWG1815>`_. Support lifetime extension
+  of a temporary created by aggregate initialization using a default member
+  initializer.
+
+- Accept C++26 user-defined ``static_assert`` messages in C++11 as an extension.
+
 
 C++2c Feature Support
 ^^^^^^^^^^^^^^^^^^^^^
@@ -122,6 +128,9 @@ C++2c Feature Support
 
 - Implemented `P2747R2 constexpr placement new <https://wg21.link/P2747R2>`_.
 
+- Added the ``__builtin_is_within_lifetime`` builtin, which supports
+  `P2641R4 Checking if a union alternative is active <https://wg21.link/p2641r4>`_.
+
 C++23 Feature Support
 ^^^^^^^^^^^^^^^^^^^^^
 - Removed the restriction to literal types in constexpr functions in C++23 mode.
@@ -154,6 +163,13 @@ Resolutions to C++ Defect Reports
 - Allow ``void{}`` as a prvalue of type ``void``.
   (`CWG2351: void{} <https://cplusplus.github.io/CWG/issues/2351.html>`_).
 
+- Clang now has improved resolution of CWG2398, allowing class templates to have
+  default arguments deduced during partial ordering.
+
+- Clang now allows comparing unequal object pointers that have been cast to ``void *``
+  in constant expressions. These comparisons always worked in non-constant expressions.
+  (`CWG2749: Treatment of "pointer to void" for relational comparisons <https://cplusplus.github.io/CWG/issues/2749.html>`_).
+
 C Language Changes
 ------------------
 
@@ -225,6 +241,14 @@ Attribute Changes in Clang
   more cases where the returned reference outlives the object.
   (#GH100567)
 
+- Clang now correctly diagnoses the use of ``btf_type_tag`` in C++ and ignores
+  it; this attribute is C-only, and accidentally accepting it in C++ in some
+  circumstances caused crashes during template instantiation. (#GH106864)
+
+- Introduced a new attribute ``[[clang::coro_await_elidable]]`` on coroutine return types
+  to express elideability at call sites where the coroutine is co_awaited as a prvalue.
+
 Improvements to Clang's diagnostics
 -----------------------------------
 
@@ -270,6 +294,13 @@ Improvements to Clang's diagnostics
 
 - Clang now respects lifetimebound attribute for the assignment operator parameter. (#GH106372).
 
+- The lifetimebound and GSL analyses in clang are now coherent, allowing clang to
+  detect more use-after-free bugs. (#GH100549).
+
+- Clang now diagnoses dangling cases where a gsl-pointer is constructed from a gsl-owner object inside a container (#GH100384).
+
+- Clang now warns about u8 character literals used in C23 with ``-Wpre-c23-compat`` instead of ``-Wpre-c++17-compat``.
+
 Improvements to Clang's time-trace
 ----------------------------------
 
@@ -345,6 +376,14 @@ Bug Fixes to C++ Support
 - Fixed a constraint comparison bug for friend declarations. (#GH78101)
 - Fix handling of ``_`` as the name of a lambda's init capture variable. (#GH107024)
 - Fix an issue with dependent source location expressions (#GH106428), (#GH81155), (#GH80210), (#GH85373)
+- Fixed a bug in the substitution of empty pack indexing types. (#GH105903)
+- Clang no longer tries to capture non-odr used default arguments of template parameters of generic lambdas (#GH107048)
+- Fixed a bug where defaulted comparison operators would remove ``const`` from base classes. (#GH102588)
+- Fix a crash when using ``source_location`` in the trailing return type of a lambda expression. (#GH67134)
+- A follow-up fix was added for (#GH61460), as the previous fix was not entirely correct. (#GH86361)
+- Fixed a crash in the typo correction of an invalid CTAD guide. (#GH107887)
+- Fixed a crash when Clang tries to substitute a parameter pack while retaining the
+  parameter pack. (#GH63819), (#GH107560)
 
 
 Bug Fixes to AST Handling
@@ -423,6 +462,8 @@ LoongArch Support
 RISC-V Support
 ^^^^^^^^^^^^^^
 
+- The option ``-mcmodel=large`` for the large code model is supported.
+
 CUDA/HIP Language Changes
 ^^^^^^^^^^^^^^^^^^^^^^^^^
 
diff --git clang/docs/tools/clang-formatted-files.txt clang/docs/tools/clang-formatted-files.txt
index fc07357986d9..48ded9c75455 100644
--- clang/docs/tools/clang-formatted-files.txt
+++ clang/docs/tools/clang-formatted-files.txt
@@ -5349,7 +5349,6 @@ llvm/include/llvm/IR/SSAContext.h
 llvm/include/llvm/IR/StructuralHash.h
 llvm/include/llvm/IR/TrackingMDRef.h
 llvm/include/llvm/IR/UseListOrder.h
-llvm/include/llvm/LTO/SummaryBasedOptimizations.h
 llvm/include/llvm/MC/MCAsmInfoCOFF.h
 llvm/include/llvm/MC/MCAsmInfoDarwin.h
 llvm/include/llvm/MC/MCAsmInfoELF.h
@@ -5586,7 +5585,6 @@ llvm/include/llvm/Transforms/IPO/SampleProfile.h
 llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
 llvm/include/llvm/Transforms/IPO/SCCP.h
 llvm/include/llvm/Transforms/IPO/StripSymbols.h
-llvm/include/llvm/Transforms/IPO/SyntheticCountsPropagation.h
 llvm/include/llvm/Transforms/IPO/ThinLTOBitcodeWriter.h
 llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h
 llvm/include/llvm/Transforms/Scalar/ADCE.h
@@ -6070,7 +6068,6 @@ llvm/lib/IR/SSAContext.cpp
 llvm/lib/IR/Statepoint.cpp
 llvm/lib/IR/StructuralHash.cpp
 llvm/lib/IR/ValueSymbolTable.cpp
-llvm/lib/LTO/SummaryBasedOptimizations.cpp
 llvm/lib/MC/MCAsmInfoCOFF.cpp
 llvm/lib/MC/MCAsmInfoELF.cpp
 llvm/lib/MC/MCAsmInfoGOFF.cpp
@@ -6861,7 +6858,6 @@ llvm/lib/Transforms/IPO/OpenMPOpt.cpp
 llvm/lib/Transforms/IPO/SampleContextTracker.cpp
 llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
 llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp
-llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp
 llvm/lib/Transforms/ObjCARC/BlotMapVector.h
 llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
 llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
diff --git clang/include/clang/AST/ASTContext.h clang/include/clang/AST/ASTContext.h
index 89bb5768dbd4..168bdca3c880 100644
--- clang/include/clang/AST/ASTContext.h
+++ clang/include/clang/AST/ASTContext.h
@@ -265,6 +265,8 @@ class ASTContext : public RefCountedBase<ASTContext> {
   mutable llvm::ContextualFoldingSet<SubstTemplateTemplateParmPackStorage,
                                      ASTContext&>
     SubstTemplateTemplateParmPacks;
+  mutable llvm::ContextualFoldingSet<DeducedTemplateStorage, ASTContext &>
+      DeducedTemplates;
 
   mutable llvm::ContextualFoldingSet<ArrayParameterType, ASTContext &>
       ArrayParameterTypes;
@@ -2304,6 +2306,15 @@ public:
                                                 unsigned Index,
                                                 bool Final) const;
 
+  /// Represents a TemplateName which had some of its default arguments
+  /// deduced. This both represents the default argument deduction as sugar and
+  /// provides support for its equivalences through canonicalization. For
+  /// example, DeducedTemplateNames which have the same set of default
+  /// arguments are equivalent, and are also equivalent to the underlying
+  /// template when the deduced template arguments are the same.
+  TemplateName getDeducedTemplateName(TemplateName Underlying,
+                                      DefaultArguments DefaultArgs) const;
+
   enum GetBuiltinTypeError {
     /// No error
     GE_None,
@@ -2787,11 +2798,13 @@ public:
   /// template name uses the shortest form of the dependent
   /// nested-name-specifier, which itself contains all canonical
   /// types, values, and templates.
-  TemplateName getCanonicalTemplateName(const TemplateName &Name) const;
+  TemplateName getCanonicalTemplateName(TemplateName Name,
+                                        bool IgnoreDeduced = false) const;
 
   /// Determine whether the given template names refer to the same
   /// template.
-  bool hasSameTemplateName(const TemplateName &X, const TemplateName &Y) const;
+  bool hasSameTemplateName(const TemplateName &X, const TemplateName &Y,
+                           bool IgnoreDeduced = false) const;
 
   /// Determine whether the two declarations refer to the same entity.
   bool isSameEntity(const NamedDecl *X, const NamedDecl *Y) const;
diff --git clang/include/clang/AST/ASTImporter.h clang/include/clang/AST/ASTImporter.h
index f851decd0965..088a2bd0fdd4 100644
--- clang/include/clang/AST/ASTImporter.h
+++ clang/include/clang/AST/ASTImporter.h
@@ -484,6 +484,11 @@ class TypeSourceInfo;
     /// the declarations it contains.
     [[nodiscard]] llvm::Error ImportDefinition(Decl *From);
 
+    llvm::Error
+    ImportTemplateArguments(ArrayRef<TemplateArgument> FromArgs,
+                            SmallVectorImpl<TemplateArgument> &ToArgs);
+    Expected<TemplateArgument> Import(const TemplateArgument &From);
+
     /// Cope with a name conflict when importing a declaration into the
     /// given context.
     ///
diff --git clang/include/clang/AST/CXXRecordDeclDefinitionBits.def clang/include/clang/AST/CXXRecordDeclDefinitionBits.def
index cdf0804680ad..6620840df0ce 100644
--- clang/include/clang/AST/CXXRecordDeclDefinitionBits.def
+++ clang/include/clang/AST/CXXRecordDeclDefinitionBits.def
@@ -249,4 +249,8 @@ FIELD(HasDeclaredCopyAssignmentWithConstParam, 1, MERGE_OR)
 /// base classes or fields have a no-return destructor
 FIELD(IsAnyDestructorNoReturn, 1, NO_MERGE)
 
+/// Whether the record type is intangible (if any base classes or fields have
+/// an intangible type). HLSL only.
+FIELD(IsHLSLIntangible, 1, NO_MERGE)
+
 #undef FIELD
diff --git clang/include/clang/AST/DeclCXX.h clang/include/clang/AST/DeclCXX.h
index 0d72cc6a08dc..252e6e925641 100644
--- clang/include/clang/AST/DeclCXX.h
+++ clang/include/clang/AST/DeclCXX.h
@@ -1547,6 +1547,10 @@ public:
   /// destructors are marked noreturn.
   bool isAnyDestructorNoReturn() const { return data().IsAnyDestructorNoReturn; }
 
+  /// Returns true if the class contains an HLSL intangible type, either as
+  /// a field or in a base class.
+  bool isHLSLIntangible() const { return data().IsHLSLIntangible; }
+
   /// If the class is a local class [class.local], returns
   /// the enclosing function declaration.
   const FunctionDecl *isLocalClass() const {
diff --git clang/include/clang/AST/DependenceFlags.h clang/include/clang/AST/DependenceFlags.h
index 3b3c1afb096a..bdcaabc143cc 100644
--- clang/include/clang/AST/DependenceFlags.h
+++ clang/include/clang/AST/DependenceFlags.h
@@ -315,6 +315,11 @@ toTemplateNameDependence(NestedNameSpecifierDependence D) {
   return Dependence(D).templateName();
 }
 
+inline TemplateNameDependence
+toTemplateNameDependence(TemplateArgumentDependence D) {
+  return Dependence(D).templateName();
+}
+
 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
 
 } // namespace clang
diff --git clang/include/clang/AST/Expr.h clang/include/clang/AST/Expr.h
index 65104acda938..66c746cc2504 100644
--- clang/include/clang/AST/Expr.h
+++ clang/include/clang/AST/Expr.h
@@ -2991,6 +2991,9 @@ public:
 
   bool hasStoredFPFeatures() const { return CallExprBits.HasFPFeatures; }
 
+  bool isCoroElideSafe() const { return CallExprBits.IsCoroElideSafe; }
+  void setCoroElideSafe(bool V = true) { CallExprBits.IsCoroElideSafe = V; }
+
   Decl *getCalleeDecl() { return getCallee()->getReferencedDeclOfCallee(); }
   const Decl *getCalleeDecl() const {
     return getCallee()->getReferencedDeclOfCallee();
diff --git clang/include/clang/AST/PropertiesBase.td clang/include/clang/AST/PropertiesBase.td
index 5f7d61951876..9b934b20cf25 100644
--- clang/include/clang/AST/PropertiesBase.td
+++ clang/include/clang/AST/PropertiesBase.td
@@ -752,6 +752,23 @@ let Class = PropertyTypeCase<TemplateName, "SubstTemplateTemplateParmPack"> in {
     return ctx.getSubstTemplateTemplateParmPack(argumentPack, associatedDecl, index, final);
   }]>;
 }
+let Class = PropertyTypeCase<TemplateName, "DeducedTemplate"> in {
+  def : ReadHelper<[{
+    auto DTS = node.getAsDeducedTemplateName();
+  }]>;
+  def : Property<"underlying", TemplateName> {
+    let Read = [{ DTS->getUnderlying() }];
+  }
+  def : Property<"startPos", UInt32> {
+    let Read = [{ DTS->getDefaultArguments().StartPos }];
+  }
+  def : Property<"defaultArgs", Array<TemplateArgument>> {
+    let Read = [{ DTS->getDefaultArguments().Args }];
+  }
+  def : Creator<[{
+    return ctx.getDeducedTemplateName(underlying, {startPos, defaultArgs});
+  }]>;
+}
 
 // Type cases for TemplateArgument.
 def : PropertyTypeKind<TemplateArgument, TemplateArgumentKind,
diff --git clang/include/clang/AST/Stmt.h clang/include/clang/AST/Stmt.h
index f1a2aac0a8b2..7aed83e9c68b 100644
--- clang/include/clang/AST/Stmt.h
+++ clang/include/clang/AST/Stmt.h
@@ -561,8 +561,11 @@ protected:
     LLVM_PREFERRED_TYPE(bool)
     unsigned HasFPFeatures : 1;
 
+    /// True if the call expression is a must-elide call to a coroutine.
+    unsigned IsCoroElideSafe : 1;
+
     /// Padding used to align OffsetToTrailingObjects to a byte multiple.
-    unsigned : 24 - 3 - NumExprBits;
+    unsigned : 24 - 4 - NumExprBits;
 
     /// The offset in bytes from the this pointer to the start of the
     /// trailing objects belonging to CallExpr. Intentionally byte sized
diff --git clang/include/clang/AST/TemplateName.h clang/include/clang/AST/TemplateName.h
index e3b7dd261535..ce97f834bfc1 100644
--- clang/include/clang/AST/TemplateName.h
+++ clang/include/clang/AST/TemplateName.h
@@ -34,6 +34,7 @@ class NestedNameSpecifier;
 enum OverloadedOperatorKind : int;
 class OverloadedTemplateStorage;
 class AssumedTemplateStorage;
+class DeducedTemplateStorage;
 struct PrintingPolicy;
 class QualifiedTemplateName;
 class SubstTemplateTemplateParmPackStorage;
@@ -50,16 +51,17 @@ protected:
   enum Kind {
     Overloaded,
     Assumed, // defined in DeclarationName.h
+    Deduced,
     SubstTemplateTemplateParm,
     SubstTemplateTemplateParmPack
   };
 
   struct BitsTag {
     LLVM_PREFERRED_TYPE(Kind)
-    unsigned Kind : 2;
+    unsigned Kind : 3;
 
     // The template parameter index.
-    unsigned Index : 15;
+    unsigned Index : 14;
 
     /// The pack index, or the number of stored templates
     /// or template arguments, depending on which subclass we have.
@@ -90,6 +92,12 @@ public:
              : nullptr;
   }
 
+  DeducedTemplateStorage *getAsDeducedTemplateName() {
+    return Bits.Kind == Deduced
+               ? reinterpret_cast<DeducedTemplateStorage *>(this)
+               : nullptr;
+  }
+
   SubstTemplateTemplateParmStorage *getAsSubstTemplateTemplateParm() {
     return Bits.Kind == SubstTemplateTemplateParm
              ? reinterpret_cast<SubstTemplateTemplateParmStorage *>(this)
@@ -172,6 +180,15 @@ public:
                       unsigned Index, bool Final);
 };
 
+struct DefaultArguments {
+  // The position in the template parameter list
+  // the first argument corresponds to.
+  unsigned StartPos;
+  ArrayRef<TemplateArgument> Args;
+
+  operator bool() const { return !Args.empty(); }
+};
+
 /// Represents a C++ template name within the type system.
 ///
 /// A C++ template name refers to a template within the C++ type
@@ -246,6 +263,10 @@ public:
     /// A template name that refers to a template declaration found through a
     /// specific using shadow declaration.
     UsingTemplate,
+
+    /// A template name that refers to another TemplateName with deduced default
+    /// arguments.
+    DeducedTemplate,
   };
 
   TemplateName() = default;
@@ -257,6 +278,7 @@ public:
   explicit TemplateName(QualifiedTemplateName *Qual);
   explicit TemplateName(DependentTemplateName *Dep);
   explicit TemplateName(UsingShadowDecl *Using);
+  explicit TemplateName(DeducedTemplateStorage *Deduced);
 
   /// Determine whether this template name is NULL.
   bool isNull() const;
@@ -271,7 +293,13 @@ public:
   /// to, if any. If the template name does not refer to a specific
   /// declaration because it is a dependent name, or if it refers to a
   /// set of function templates, returns NULL.
-  TemplateDecl *getAsTemplateDecl() const;
+  TemplateDecl *getAsTemplateDecl(bool IgnoreDeduced = false) const;
+
+  /// Retrieves the underlying template declaration that
+  /// this template name refers to, along with the
+  /// deduced default arguments, if any.
+  std::pair<TemplateDecl *, DefaultArguments>
+  getTemplateDeclAndDefaultArgs() const;
 
   /// Retrieve the underlying, overloaded function template
   /// declarations that this template name refers to, if known.
@@ -313,6 +341,11 @@ public:
   /// template declaration is introduced, if any.
   UsingShadowDecl *getAsUsingShadowDecl() const;
 
+  /// Retrieve the deduced template info, if any.
+  DeducedTemplateStorage *getAsDeducedTemplateName() const;
+
+  std::optional<TemplateName> desugar(bool IgnoreDeduced) const;
+
   TemplateName getUnderlying() const;
 
   TemplateNameDependence getDependence() const;
@@ -412,6 +445,30 @@ public:
                       std::optional<unsigned> PackIndex);
 };
 
+class DeducedTemplateStorage : public UncommonTemplateNameStorage,
+                               public llvm::FoldingSetNode {
+  friend class ASTContext;
+
+  TemplateName Underlying;
+
+  DeducedTemplateStorage(TemplateName Underlying,
+                         const DefaultArguments &DefArgs);
+
+public:
+  TemplateName getUnderlying() const { return Underlying; }
+
+  DefaultArguments getDefaultArguments() const {
+    return {/*StartPos=*/Bits.Index,
+            /*Args=*/{reinterpret_cast<const TemplateArgument *>(this + 1),
+                      Bits.Data}};
+  }
+
+  void Profile(llvm::FoldingSetNodeID &ID, const ASTContext &Context) const;
+
+  static void Profile(llvm::FoldingSetNodeID &ID, const ASTContext &Context,
+                      TemplateName Underlying, const DefaultArguments &DefArgs);
+};
+
 inline TemplateName TemplateName::getUnderlying() const {
   if (SubstTemplateTemplateParmStorage *subst
         = getAsSubstTemplateTemplateParm())
diff --git clang/include/clang/AST/Type.h clang/include/clang/AST/Type.h
index 08f7638d7d8f..ef36a7371645 100644
--- clang/include/clang/AST/Type.h
+++ clang/include/clang/AST/Type.h
@@ -2658,6 +2658,7 @@ public:
 #define HLSL_INTANGIBLE_TYPE(Name, Id, SingletonId) bool is##Id##Type() const;
 #include "clang/Basic/HLSLIntangibleTypes.def"
   bool isHLSLSpecificType() const; // Any HLSL specific type
+  bool isHLSLIntangibleType() const; // Any HLSL intangible type
 
   /// Determines if this type, which must satisfy
   /// isObjCLifetimeType(), is implicitly __unsafe_unretained rather
@@ -5828,12 +5829,15 @@ class PackIndexingType final
   QualType Pattern;
   Expr *IndexExpr;
 
-  unsigned Size;
+  unsigned Size : 31;
+
+  LLVM_PREFERRED_TYPE(bool)
+  unsigned ExpandsToEmptyPack : 1;
 
 protected:
   friend class ASTContext; // ASTContext creates these.
   PackIndexingType(const ASTContext &Context, QualType Canonical,
-                   QualType Pattern, Expr *IndexExpr,
+                   QualType Pattern, Expr *IndexExpr, bool ExpandsToEmptyPack,
                    ArrayRef<QualType> Expansions = {});
 
 public:
@@ -5857,6 +5861,8 @@ public:
 
   bool hasSelectedType() const { return getSelectedIndex() != std::nullopt; }
 
+  bool expandsToEmptyPack() const { return ExpandsToEmptyPack; }
+
   ArrayRef<QualType> getExpansions() const {
     return {getExpansionsPtr(), Size};
   }
@@ -5869,10 +5875,10 @@ public:
     if (hasSelectedType())
       getSelectedType().Profile(ID);
     else
-      Profile(ID, Context, getPattern(), getIndexExpr());
+      Profile(ID, Context, getPattern(), getIndexExpr(), expandsToEmptyPack());
   }
   static void Profile(llvm::FoldingSetNodeID &ID, const ASTContext &Context,
-                      QualType Pattern, Expr *E);
+                      QualType Pattern, Expr *E, bool ExpandsToEmptyPack);
 
 private:
   const QualType *getExpansionsPtr() const {
@@ -8336,6 +8342,12 @@ inline bool Type::isHLSLSpecificType() const {
       false; // end boolean or operation
 }
 
+inline bool Type::isHLSLIntangibleType() const {
+  // All HLSL specific types are currently intangible types as well, but that
+  // might change in the future.
+  return isHLSLSpecificType();
+}
+
 inline bool Type::isTemplateTypeParmType() const {
   return isa<TemplateTypeParmType>(CanonicalType);
 }
diff --git clang/include/clang/AST/TypeLoc.h clang/include/clang/AST/TypeLoc.h
index 5db39eb3aefa..03fbdcf60140 100644
--- clang/include/clang/AST/TypeLoc.h
+++ clang/include/clang/AST/TypeLoc.h
@@ -951,12 +951,20 @@ class HLSLAttributedResourceTypeLoc
                              HLSLAttributedResourceLocInfo> {
 public:
   TypeLoc getWrappedLoc() const { return getInnerTypeLoc(); }
+
+  TypeLoc getContainedLoc() const {
+    return TypeLoc(getTypePtr()->getContainedType(), getNonLocalData());
+  }
+
   void setSourceRange(const SourceRange &R) { getLocalData()->Range = R; }
   SourceRange getLocalSourceRange() const { return getLocalData()->Range; }
   void initializeLocal(ASTContext &Context, SourceLocation loc) {
     setSourceRange(SourceRange());
   }
   QualType getInnerType() const { return getTypePtr()->getWrappedType(); }
+  unsigned getLocalDataSize() const {
+    return sizeof(HLSLAttributedResourceLocInfo);
+  }
 };
 
 struct ObjCObjectTypeLocInfo {
diff --git clang/include/clang/AST/TypeProperties.td clang/include/clang/AST/TypeProperties.td
index 3df19315fd57..539a344cb0b6 100644
--- clang/include/clang/AST/TypeProperties.td
+++ clang/include/clang/AST/TypeProperties.td
@@ -473,9 +473,12 @@ let Class = PackIndexingType in {
   def : Property<"indexExpression", ExprRef> {
     let Read = [{ node->getIndexExpr() }];
   }
+  def : Property<"expandsToEmptyPack", Bool> {
+    let Read = [{ node->expandsToEmptyPack() }];
+  }
 
   def : Creator<[{
-    return ctx.getPackIndexingType(pattern, indexExpression);
+    return ctx.getPackIndexingType(pattern, indexExpression, expandsToEmptyPack);
   }]>;
 }
 
diff --git clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
index 228b4ae1e3e1..267cde64f8f2 100644
--- clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
+++ clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
@@ -15,6 +15,7 @@
 #define LLVM_CLANG_ANALYSIS_ANALYSES_UNSAFEBUFFERUSAGE_H
 
 #include "clang/AST/Decl.h"
+#include "clang/AST/Expr.h"
 #include "clang/AST/Stmt.h"
 #include "clang/Basic/SourceLocation.h"
 #include "llvm/Support/Debug.h"
@@ -106,6 +107,20 @@ public:
   virtual void handleUnsafeOperation(const Stmt *Operation,
                                      bool IsRelatedToDecl, ASTContext &Ctx) = 0;
 
+  /// Invoked when a call to an unsafe libc function is found.
+  /// \param PrintfInfo
+  ///  is 0 if the callee function is not a member of the printf family;
+  ///  is 1 if the callee is `sprintf`;
+  ///  is 2 if arguments of the call have `__size_by` relation but are not in a
+  ///  safe pattern;
+  ///  is 3 if string arguments do not guarantee null-termination;
+  ///  is 4 if the callee takes a va_list.
+  /// \param UnsafeArg one of the actual arguments that is unsafe, non-null
+  /// only when `2 <= PrintfInfo <= 3`
+  virtual void handleUnsafeLibcCall(const CallExpr *Call, unsigned PrintfInfo,
+                                    ASTContext &Ctx,
+                                    const Expr *UnsafeArg = nullptr) = 0;
+
   /// Invoked when an unsafe operation with a std container is found.
   virtual void handleUnsafeOperationInContainer(const Stmt *Operation,
                                                 bool IsRelatedToDecl,
@@ -151,6 +166,10 @@ public:
   virtual bool
   ignoreUnsafeBufferInContainer(const SourceLocation &Loc) const = 0;
 
+  /// \return true iff unsafe libc call should NOT be reported at `Loc`
+  virtual bool
+  ignoreUnsafeBufferInLibcCall(const SourceLocation &Loc) const = 0;
+
   virtual std::string
   getUnsafeBufferUsageAttributeTextAt(SourceLocation Loc,
                                       StringRef WSSuffix = "") const = 0;
diff --git clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
index 242ad763ba62..09fa510bc047 100644
--- clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
+++ clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
@@ -18,10 +18,10 @@
 #define WARNING_GADGET(name) GADGET(name)
 #endif
 
-/// A `WARNING_GADGET` subset, where the code pattern of each gadget
-/// corresponds uses of a (possibly hardened) contatiner (e.g., `std::span`).
-#ifndef WARNING_CONTAINER_GADGET
-#define WARNING_CONTAINER_GADGET(name) WARNING_GADGET(name)
+/// A `WARNING_GADGET` subset, each of which may be enabled/disabled separately
+/// with different flags.
+#ifndef WARNING_OPTIONAL_GADGET
+#define WARNING_OPTIONAL_GADGET(name) WARNING_GADGET(name)
 #endif
 
 /// Safe gadgets correspond to code patterns that aren't unsafe but need to be
@@ -38,7 +38,8 @@ WARNING_GADGET(PointerArithmetic)
 WARNING_GADGET(UnsafeBufferUsageAttr)
 WARNING_GADGET(UnsafeBufferUsageCtorAttr)
 WARNING_GADGET(DataInvocation)
-WARNING_CONTAINER_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, arg1)`
+WARNING_OPTIONAL_GADGET(UnsafeLibcFunctionCall)
+WARNING_OPTIONAL_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, arg1)`
 FIXABLE_GADGET(ULCArraySubscript)          // `DRE[any]` in an Unspecified Lvalue Context
 FIXABLE_GADGET(DerefSimplePtrArithFixable)
 FIXABLE_GADGET(PointerDereference)
@@ -52,5 +53,5 @@ FIXABLE_GADGET(PointerInit)
 
 #undef FIXABLE_GADGET
 #undef WARNING_GADGET
-#undef WARNING_CONTAINER_GADGET
+#undef WARNING_OPTIONAL_GADGET
 #undef GADGET
diff --git clang/include/clang/Basic/AArch64SVEACLETypes.def clang/include/clang/Basic/AArch64SVEACLETypes.def
index fa9c1ac0491c..56e6179a664e 100644
--- clang/include/clang/Basic/AArch64SVEACLETypes.def
+++ clang/include/clang/Basic/AArch64SVEACLETypes.def
@@ -8,28 +8,48 @@
 //
 //  This file defines various SVE builtin types.  The macros are:
 //
-//    SVE_TYPE(Name, Id, SingletonId) - A builtin type that has not been
-//    covered by any other #define.  Defining this macro covers all
-//    the builtins.
+//    SVE_TYPE:
+//    - (Name, MangledName, Id, SingletonId)
+//    A builtin type that has not been covered by any other #define. Defining
+//    this macro covers all the builtin types.
 //
-//    SVE_VECTOR_TYPE(Name, Id, SingletonId, ElKind, ElBits, IsSigned, IsFP) -
-//    An SVE scalable vector.
+//    SVE_VECTOR_TYPE, SVE_PREDICATE_TYPE, SVE_OPAQUE_TYPE:
+//    - (Name, MangledName, Id, SingletonId)
+//    A builtin type that has not been covered by any other #define. Defining
+//    this macro covers the named subset of builtin types.
 //
-//    SVE_PREDICATE_TYPE(Name, Id, SingletonId, ElKind) - An SVE scalable
-//    predicate.
+//    SVE_VECTOR_TYPE_INT
+//    - (Name, MangledName, Id, SingletonId, NumEls, ElBits, NF, IsSigned)
+//    Defining the macro covers the integer vector types.
+//
+//    SVE_VECTOR_TYPE_FLOAT, SVE_VECTOR_TYPE_BFLOAT:
+//    - (Name, MangledName, Id, SingletonId, NumEls, ElBits, NF)
+//    Defining the macro covers the floating point vector types.
+//
+//    SVE_PREDICATE_TYPE_ALL:
+//    - (Name, MangledName, Id, SingletonId, NumEls, NF)
+//    Defining the macro covers the boolean vector types.
 //
 // where:
 //
 //  - Name is the name of the builtin type.
 //
+//  - MangledName is the mangled name of the builtin type.
+//
 //  - BuiltinType::Id is the enumerator defining the type.
 //
 //  - Context.SingletonId is the global singleton of this type.
 //
 //  - ElKind enumerates the type of the elements.
 //
+//  - NumEls enumerates the number of elements.
+//
 //  - ElBits is the size of one element in bits.
 //
+//  - NF enumerates the number of sub-vectors.
+//    TODO: Tuple types are represented as a concatenation of "NumEls x ElBits"
+//    vectors.  This will be changed to become a struct containing NF vectors.
+//
 //  - IsSigned is true for vectors of signed integer elements and
 //    for vectors of floating-point elements.
 //
@@ -39,102 +59,134 @@
 //===----------------------------------------------------------------------===//
 
 #ifndef SVE_VECTOR_TYPE
-#define SVE_VECTOR_TYPE(Name, MangledName, Id, SingletonId, NumEls, ElBits,    \
-                        IsSigned, IsFP, IsBF)                                  \
+#define SVE_VECTOR_TYPE(Name, MangledName, Id, SingletonId) \
   SVE_TYPE(Name, Id, SingletonId)
 #endif
 
+#ifndef SVE_VECTOR_TYPE_DETAILS
+#define SVE_VECTOR_TYPE_DETAILS(Name, MangledName, Id, SingletonId, NumEls, ElBits, NF, IsSigned, IsFP, IsBF) \
+  SVE_VECTOR_TYPE(Name, MangledName, Id, SingletonId)
+#endif
+
+#ifndef SVE_VECTOR_TYPE_BFLOAT
+#define SVE_VECTOR_TYPE_BFLOAT(Name, MangledName, Id, SingletonId, NumEls, ElBits, NF) \
+  SVE_VECTOR_TYPE_DETAILS(Name, MangledName, Id, SingletonId, NumEls, ElBits, NF, false, false, true)
+#endif
+
+#ifndef SVE_VECTOR_TYPE_FLOAT
+#define SVE_VECTOR_TYPE_FLOAT(Name, MangledName, Id, SingletonId, NumEls, ElBits, NF) \
+  SVE_VECTOR_TYPE_DETAILS(Name, MangledName, Id, SingletonId, NumEls, ElBits, NF, false, true, false)
+#endif
+
+#ifndef SVE_VECTOR_TYPE_INT
+#define SVE_VECTOR_TYPE_INT(Name, MangledName, Id, SingletonId, NumEls, ElBits, NF, IsSigned) \
+  SVE_VECTOR_TYPE_DETAILS(Name, MangledName, Id, SingletonId, NumEls, ElBits, NF, IsSigned, false, false)
+#endif
+
 #ifndef SVE_PREDICATE_TYPE
-#define SVE_PREDICATE_TYPE(Name, MangledName, Id, SingletonId, NumEls)         \
+#define SVE_PREDICATE_TYPE(Name, MangledName, Id, SingletonId) \
   SVE_TYPE(Name, Id, SingletonId)
 #endif
 
+#ifndef SVE_PREDICATE_TYPE_ALL
+#define SVE_PREDICATE_TYPE_ALL(Name, MangledName, Id, SingletonId, NumEls, NF) \
+  SVE_PREDICATE_TYPE(Name, MangledName, Id, SingletonId)
+#endif
+
 #ifndef SVE_OPAQUE_TYPE
-#define SVE_OPAQUE_TYPE(Name, MangledName, Id, SingletonId)                    \
+#define SVE_OPAQUE_TYPE(Name, MangledName, Id, SingletonId) \
   SVE_TYPE(Name, Id, SingletonId)
 #endif
 
 //===- Vector point types -----------------------------------------------===//
 
+SVE_VECTOR_TYPE_INT("__SVInt8_t",  "__SVInt8_t",  SveInt8,  SveInt8Ty, 16,  8, 1, true)
+SVE_VECTOR_TYPE_INT("__SVInt16_t", "__SVInt16_t", SveInt16, SveInt16Ty, 8, 16, 1, true)
+SVE_VECTOR_TYPE_INT("__SVInt32_t", "__SVInt32_t", SveInt32, SveInt32Ty, 4, 32, 1, true)
+SVE_VECTOR_TYPE_INT("__SVInt64_t", "__SVInt64_t", SveInt64, SveInt64Ty, 2, 64, 1, true)
 
-SVE_VECTOR_TYPE("__SVInt8_t", "__SVInt8_t",  SveInt8, SveInt8Ty, 16, 8, true, false, false)
-SVE_VECTOR_TYPE("__SVInt16_t", "__SVInt16_t", SveInt16, SveInt16Ty, 8, 16, true, false, false)
-SVE_VECTOR_TYPE("__SVInt32_t", "__SVInt32_t", SveInt32, SveInt32Ty, 4, 32, true, false, false)
-SVE_VECTOR_TYPE("__SVInt64_t", "__SVInt64_t", SveInt64, SveInt64Ty, 2, 64, true, false, false)
-
-SVE_VECTOR_TYPE("__SVUint8_t", "__SVUint8_t",  SveUint8, SveUint8Ty, 16, 8, false, false, false)
-SVE_VECTOR_TYPE("__SVUint16_t", "__SVUint16_t", SveUint16, SveUint16Ty, 8, 16, false, false, false)
-SVE_VECTOR_TYPE("__SVUint32_t", "__SVUint32_t", SveUint32, SveUint32Ty, 4, 32, false, false, false)
-SVE_VECTOR_TYPE("__SVUint64_t", "__SVUint64_t", SveUint64, SveUint64Ty, 2, 64, false, false, false)
+SVE_VECTOR_TYPE_INT("__SVUint8_t",  "__SVUint8_t",  SveUint8,  SveUint8Ty, 16, 8, 1, false)
+SVE_VECTOR_TYPE_INT("__SVUint16_t", "__SVUint16_t", SveUint16, SveUint16Ty, 8, 16, 1, false)
+SVE_VECTOR_TYPE_INT("__SVUint32_t", "__SVUint32_t", SveUint32, SveUint32Ty, 4, 32, 1, false)
+SVE_VECTOR_TYPE_INT("__SVUint64_t", "__SVUint64_t", SveUint64, SveUint64Ty, 2, 64, 1, false)
 
-SVE_VECTOR_TYPE("__SVFloat16_t", "__SVFloat16_t", SveFloat16, SveFloat16Ty, 8, 16, true, true, false)
-SVE_VECTOR_TYPE("__SVFloat32_t", "__SVFloat32_t", SveFloat32, SveFloat32Ty, 4, 32, true, true, false)
-SVE_VECTOR_TYPE("__SVFloat64_t", "__SVFloat64_t", SveFloat64, SveFloat64Ty, 2, 64, true, true, false)
+SVE_VECTOR_TYPE_FLOAT("__SVFloat16_t", "__SVFloat16_t", SveFloat16, SveFloat16Ty, 8, 16, 1)
+SVE_VECTOR_TYPE_FLOAT("__SVFloat32_t", "__SVFloat32_t", SveFloat32, SveFloat32Ty, 4, 32, 1)
+SVE_VECTOR_TYPE_FLOAT("__SVFloat64_t", "__SVFloat64_t", SveFloat64, SveFloat64Ty, 2, 64, 1)
 
-SVE_VECTOR_TYPE("__SVBfloat16_t", "__SVBfloat16_t", SveBFloat16, SveBFloat16Ty, 8, 16, true, false, true)
+SVE_VECTOR_TYPE_BFLOAT("__SVBfloat16_t", "__SVBfloat16_t", SveBFloat16, SveBFloat16Ty, 8, 16, 1)
 
 //
 // x2
 //
-SVE_VECTOR_TYPE("__clang_svint8x2_t", "svint8x2_t",  SveInt8x2, SveInt8x2Ty, 32, 8, true, false, false)
-SVE_VECTOR_TYPE("__clang_svint16x2_t", "svint16x2_t", SveInt16x2, SveInt16x2Ty, 16, 16, true, false, false)
-SVE_VECTOR_TYPE("__clang_svint32x2_t", "svint32x2_t", SveInt32x2, SveInt32x2Ty, 8, 32, true, false, false)
-SVE_VECTOR_TYPE("__clang_svint64x2_t", "svint64x2_t", SveInt64x2, SveInt64x2Ty, 4, 64, true, false, false)
 
-SVE_VECTOR_TYPE("__clang_svuint8x2_t", "svuint8x2_t",  SveUint8x2, SveUint8x2Ty, 32, 8, false, false, false)
-SVE_VECTOR_TYPE("__clang_svuint16x2_t", "svuint16x2_t", SveUint16x2, SveUint16x2Ty, 16, 16, false, false, false)
-SVE_VECTOR_TYPE("__clang_svuint32x2_t", "svuint32x2_t", SveUint32x2, SveUint32x2Ty, 8, 32, false, false, false)
-SVE_VECTOR_TYPE("__clang_svuint64x2_t", "svuint64x2_t", SveUint64x2, SveUint64x2Ty, 4, 64, false, false, false)
+SVE_VECTOR_TYPE_INT("__clang_svint8x2_t",  "svint8x2_t",  SveInt8x2,  SveInt8x2Ty, 16, 8, 2, true)
+SVE_VECTOR_TYPE_INT("__clang_svint16x2_t", "svint16x2_t", SveInt16x2, SveInt16x2Ty, 8, 16, 2, true)
+SVE_VECTOR_TYPE_INT("__clang_svint32x2_t", "svint32x2_t", SveInt32x2, SveInt32x2Ty, 4, 32, 2, true)
+SVE_VECTOR_TYPE_INT("__clang_svint64x2_t", "svint64x2_t", SveInt64x2, SveInt64x2Ty, 2, 64, 2, true)
 
-SVE_VECTOR_TYPE("__clang_svfloat16x2_t", "svfloat16x2_t", SveFloat16x2, SveFloat16x2Ty, 16, 16, true, true, false)
-SVE_VECTOR_TYPE("__clang_svfloat32x2_t", "svfloat32x2_t", SveFloat32x2, SveFloat32x2Ty, 8, 32, true, true, false)
-SVE_VECTOR_TYPE("__clang_svfloat64x2_t", "svfloat64x2_t", SveFloat64x2, SveFloat64x2Ty, 4, 64, true, true, false)
+SVE_VECTOR_TYPE_INT("__clang_svuint8x2_t",  "svuint8x2_t",  SveUint8x2,  SveUint8x2Ty, 16 , 8, 2, false)
+SVE_VECTOR_TYPE_INT("__clang_svuint16x2_t", "svuint16x2_t", SveUint16x2, SveUint16x2Ty, 8, 16, 2, false)
+SVE_VECTOR_TYPE_INT("__clang_svuint32x2_t", "svuint32x2_t", SveUint32x2, SveUint32x2Ty, 4, 32, 2, false)
+SVE_VECTOR_TYPE_INT("__clang_svuint64x2_t", "svuint64x2_t", SveUint64x2, SveUint64x2Ty, 2, 64, 2, false)
+
+SVE_VECTOR_TYPE_FLOAT("__clang_svfloat16x2_t", "svfloat16x2_t", SveFloat16x2, SveFloat16x2Ty, 8, 16, 2)
+SVE_VECTOR_TYPE_FLOAT("__clang_svfloat32x2_t", "svfloat32x2_t", SveFloat32x2, SveFloat32x2Ty, 4, 32, 2)
+SVE_VECTOR_TYPE_FLOAT("__clang_svfloat64x2_t", "svfloat64x2_t", SveFloat64x2, SveFloat64x2Ty, 2, 64, 2)
+
+SVE_VECTOR_TYPE_BFLOAT("__clang_svbfloat16x2_t", "svbfloat16x2_t", SveBFloat16x2, SveBFloat16x2Ty, 8, 16, 2)
 
-SVE_VECTOR_TYPE("__clang_svbfloat16x2_t", "svbfloat16x2_t", SveBFloat16x2, SveBFloat16x2Ty, 16, 16, true, false, true)
 //
 // x3
 //
-SVE_VECTOR_TYPE("__clang_svint8x3_t", "svint8x3_t",  SveInt8x3, SveInt8x3Ty, 48, 8, true, false, false)
-SVE_VECTOR_TYPE("__clang_svint16x3_t", "svint16x3_t", SveInt16x3, SveInt16x3Ty, 24, 16, true, false, false)
-SVE_VECTOR_TYPE("__clang_svint32x3_t", "svint32x3_t", SveInt32x3, SveInt32x3Ty, 12, 32, true, false, false)
-SVE_VECTOR_TYPE("__clang_svint64x3_t", "svint64x3_t", SveInt64x3, SveInt64x3Ty, 6, 64, true, false, false)
 
-SVE_VECTOR_TYPE("__clang_svuint8x3_t", "svuint8x3_t",  SveUint8x3, SveUint8x3Ty, 48, 8, false, false, false)
-SVE_VECTOR_TYPE("__clang_svuint16x3_t", "svuint16x3_t", SveUint16x3, SveUint16x3Ty, 24, 16, false, false, false)
-SVE_VECTOR_TYPE("__clang_svuint32x3_t", "svuint32x3_t", SveUint32x3, SveUint32x3Ty, 12, 32, false, false, false)
-SVE_VECTOR_TYPE("__clang_svuint64x3_t", "svuint64x3_t", SveUint64x3, SveUint64x3Ty, 6, 64, false, false, false)
+SVE_VECTOR_TYPE_INT("__clang_svint8x3_t",  "svint8x3_t",  SveInt8x3,  SveInt8x3Ty, 16,  8, 3, true)
+SVE_VECTOR_TYPE_INT("__clang_svint16x3_t", "svint16x3_t", SveInt16x3, SveInt16x3Ty, 8, 16, 3, true)
+SVE_VECTOR_TYPE_INT("__clang_svint32x3_t", "svint32x3_t", SveInt32x3, SveInt32x3Ty, 4, 32, 3, true)
+SVE_VECTOR_TYPE_INT("__clang_svint64x3_t", "svint64x3_t", SveInt64x3, SveInt64x3Ty, 2, 64, 3, true)
+
+SVE_VECTOR_TYPE_INT("__clang_svuint8x3_t",  "svuint8x3_t",  SveUint8x3,  SveUint8x3Ty, 16,  8, 3, false)
+SVE_VECTOR_TYPE_INT("__clang_svuint16x3_t", "svuint16x3_t", SveUint16x3, SveUint16x3Ty, 8, 16, 3, false)
+SVE_VECTOR_TYPE_INT("__clang_svuint32x3_t", "svuint32x3_t", SveUint32x3, SveUint32x3Ty, 4, 32, 3, false)
+SVE_VECTOR_TYPE_INT("__clang_svuint64x3_t", "svuint64x3_t", SveUint64x3, SveUint64x3Ty, 2, 64, 3, false)
 
-SVE_VECTOR_TYPE("__clang_svfloat16x3_t", "svfloat16x3_t", SveFloat16x3, SveFloat16x3Ty, 24, 16, true, true, false)
-SVE_VECTOR_TYPE("__clang_svfloat32x3_t", "svfloat32x3_t", SveFloat32x3, SveFloat32x3Ty, 12, 32, true, true, false)
-SVE_VECTOR_TYPE("__clang_svfloat64x3_t", "svfloat64x3_t", SveFloat64x3, SveFloat64x3Ty, 6, 64, true, true, false)
+SVE_VECTOR_TYPE_FLOAT("__clang_svfloat16x3_t", "svfloat16x3_t", SveFloat16x3, SveFloat16x3Ty, 8, 16, 3)
+SVE_VECTOR_TYPE_FLOAT("__clang_svfloat32x3_t", "svfloat32x3_t", SveFloat32x3, SveFloat32x3Ty, 4, 32, 3)
+SVE_VECTOR_TYPE_FLOAT("__clang_svfloat64x3_t", "svfloat64x3_t", SveFloat64x3, SveFloat64x3Ty, 2, 64, 3)
+
+SVE_VECTOR_TYPE_BFLOAT("__clang_svbfloat16x3_t", "svbfloat16x3_t", SveBFloat16x3, SveBFloat16x3Ty, 8, 16, 3)
 
-SVE_VECTOR_TYPE("__clang_svbfloat16x3_t", "svbfloat16x3_t", SveBFloat16x3, SveBFloat16x3Ty, 24, 16, true, false, true)
 //
 // x4
 //
-SVE_VECTOR_TYPE("__clang_svint8x4_t", "svint8x4_t",  SveInt8x4, SveInt8x4Ty, 64, 8, true, false, false)
-SVE_VECTOR_TYPE("__clang_svint16x4_t", "svint16x4_t", SveInt16x4, SveInt16x4Ty, 32, 16, true, false, false)
-SVE_VECTOR_TYPE("__clang_svint32x4_t", "svint32x4_t", SveInt32x4, SveInt32x4Ty, 16, 32, true, false, false)
-SVE_VECTOR_TYPE("__clang_svint64x4_t", "svint64x4_t", SveInt64x4, SveInt64x4Ty, 8, 64, true, false, false)
 
-SVE_VECTOR_TYPE("__clang_svuint8x4_t", "svuint8x4_t",  SveUint8x4, SveUint8x4Ty, 64, 8, false, false, false)
-SVE_VECTOR_TYPE("__clang_svuint16x4_t", "svuint16x4_t", SveUint16x4, SveUint16x4Ty, 32, 16, false, false, false)
-SVE_VECTOR_TYPE("__clang_svuint32x4_t", "svuint32x4_t", SveUint32x4, SveUint32x4Ty, 16, 32, false, false, false)
-SVE_VECTOR_TYPE("__clang_svuint64x4_t", "svuint64x4_t", SveUint64x4, SveUint64x4Ty, 8, 64, false, false, false)
+SVE_VECTOR_TYPE_INT("__clang_svint8x4_t",  "svint8x4_t",  SveInt8x4,  SveInt8x4Ty, 16,  8, 4, true)
+SVE_VECTOR_TYPE_INT("__clang_svint16x4_t", "svint16x4_t", SveInt16x4, SveInt16x4Ty, 8, 16, 4, true)
+SVE_VECTOR_TYPE_INT("__clang_svint32x4_t", "svint32x4_t", SveInt32x4, SveInt32x4Ty, 4, 32, 4, true)
+SVE_VECTOR_TYPE_INT("__clang_svint64x4_t", "svint64x4_t", SveInt64x4, SveInt64x4Ty, 2, 64, 4, true)
+
+SVE_VECTOR_TYPE_INT("__clang_svuint8x4_t",  "svuint8x4_t",  SveUint8x4,  SveUint8x4Ty, 16,  8, 4, false)
+SVE_VECTOR_TYPE_INT("__clang_svuint16x4_t", "svuint16x4_t", SveUint16x4, SveUint16x4Ty, 8, 16, 4, false)
+SVE_VECTOR_TYPE_INT("__clang_svuint32x4_t", "svuint32x4_t", SveUint32x4, SveUint32x4Ty, 4, 32, 4, false)
+SVE_VECTOR_TYPE_INT("__clang_svuint64x4_t", "svuint64x4_t", SveUint64x4, SveUint64x4Ty, 2, 64, 4, false)
 
-SVE_VECTOR_TYPE("__clang_svfloat16x4_t", "svfloat16x4_t", SveFloat16x4, SveFloat16x4Ty, 32, 16, true, true, false)
-SVE_VECTOR_TYPE("__clang_svfloat32x4_t", "svfloat32x4_t", SveFloat32x4, SveFloat32x4Ty, 16, 32, true, true, false)
-SVE_VECTOR_TYPE("__clang_svfloat64x4_t", "svfloat64x4_t", SveFloat64x4, SveFloat64x4Ty, 8, 64, true, true, false)
+SVE_VECTOR_TYPE_FLOAT("__clang_svfloat16x4_t", "svfloat16x4_t", SveFloat16x4, SveFloat16x4Ty, 8, 16, 4)
+SVE_VECTOR_TYPE_FLOAT("__clang_svfloat32x4_t", "svfloat32x4_t", SveFloat32x4, SveFloat32x4Ty, 4, 32, 4)
+SVE_VECTOR_TYPE_FLOAT("__clang_svfloat64x4_t", "svfloat64x4_t", SveFloat64x4, SveFloat64x4Ty, 2, 64, 4)
 
-SVE_VECTOR_TYPE("__clang_svbfloat16x4_t", "svbfloat16x4_t", SveBFloat16x4, SveBFloat16x4Ty, 32, 16, true, false, true)
+SVE_VECTOR_TYPE_BFLOAT("__clang_svbfloat16x4_t", "svbfloat16x4_t", SveBFloat16x4, SveBFloat16x4Ty, 8, 16, 4)
 
-SVE_PREDICATE_TYPE("__SVBool_t", "__SVBool_t", SveBool, SveBoolTy, 16)
-SVE_PREDICATE_TYPE("__clang_svboolx2_t", "svboolx2_t", SveBoolx2, SveBoolx2Ty, 32)
-SVE_PREDICATE_TYPE("__clang_svboolx4_t", "svboolx4_t", SveBoolx4, SveBoolx4Ty, 64)
+SVE_PREDICATE_TYPE_ALL("__SVBool_t", "__SVBool_t", SveBool, SveBoolTy, 16, 1)
+SVE_PREDICATE_TYPE_ALL("__clang_svboolx2_t", "svboolx2_t", SveBoolx2, SveBoolx2Ty, 16, 2)
+SVE_PREDICATE_TYPE_ALL("__clang_svboolx4_t", "svboolx4_t", SveBoolx4, SveBoolx4Ty, 16, 4)
 
 SVE_OPAQUE_TYPE("__SVCount_t", "__SVCount_t", SveCount, SveCountTy)
 
 #undef SVE_VECTOR_TYPE
+#undef SVE_VECTOR_TYPE_BFLOAT
+#undef SVE_VECTOR_TYPE_FLOAT
+#undef SVE_VECTOR_TYPE_INT
 #undef SVE_PREDICATE_TYPE
+#undef SVE_PREDICATE_TYPE_ALL
 #undef SVE_OPAQUE_TYPE
 #undef SVE_TYPE
diff --git clang/include/clang/Basic/Attr.td clang/include/clang/Basic/Attr.td
index 8d2a362abc3c..9a7b163b2c6d 100644
--- clang/include/clang/Basic/Attr.td
+++ clang/include/clang/Basic/Attr.td
@@ -1250,6 +1250,14 @@ def CoroDisableLifetimeBound : InheritableAttr {
   let SimpleHandler = 1;
 }
 
+def CoroAwaitElidable : InheritableAttr {
+  let Spellings = [Clang<"coro_await_elidable">];
+  let Subjects = SubjectList<[CXXRecord]>;
+  let LangOpts = [CPlusPlus];
+  let Documentation = [CoroAwaitElidableDoc];
+  let SimpleHandler = 1;
+}
+
 // OSObject-based attributes.
 def OSConsumed : InheritableParamAttr {
   let Spellings = [Clang<"os_consumed">];
@@ -4643,16 +4651,14 @@ def HLSLResource : InheritableAttr {
   let Documentation = [InternalOnly];
 }
 
-def HLSLROV : InheritableAttr {
+def HLSLROV : TypeAttr {
   let Spellings = [CXX11<"hlsl", "is_rov">];
-  let Subjects = SubjectList<[Struct]>;
   let LangOpts = [HLSL]; 
   let Documentation = [InternalOnly];
 }
 
-def HLSLResourceClass : InheritableAttr {
+def HLSLResourceClass : TypeAttr {
   let Spellings = [CXX11<"hlsl", "resource_class">];
-  let Subjects = SubjectList<[Field]>;
   let LangOpts = [HLSL];
   let Args = [
 	EnumArgument<"ResourceClass", "llvm::hlsl::ResourceClass",
diff --git clang/include/clang/Basic/AttrDocs.td clang/include/clang/Basic/AttrDocs.td
index ef077db29883..546e5100b79d 100644
--- clang/include/clang/Basic/AttrDocs.td
+++ clang/include/clang/Basic/AttrDocs.td
@@ -8255,6 +8255,38 @@ but do not pass them to the underlying coroutine or pass them by value.
 }];
 }
 
+def CoroAwaitElidableDoc : Documentation {
+  let Category = DocCatDecl;
+  let Content = [{
+``[[clang::coro_await_elidable]]`` is a class attribute which can be applied to
+a coroutine return type.
+
+When a coroutine function that returns such a type calls another coroutine function,
+the compiler performs heap allocation elision when the call to the coroutine function
+is immediately co_awaited as a prvalue. In this case, the coroutine frame for the
+callee will be a local variable within the enclosing braces in the caller's stack
+frame. The local variable, like other variables in coroutines, may be collected
+into the coroutine frame, which may be allocated on the heap.
+
+Example:
+
+.. code-block:: c++
+
+  class [[clang::coro_await_elidable]] Task { ... };
+
+  Task foo();
+  Task bar() {
+    co_await foo(); // foo()'s coroutine frame on this line is elidable
+    auto t = foo(); // foo()'s coroutine frame on this line is NOT elidable
+    co_await t;
+  }
+
+The behavior is undefined if the caller coroutine is destroyed earlier than the
+callee coroutine.
+
+}];
+}
+
 def CountedByDocs : Documentation {
   let Category = DocCatField;
   let Content = [{
@@ -8414,4 +8446,3 @@ Declares that a function potentially allocates heap memory, and prevents any pot
 of ``nonallocating`` by the compiler.
   }];
 }
-
diff --git clang/include/clang/Basic/Builtins.td clang/include/clang/Basic/Builtins.td
index 8668b25661de..3dc04f68b317 100644
--- clang/include/clang/Basic/Builtins.td
+++ clang/include/clang/Basic/Builtins.td
@@ -934,6 +934,12 @@ def IsConstantEvaluated : LangBuiltin<"CXX_LANG"> {
   let Prototype = "bool()";
 }
 
+def IsWithinLifetime : LangBuiltin<"CXX_LANG"> {
+  let Spellings = ["__builtin_is_within_lifetime"];
+  let Attributes = [NoThrow, CustomTypeChecking, Consteval];
+  let Prototype = "bool(void*)";
+}
+
 // GCC exception builtins
 def EHReturn : Builtin {
   let Spellings = ["__builtin_eh_return"];
@@ -4679,6 +4685,12 @@ def HLSLWaveGetLaneIndex : LangBuiltin<"HLSL_LANG"> {
   let Prototype = "unsigned int()";
 }
 
+def HLSLWaveIsFirstLane : LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_wave_is_first_lane"];
+  let Attributes = [NoThrow, Const];
+  let Prototype = "bool()";
+}
+
 def HLSLClamp : LangBuiltin<"HLSL_LANG"> {
   let Spellings = ["__builtin_hlsl_elementwise_clamp"];
   let Attributes = [NoThrow, Const];
@@ -4751,6 +4763,18 @@ def HLSLSaturate : LangBuiltin<"HLSL_LANG"> {
   let Prototype = "void(...)";
 }
 
+def HLSLSelect : LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_select"];
+  let Attributes = [NoThrow, Const];
+  let Prototype = "void(...)";
+}
+
+def HLSLSign : LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_elementwise_sign"];
+  let Attributes = [NoThrow, Const];
+  let Prototype = "void(...)";
+}
+
 // Builtins for XRay.
 def XRayCustomEvent : Builtin {
   let Spellings = ["__xray_customevent"];
diff --git clang/include/clang/Basic/BuiltinsAMDGPU.def clang/include/clang/Basic/BuiltinsAMDGPU.def
index ab29ef38f779..c02970f55b22 100644
--- clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -448,6 +448,8 @@ TARGET_BUILTIN(__builtin_amdgcn_s_barrier_join, "vi", "n", "gfx12-insts")
 TARGET_BUILTIN(__builtin_amdgcn_s_wakeup_barrier, "vi", "n", "gfx12-insts")
 TARGET_BUILTIN(__builtin_amdgcn_s_barrier_leave, "b", "n", "gfx12-insts")
 TARGET_BUILTIN(__builtin_amdgcn_s_get_barrier_state, "Uii", "n", "gfx12-insts")
+TARGET_BUILTIN(__builtin_amdgcn_s_prefetch_data, "vvC*Ui", "nc", "gfx12-insts")
+TARGET_BUILTIN(__builtin_amdgcn_s_buffer_prefetch_data, "vQbIiUi", "nc", "gfx12-insts")
 
 TARGET_BUILTIN(__builtin_amdgcn_global_load_tr_b64_v2i32, "V2iV2i*1", "nc", "gfx12-insts,wavefrontsize32")
 TARGET_BUILTIN(__builtin_amdgcn_global_load_tr_b128_v8i16, "V8sV8s*1", "nc", "gfx12-insts,wavefrontsize32")
diff --git clang/include/clang/Basic/BuiltinsWebAssembly.def clang/include/clang/Basic/BuiltinsWebAssembly.def
index 2e80eef2c8b9..ad73f031922a 100644
--- clang/include/clang/Basic/BuiltinsWebAssembly.def
+++ clang/include/clang/Basic/BuiltinsWebAssembly.def
@@ -209,8 +209,8 @@ TARGET_BUILTIN(__builtin_wasm_relaxed_dot_bf16x8_add_f32_f32x4, "V4fV8UsV8UsV4f"
 TARGET_BUILTIN(__builtin_wasm_loadf16_f32, "fh*", "nU", "fp16")
 TARGET_BUILTIN(__builtin_wasm_storef16_f32, "vfh*", "n", "fp16")
 TARGET_BUILTIN(__builtin_wasm_splat_f16x8, "V8hf", "nc", "fp16")
-TARGET_BUILTIN(__builtin_wasm_extract_lane_f16x8, "fV8hi", "nc", "fp16")
-TARGET_BUILTIN(__builtin_wasm_replace_lane_f16x8, "V8hV8hif", "nc", "fp16")
+TARGET_BUILTIN(__builtin_wasm_extract_lane_f16x8, "fV8hIi", "nc", "fp16")
+TARGET_BUILTIN(__builtin_wasm_replace_lane_f16x8, "V8hV8hIif", "nc", "fp16")
 
 // Reference Types builtins
 // Some builtins are custom type-checked - see 't' as part of the third argument,
diff --git clang/include/clang/Basic/BuiltinsX86.def clang/include/clang/Basic/BuiltinsX86.def
index 48376ee05279..3f47e34cc909 100644
--- clang/include/clang/Basic/BuiltinsX86.def
+++ clang/include/clang/Basic/BuiltinsX86.def
@@ -2122,6 +2122,36 @@ TARGET_BUILTIN(__builtin_ia32_vpdpwuud256, "V8iV8iV8iV8i", "nV:256:", "avxvnniin
 TARGET_BUILTIN(__builtin_ia32_vpdpwuuds128, "V4iV4iV4iV4i", "nV:128:", "avxvnniint16|avx10.2-256")
 TARGET_BUILTIN(__builtin_ia32_vpdpwuuds256, "V8iV8iV8iV8i", "nV:256:", "avxvnniint16|avx10.2-256")
 
+// AVX10.2 SATCVT-DS
+TARGET_BUILTIN(__builtin_ia32_vcvttsd2sis32, "iV2dIi", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttsd2usis32, "UiV2dIi", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttss2sis32, "iV4fIi", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttss2usis32, "UiV4fIi", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttpd2dqs128_mask, "V4iV2dV4iUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttpd2dqs256_round_mask, "V4iV4dV4iUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttpd2dqs512_round_mask, "V8iV8dV8iUcIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttpd2udqs128_mask, "V4iV2dV4iUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttpd2udqs256_round_mask, "V4iV4dV4iUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttpd2udqs512_round_mask, "V8iV8dV8iUcIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttpd2qqs128_mask,  "V2OiV2dV2OiUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttpd2qqs256_round_mask, "V4OiV4dV4OiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttpd2qqs512_round_mask, "V8OiV8dV8OiUcIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttpd2uqqs128_mask, "V2OiV2dV2OiUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttpd2uqqs256_round_mask, "V4OiV4dV4OiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttpd2uqqs512_round_mask, "V8OiV8dV8OiUcIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2dqs128_mask, "V4iV4fV4iUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2dqs256_round_mask, "V8iV8fV8iUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2dqs512_round_mask, "V16iV16fV16iUsIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2udqs128_mask, "V4iV4fV4iUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2udqs256_round_mask, "V8iV8fV8iUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2udqs512_round_mask, "V16iV16fV16iUsIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2qqs128_mask, "V2OiV4fV2OiUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2qqs256_round_mask, "V4OiV4fV4OiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2qqs512_round_mask, "V8OiV8fV8OiUcIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2uqqs128_mask, "V2OiV4fV2OiUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2uqqs256_round_mask, "V4OiV4fV4OiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2uqqs512_round_mask, "V8OiV8fV8OiUcIi", "nV:512:", "avx10.2-512")
+
 // AVX-NE-CONVERT
 TARGET_BUILTIN(__builtin_ia32_vbcstnebf162ps128, "V4fyC*", "nV:128:", "avxneconvert")
 TARGET_BUILTIN(__builtin_ia32_vbcstnebf162ps256, "V8fyC*", "nV:256:", "avxneconvert")
diff --git clang/include/clang/Basic/BuiltinsX86_64.def clang/include/clang/Basic/BuiltinsX86_64.def
index 5e00916d4b25..db381aa77e76 100644
--- clang/include/clang/Basic/BuiltinsX86_64.def
+++ clang/include/clang/Basic/BuiltinsX86_64.def
@@ -99,6 +99,12 @@ TARGET_BUILTIN(__builtin_ia32_vcvttsh2si64, "OiV8xIi", "ncV:128:", "avx512fp16")
 TARGET_BUILTIN(__builtin_ia32_vcvttsh2usi64, "UOiV8xIi", "ncV:128:", "avx512fp16")
 TARGET_BUILTIN(__builtin_ia32_directstore_u64, "vULi*ULi", "n", "movdiri")
 
+// AVX10.2 SATCVT-DS
+TARGET_BUILTIN(__builtin_ia32_vcvttsd2sis64, "OiV2dIi", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttsd2usis64, "UOiV2dIi", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttss2sis64, "OiV4fIi", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttss2usis64, "UOiV4fIi", "ncV:128:", "avx10.2-256")
+
 // UINTR
 TARGET_BUILTIN(__builtin_ia32_clui, "v", "n", "uintr")
 TARGET_BUILTIN(__builtin_ia32_stui, "v", "n", "uintr")
diff --git clang/include/clang/Basic/CMakeLists.txt clang/include/clang/Basic/CMakeLists.txt
index 2ef6ddc68f4b..f069f4fc118f 100644
--- clang/include/clang/Basic/CMakeLists.txt
+++ clang/include/clang/Basic/CMakeLists.txt
@@ -67,6 +67,9 @@ clang_tablegen(arm_neon.inc -gen-arm-neon-sema
 clang_tablegen(arm_fp16.inc -gen-arm-neon-sema
   SOURCE arm_fp16.td
   TARGET ClangARMFP16)
+clang_tablegen(arm_immcheck_types.inc -gen-arm-immcheck-types
+  SOURCE arm_sve.td
+  TARGET ClangARMImmChecks)
 clang_tablegen(arm_mve_builtins.inc -gen-arm-mve-builtin-def
   SOURCE arm_mve.td
   TARGET ClangARMMveBuiltinsDef)
diff --git clang/include/clang/Basic/DiagnosticASTKinds.td clang/include/clang/Basic/DiagnosticASTKinds.td
index 45ad84831589..21a307d1e898 100644
--- clang/include/clang/Basic/DiagnosticASTKinds.td
+++ clang/include/clang/Basic/DiagnosticASTKinds.td
@@ -148,8 +148,6 @@ def note_constexpr_var_init_weak : Note<
 def note_constexpr_typeid_polymorphic : Note<
   "typeid applied to expression of polymorphic type %0 is "
   "not allowed in a constant expression in C++ standards before C++20">;
-def note_constexpr_void_comparison : Note<
-  "comparison between unequal pointers to void has unspecified result">;
 def note_constexpr_temporary_here : Note<"temporary created here">;
 def note_constexpr_dynamic_alloc_here : Note<"heap allocation performed here">;
 def note_constexpr_conditional_never_const : Note<
@@ -169,14 +167,14 @@ def note_constexpr_this : Note<
 def access_kind : TextSubstitution<
   "%select{read of|read of|assignment to|increment of|decrement of|"
   "member call on|dynamic_cast of|typeid applied to|construction of|"
-  "destruction of}0">;
+  "destruction of|read of}0">;
 def access_kind_subobject : TextSubstitution<
   "%select{read of|read of|assignment to|increment of|decrement of|"
   "member call on|dynamic_cast of|typeid applied to|"
-  "construction of subobject of|destruction of}0">;
+  "construction of subobject of|destruction of|read of}0">;
 def access_kind_volatile : TextSubstitution<
   "%select{read of|read of|assignment to|increment of|decrement of|"
-  "<ERROR>|<ERROR>|<ERROR>|<ERROR>|<ERROR>}0">;
+  "<ERROR>|<ERROR>|<ERROR>|<ERROR>|<ERROR>|<ERROR>}0">;
 def note_constexpr_lifetime_ended : Note<
   "%sub{access_kind}0 %select{temporary|variable}1 whose "
   "%plural{8:storage duration|:lifetime}0 has ended">;
@@ -409,6 +407,12 @@ def warn_is_constant_evaluated_always_true_constexpr : Warning<
   "'%0' will always evaluate to 'true' in a manifestly constant-evaluated expression">,
   InGroup<DiagGroup<"constant-evaluated">>;
 
+def err_invalid_is_within_lifetime : Note<
+  "'%0' cannot be called with "
+  "%select{a null pointer|a one-past-the-end pointer|"
+  "a pointer to an object whose lifetime has not yet begun}1"
+>;
+
 // inline asm related.
 let CategoryName = "Inline Assembly Issue" in {
   def err_asm_invalid_escape : Error<
diff --git clang/include/clang/Basic/DiagnosticGroups.td clang/include/clang/Basic/DiagnosticGroups.td
index c4c29942ee1c..116ce7a04f66 100644
--- clang/include/clang/Basic/DiagnosticGroups.td
+++ clang/include/clang/Basic/DiagnosticGroups.td
@@ -1558,7 +1558,8 @@ def ReadOnlyPlacementChecks : DiagGroup<"read-only-types">;
 
 // Warnings and fixes to support the "safe buffers" programming model.
 def UnsafeBufferUsageInContainer : DiagGroup<"unsafe-buffer-usage-in-container">;
-def UnsafeBufferUsage : DiagGroup<"unsafe-buffer-usage", [UnsafeBufferUsageInContainer]>;
+def UnsafeBufferUsageInLibcCall : DiagGroup<"unsafe-buffer-usage-in-libc-call">;
+def UnsafeBufferUsage : DiagGroup<"unsafe-buffer-usage", [UnsafeBufferUsageInContainer, UnsafeBufferUsageInLibcCall]>;
 
 // Warnings and notes related to the function effects system underlying
 // the nonblocking and nonallocating attributes.
diff --git clang/include/clang/Basic/DiagnosticLexKinds.td clang/include/clang/Basic/DiagnosticLexKinds.td
index 12d7b8c0205e..fc14bb6aa216 100644
--- clang/include/clang/Basic/DiagnosticLexKinds.td
+++ clang/include/clang/Basic/DiagnosticLexKinds.td
@@ -283,6 +283,9 @@ def warn_cxx98_compat_unicode_literal : Warning<
 def warn_cxx14_compat_u8_character_literal : Warning<
   "unicode literals are incompatible with C++ standards before C++17">,
   InGroup<CXXPre17Compat>, DefaultIgnore;
+def warn_c17_compat_u8_character_literal : Warning<
+  "unicode literals are incompatible with C standards before C23">,
+  InGroup<CPre23Compat>, DefaultIgnore;
 def warn_cxx11_compat_user_defined_literal : Warning<
   "identifier after literal will be treated as a user-defined literal suffix "
   "in C++11">, InGroup<CXX11Compat>, DefaultIgnore;
diff --git clang/include/clang/Basic/DiagnosticParseKinds.td clang/include/clang/Basic/DiagnosticParseKinds.td
index 0b8ab4bf0925..0aa2c4a70849 100644
--- clang/include/clang/Basic/DiagnosticParseKinds.td
+++ clang/include/clang/Basic/DiagnosticParseKinds.td
@@ -470,6 +470,12 @@ def warn_c17_compat_static_assert_no_message : Warning<
   "'_Static_assert' with no message is incompatible with C standards before "
   "C23">,
   DefaultIgnore, InGroup<CPre23Compat>;
+def ext_cxx_static_assert_user_generated_message : ExtWarn<
+  "'static_assert' with a user-generated message is a C++26 extension">,
+  InGroup<CXX26>;
+def warn_cxx20_compat_static_assert_user_generated_message : Warning<
+  "'static_assert' with a user-generated message is incompatible with "
+  "C++ standards before C++26">, DefaultIgnore, InGroup<CXXPre26Compat>;
 def err_function_definition_not_allowed : Error<
   "function definition is not allowed here">;
 def err_expected_end_of_enumerator : Error<
diff --git clang/include/clang/Basic/DiagnosticSemaKinds.td clang/include/clang/Basic/DiagnosticSemaKinds.td
index dcb49d8a6760..efdc058edca5 100644
--- clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -9206,6 +9206,9 @@ def err_typecheck_expect_scalar_operand : Error<
   "operand of type %0 where arithmetic or pointer type is required">;
 def err_typecheck_cond_incompatible_operands : Error<
   "incompatible operand types%diff{ ($ and $)|}0,1">;
+def err_typecheck_expect_scalar_or_vector : Error<
+  "invalid operand of type %0 where %1 or "
+  "a vector of such type is required">;
 def err_typecheck_expect_flt_or_vector : Error<
   "invalid operand of type %0 where floating, complex or "
   "a vector of such types is required">;
@@ -10159,13 +10162,6 @@ def warn_dangling_pointer_assignment : Warning<
    "will be destroyed at the end of the full-expression">,
    InGroup<DanglingAssignment>;
 
-def warn_unsupported_lifetime_extension : Warning<
-  "lifetime extension of "
-  "%select{temporary|backing array of initializer list}0 created "
-  "by aggregate initialization using a default member initializer "
-  "is not yet supported; lifetime of %select{temporary|backing array}0 "
-  "will end at the end of the full-expression">, InGroup<Dangling>;
-
 // For non-floating point, expressions of the form x == x or x != x
 // should result in a warning, since these always evaluate to a constant.
 // Array comparisons have similar warnings
@@ -12186,6 +12182,10 @@ def err_builtin_launder_invalid_arg : Error<
   "%select{non-pointer|function pointer|void pointer}0 argument to "
   "'__builtin_launder' is not allowed">;
 
+def err_builtin_is_within_lifetime_invalid_arg : Error<
+  "%select{non-|function }0pointer argument to '__builtin_is_within_lifetime' "
+  "is not allowed">;
+
 def err_builtin_invalid_arg_type: Error <
   "%ordinal0 argument must be "
   "%select{a vector, integer or floating point type|a matrix|"
@@ -12364,6 +12364,7 @@ def err_hlsl_packoffset_cross_reg_boundary : Error<"packoffset cannot cross regi
 def err_hlsl_packoffset_alignment_mismatch : Error<"packoffset at 'y' not match alignment %0 required by %1">;
 def err_hlsl_pointers_unsupported : Error<
   "%select{pointers|references}0 are unsupported in HLSL">;
+def err_hlsl_missing_resource_class : Error<"HLSL resource needs to have [[hlsl::resource_class()]] attribute">;
 
 def err_hlsl_operator_unsupported : Error<
   "the '%select{&|*|->}0' operator is unsupported in HLSL">;
@@ -12412,6 +12413,13 @@ def warn_unsafe_buffer_operation : Warning<
   "unsafe buffer access|function introduces unsafe buffer manipulation|unsafe invocation of span::data|"
   "field %1 prone to unsafe buffer manipulation}0">,
   InGroup<UnsafeBufferUsage>, DefaultIgnore;
+def warn_unsafe_buffer_libc_call : Warning<
+  "function %0 is unsafe">,
+  InGroup<UnsafeBufferUsageInLibcCall>, DefaultIgnore;
+def note_unsafe_buffer_printf_call : Note<
+  "%select{|change to 'snprintf' for explicit bounds checking | buffer pointer and size may not match"
+          "|string argument is not guaranteed to be null-terminated"
+          "|'va_list' is unsafe}0">;
 def note_unsafe_buffer_operation : Note<
   "used%select{| in pointer arithmetic| in buffer access}0 here">;
 def note_unsafe_buffer_variable_fixit_group : Note<
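A sketch of code the new -Wunsafe-buffer-usage-in-libc-call group is meant to flag, with the bounded alternative suggested by note_unsafe_buffer_printf_call (which libc functions are actually covered is an assumption here):

  #include <cstdio>
  #include <span>

  void log_name(std::span<char> buf, const char *name) {
    // Unbounded: the buffer pointer and size may not match
    // (warn_unsafe_buffer_libc_call).
    std::sprintf(buf.data(), "name=%s", name);
    // Bounded alternative with an explicit size.
    std::snprintf(buf.data(), buf.size(), "name=%s", name);
  }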
diff --git clang/include/clang/Basic/Features.def clang/include/clang/Basic/Features.def
index 10538f555b41..7f5d26118bdc 100644
--- clang/include/clang/Basic/Features.def
+++ clang/include/clang/Basic/Features.def
@@ -54,6 +54,8 @@ FEATURE(memtag_globals,
 FEATURE(xray_instrument, LangOpts.XRayInstrument)
 FEATURE(undefined_behavior_sanitizer,
         LangOpts.Sanitize.hasOneOf(SanitizerKind::Undefined))
+FEATURE(realtime_sanitizer,
+        LangOpts.Sanitize.has(SanitizerKind::Realtime))
 FEATURE(coverage_sanitizer, LangOpts.SanitizeCoverage)
 FEATURE(assume_nonnull, true)
 FEATURE(attribute_analyzer_noreturn, true)
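With the new entry, RealtimeSanitizer can be feature-tested like the other sanitizers; a small sketch (assuming -fsanitize=realtime is the enabling flag):

  #ifndef __has_feature
  #  define __has_feature(x) 0
  #endif

  #if __has_feature(realtime_sanitizer)
  // RealtimeSanitizer instrumentation is active in this translation unit.
  #endif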
diff --git clang/include/clang/Basic/OpenMPKinds.h clang/include/clang/Basic/OpenMPKinds.h
index 16bb967f89d5..1acdafa85722 100644
--- clang/include/clang/Basic/OpenMPKinds.h
+++ clang/include/clang/Basic/OpenMPKinds.h
@@ -15,6 +15,7 @@
 #define LLVM_CLANG_BASIC_OPENMPKINDS_H
 
 #include "clang/Basic/LangOptions.h"
+#include "llvm/ADT/Sequence.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Frontend/OpenMP/OMPConstants.h"
 
@@ -389,5 +390,9 @@ bool isOpenMPInformationalDirective(OpenMPDirectiveKind DKind);
 bool isOpenMPCapturingDirective(OpenMPDirectiveKind DKind);
 }
 
+template <>
+struct llvm::enum_iteration_traits<clang::OpenMPDefaultmapClauseKind> {
+  static constexpr bool is_iterable = true;
+};
 #endif
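Marking the enum iterable lets callers write range-based loops over the defaultmap kinds with llvm::enum_seq; a sketch (OMPC_DEFAULTMAP_unknown is assumed to be the terminating enumerator):

  #include "clang/Basic/OpenMPKinds.h"
  #include "llvm/ADT/Sequence.h"

  static void forEachDefaultmapKind() {
    for (clang::OpenMPDefaultmapClauseKind Kind :
         llvm::enum_seq(clang::OpenMPDefaultmapClauseKind(0),
                        clang::OMPC_DEFAULTMAP_unknown))
      (void)Kind; // visit each kind before the 'unknown' sentinel
  }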
 
diff --git clang/include/clang/Basic/SourceManager.h clang/include/clang/Basic/SourceManager.h
index d3ccc7ef81c0..e0f1ea435d54 100644
--- clang/include/clang/Basic/SourceManager.h
+++ clang/include/clang/Basic/SourceManager.h
@@ -724,7 +724,7 @@ class SourceManager : public RefCountedBase<SourceManager> {
   ///
   /// Negative FileIDs are indexes into this table. To get from ID to an index,
   /// use (-ID - 2).
-  llvm::PagedVector<SrcMgr::SLocEntry> LoadedSLocEntryTable;
+  llvm::PagedVector<SrcMgr::SLocEntry, 32> LoadedSLocEntryTable;
 
   /// For each allocation in LoadedSLocEntryTable, we keep the first FileID.
   /// We assume exactly one allocation per AST file, and use that to determine
diff --git clang/include/clang/Basic/TargetBuiltins.h clang/include/clang/Basic/TargetBuiltins.h
index 4333830bf34f..d0f41b17c154 100644
--- clang/include/clang/Basic/TargetBuiltins.h
+++ clang/include/clang/Basic/TargetBuiltins.h
@@ -216,6 +216,35 @@ namespace clang {
     }
     bool isUnsigned() const { return (Flags & UnsignedFlag) != 0; }
     bool isQuad() const { return (Flags & QuadFlag) != 0; }
+    unsigned getEltSizeInBits() const {
+      switch (getEltType()) {
+      case Int8:
+      case Poly8:
+        return 8;
+      case Int16:
+      case Float16:
+      case Poly16:
+      case BFloat16:
+        return 16;
+      case Int32:
+      case Float32:
+        return 32;
+      case Int64:
+      case Float64:
+      case Poly64:
+        return 64;
+      case Poly128:
+        return 128;
+      }
+      llvm_unreachable("Invalid NeonTypeFlag!");
+    }
+  };
+
+  // Shared between SVE/SME and NEON
+  enum ImmCheckType {
+#define LLVM_GET_ARM_INTRIN_IMMCHECKTYPES
+#include "clang/Basic/arm_immcheck_types.inc"
+#undef LLVM_GET_ARM_INTRIN_IMMCHECKTYPES
   };
 
   /// Flags to identify the types for overloaded SVE builtins.
@@ -249,12 +278,6 @@ namespace clang {
 #undef LLVM_GET_SVE_MERGETYPES
     };
 
-    enum ImmCheckType {
-#define LLVM_GET_SVE_IMMCHECKTYPES
-#include "clang/Basic/arm_sve_typeflags.inc"
-#undef LLVM_GET_SVE_IMMCHECKTYPES
-    };
-
     SVETypeFlags(uint64_t F) : Flags(F) {
       EltTypeShift = llvm::countr_zero(EltTypeMask);
       MemEltTypeShift = llvm::countr_zero(MemEltTypeMask);
diff --git clang/include/clang/Basic/TokenKinds.def clang/include/clang/Basic/TokenKinds.def
index 212c1f6ff3a1..a82ff684b2ac 100644
--- clang/include/clang/Basic/TokenKinds.def
+++ clang/include/clang/Basic/TokenKinds.def
@@ -660,8 +660,9 @@ KEYWORD(out                         , KEYHLSL)
 #define HLSL_INTANGIBLE_TYPE(Name, Id, SingletonId) KEYWORD(Name, KEYHLSL)
 #include "clang/Basic/HLSLIntangibleTypes.def"
 
-// HLSL Type traits.
+// HLSL Type traits
 TYPE_TRAIT_2(__builtin_hlsl_is_scalarized_layout_compatible, IsScalarizedLayoutCompatible, KEYHLSL)
+TYPE_TRAIT_1(__builtin_hlsl_is_intangible, IsIntangibleType, KEYHLSL)
 
 // OpenMP Type Traits
 UNARY_EXPR_OR_TYPE_TRAIT(__builtin_omp_required_simd_align, OpenMPRequiredSimdAlign, KEYALL)
diff --git clang/include/clang/Basic/arm_fp16.td clang/include/clang/Basic/arm_fp16.td
index d36b4617bef5..ed26e84af075 100644
--- clang/include/clang/Basic/arm_fp16.td
+++ clang/include/clang/Basic/arm_fp16.td
@@ -76,17 +76,23 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "f
   def SCALAR_FCVTPUH  : SInst<"vcvtp_u16", "(1U)1", "Sh">;
   def SCALAR_FCVTPUH1 : SInst<"vcvtp_u32", "(1U>)1", "Sh">;
   def SCALAR_FCVTPUH2 : SInst<"vcvtp_u64", "(1U>>)1", "Sh">;
-  let isVCVT_N = 1 in {
+  let ImmChecks = [ImmCheck<1, ImmCheck1_16>] in {
     def SCALAR_SCVTFSHO : SInst<"vcvth_n_f16", "(1F)(1!)I", "sUs">;
     def SCALAR_SCVTFSH1O: SInst<"vcvth_n_f16", "(1F<)(1!)I", "iUi">;
     def SCALAR_SCVTFSH2O: SInst<"vcvth_n_f16", "(1F<<)(1!)I", "lUl">;
-    def SCALAR_FCVTZSHO : SInst<"vcvt_n_s16", "(1S)1I", "Sh">;
-    def SCALAR_FCVTZSH1O: SInst<"vcvt_n_s32", "(1S>)1I", "Sh">;
-    def SCALAR_FCVTZSH2O: SInst<"vcvt_n_s64", "(1S>>)1I", "Sh">;
-    def SCALAR_FCVTZUHO : SInst<"vcvt_n_u16", "(1U)1I", "Sh">;
-    def SCALAR_FCVTZUH1O: SInst<"vcvt_n_u32", "(1U>)1I", "Sh">;
-    def SCALAR_FCVTZUH2O: SInst<"vcvt_n_u64", "(1U>>)1I", "Sh">;
   }
+  def SCALAR_FCVTZSHO : SInst<"vcvt_n_s16", "(1S)1I", "Sh",
+                              [ImmCheck<1, ImmCheckCvt, 0>]>;
+  def SCALAR_FCVTZSH1O: SInst<"vcvt_n_s32", "(1S>)1I", "Sh",
+                              [ImmCheck<1, ImmCheckCvt, 0>]>;
+  def SCALAR_FCVTZSH2O: SInst<"vcvt_n_s64", "(1S>>)1I", "Sh",
+                              [ImmCheck<1, ImmCheckCvt, 0>]>;
+  def SCALAR_FCVTZUHO : SInst<"vcvt_n_u16", "(1U)1I", "Sh",
+                              [ImmCheck<1, ImmCheckCvt, 0>]>;
+  def SCALAR_FCVTZUH1O: SInst<"vcvt_n_u32", "(1U>)1I", "Sh",
+                              [ImmCheck<1, ImmCheckCvt, 0>]>;
+  def SCALAR_FCVTZUH2O: SInst<"vcvt_n_u64", "(1U>>)1I", "Sh",
+                              [ImmCheck<1, ImmCheckCvt, 0>]>;
   // Comparison
   def SCALAR_CMEQRH   : SInst<"vceq", "(1U)11", "Sh">;
   def SCALAR_CMEQZH   : SInst<"vceqz", "(1U)1", "Sh">;
diff --git clang/include/clang/Basic/arm_immcheck_incl.td clang/include/clang/Basic/arm_immcheck_incl.td
new file mode 100644
index 000000000000..9d7f74a35aaa
--- /dev/null
+++ clang/include/clang/Basic/arm_immcheck_incl.td
@@ -0,0 +1,43 @@
+class ImmCheckType<int val> {
+  int Value = val;
+}
+
+// These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h
+def ImmCheck0_31                : ImmCheckType<0>;  // 0..31 (used for e.g. predicate patterns)
+def ImmCheck1_16                : ImmCheckType<1>;  // 1..16
+def ImmCheckExtract             : ImmCheckType<2>;  // 0..(2048/sizeinbits(elt) - 1)
+def ImmCheckShiftRight          : ImmCheckType<3>;  // 1..sizeinbits(elt)
+def ImmCheckShiftRightNarrow    : ImmCheckType<4>;  // 1..sizeinbits(elt)/2
+def ImmCheckShiftLeft           : ImmCheckType<5>;  // 0..(sizeinbits(elt) - 1)
+def ImmCheck0_7                 : ImmCheckType<6>;  // 0..7
+def ImmCheckLaneIndex           : ImmCheckType<7>;  // 0..(sizeinbits(vec)/(sizeinbits(elt)) - 1)
+def ImmCheckCvt                 : ImmCheckType<8>;  // 1..sizeinbits(elt) (same as ShiftRight)
+def ImmCheckLaneIndexCompRotate : ImmCheckType<9>;  // 0..(sizeinbits(vec)/(2*sizeinbits(elt)) - 1)
+def ImmCheckLaneIndexDot        : ImmCheckType<10>; // 0..(sizeinbits(vec)/(4*sizeinbits(elt)) - 1)
+def ImmCheckComplexRot90_270    : ImmCheckType<11>; // [90,270]
+def ImmCheckComplexRotAll90     : ImmCheckType<12>; // [0, 90, 180,270]
+def ImmCheck0_13                : ImmCheckType<13>; // 0..13
+def ImmCheck0_1                 : ImmCheckType<14>; // 0..1
+def ImmCheck0_2                 : ImmCheckType<15>; // 0..2
+def ImmCheck0_3                 : ImmCheckType<16>; // 0..3
+def ImmCheck0_0                 : ImmCheckType<17>; // 0..0
+def ImmCheck0_15                : ImmCheckType<18>; // 0..15
+def ImmCheck0_255               : ImmCheckType<19>; // 0..255
+def ImmCheck2_4_Mul2            : ImmCheckType<20>; // 2, 4
+def ImmCheck1_1                 : ImmCheckType<21>; // 1..1
+def ImmCheck1_3                 : ImmCheckType<22>; // 1..3
+def ImmCheck1_7                 : ImmCheckType<23>; // 1..7
+def ImmCheck1_32                : ImmCheckType<24>; // 1..32
+def ImmCheck1_64                : ImmCheckType<25>; // 1..64
+def ImmCheck0_63                : ImmCheckType<26>; // 0..63
+
+class ImmCheck<int immArgIdx, ImmCheckType kind, int typeArgIdx = -1> {
+  // Parameter index of immediate argument to be verified
+  int ImmArgIdx = immArgIdx;
+
+  // Parameter index of argument whose type determines the context of this immediate check -
+  // element type for SVE/SME, element type and vector size for NEON (ignoring element type for
+  // ClassB NEON intrinsics).
+  int TypeContextArgIdx = typeArgIdx;
+  ImmCheckType Kind = kind;
+}
diff --git clang/include/clang/Basic/arm_neon.td clang/include/clang/Basic/arm_neon.td
index 3098fa67e6a5..92f39744f3d0 100644
--- clang/include/clang/Basic/arm_neon.td
+++ clang/include/clang/Basic/arm_neon.td
@@ -284,16 +284,17 @@ def OP_CVT_F32_BF16
 
 // Splat operation - performs a range-checked splat over a vector
 def SPLAT  : WInst<"splat_lane", ".(!q)I",
-                   "UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPl">;
+                   "UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPl",
+                    [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
 def SPLATQ : WInst<"splat_laneq", ".(!Q)I",
-                   "UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPl"> {
-  let isLaneQ = 1;
-}
+                   "UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPl",
+                   [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
+
 let TargetGuard = "bf16,neon" in {
-  def SPLAT_BF  : WInst<"splat_lane", ".(!q)I", "bQb">;
-  def SPLATQ_BF : WInst<"splat_laneq", ".(!Q)I", "bQb"> {
-    let isLaneQ = 1;
-  }
+  def SPLAT_BF  : WInst<"splat_lane", ".(!q)I", "bQb",
+                      [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
+  def SPLATQ_BF : WInst<"splat_laneq", ".(!Q)I", "bQb",
+                      [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -401,27 +402,57 @@ def VQRSHL : SInst<"vqrshl", "..S", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
 ////////////////////////////////////////////////////////////////////////////////
 // E.3.12 Shifts by constant
 let isShift = 1 in {
-def VSHR_N     : SInst<"vshr_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
-def VSHL_N     : IInst<"vshl_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
-def VRSHR_N    : SInst<"vrshr_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
-def VSRA_N     : SInst<"vsra_n", "...I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
-def VRSRA_N    : SInst<"vrsra_n", "...I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
-def VQSHL_N    : SInst<"vqshl_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
-def VQSHLU_N   : SInst<"vqshlu_n", "U.I", "csilQcQsQiQl">;
-def VSHRN_N    : IInst<"vshrn_n", "<QI", "silUsUiUl">;
-def VQSHRUN_N  : SInst<"vqshrun_n", "(<U)QI", "sil">;
-def VQRSHRUN_N : SInst<"vqrshrun_n", "(<U)QI", "sil">;
-def VQSHRN_N   : SInst<"vqshrn_n", "<QI", "silUsUiUl">;
-def VRSHRN_N   : IInst<"vrshrn_n", "<QI", "silUsUiUl">;
-def VQRSHRN_N  : SInst<"vqrshrn_n", "<QI", "silUsUiUl">;
-def VSHLL_N    : SInst<"vshll_n", "(>Q).I", "csiUcUsUi">;
+def VSHR_N     : SInst<"vshr_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl",
+                      [ImmCheck<1, ImmCheckShiftRight>]>;
+def VSHL_N     : IInst<"vshl_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl",
+                      [ImmCheck<1, ImmCheckShiftLeft>]>;
+def VRSHR_N    : SInst<"vrshr_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl",
+                      [ImmCheck<1, ImmCheckShiftRight>]>;
+def VSRA_N     : SInst<"vsra_n", "...I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl",
+                      [ImmCheck<2, ImmCheckShiftRight>]>;
+def VRSRA_N    : SInst<"vrsra_n", "...I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl",
+                      [ImmCheck<2, ImmCheckShiftRight>]>;
+def VQSHL_N    : SInst<"vqshl_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl",
+                      [ImmCheck<1, ImmCheckShiftLeft>]>;
+def VQSHLU_N   : SInst<"vqshlu_n", "U.I", "csilQcQsQiQl",
+                      [ImmCheck<1, ImmCheckShiftLeft>]>;
+
+// Narrowing right shifts should have an immediate range of 1..(sizeinbits(arg)/2).
+// However, as the overloaded type code that is supplied to a polymorphic builtin
+// is that of the return type (half as wide as the argument in this case), using
+// ImmCheckShiftRightNarrow would result in an upper bound of (sizeinbits(arg)/2)/2.
+// ImmCheckShiftRight produces the correct behavior here.
+def VSHRN_N    : IInst<"vshrn_n", "<QI", "silUsUiUl",
+                      [ImmCheck<1, ImmCheckShiftRight>]>;
+def VQSHRUN_N  : SInst<"vqshrun_n", "(<U)QI", "sil",
+                      [ImmCheck<1, ImmCheckShiftRight>]>;
+def VQRSHRUN_N : SInst<"vqrshrun_n", "(<U)QI", "sil",
+                      [ImmCheck<1, ImmCheckShiftRight>]>;
+def VQSHRN_N   : SInst<"vqshrn_n", "<QI", "silUsUiUl",
+                      [ImmCheck<1, ImmCheckShiftRight>]>;
+def VRSHRN_N   : IInst<"vrshrn_n", "<QI", "silUsUiUl",
+                      [ImmCheck<1, ImmCheckShiftRight>]>;
+def VQRSHRN_N  : SInst<"vqrshrn_n", "<QI", "silUsUiUl",
+                      [ImmCheck<1, ImmCheckShiftRight>]>;
+
+// Widening left-shifts should have a range of 0..(sizeinbits(arg)-1).
+// This polymorphic builtin is supplied the wider return type as its overloaded
+// base type, so the range here is actually 0..(sizeinbits(arg)*2 - 1).
+// This cannot be rectified currently due to a use of vshll_n_s16 with an
+// out-of-bounds immediate in the definition of vcvt_f32_bf16.
+def VSHLL_N    : SInst<"vshll_n", "(>Q).I", "csiUcUsUi",
+                      [ImmCheck<1, ImmCheckShiftLeft>]>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // E.3.13 Shifts with insert
 def VSRI_N : WInst<"vsri_n", "...I",
-                   "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPs">;
+                   "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPs",
+                    [ImmCheck<2, ImmCheckShiftRight, 0>]>;
 def VSLI_N : WInst<"vsli_n", "...I",
-                   "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPs">;
+                   "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPs",
+                   [ImmCheck<2, ImmCheckShiftLeft, 0>]>;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
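A worked instance of the narrowing-shift comment above: for vshrn_n_s16 the result element is 8 bits, so applying ImmCheckShiftRight to the return type yields the intended 1..8 immediate range (the example itself is illustrative):

  #include <arm_neon.h>

  int8x8_t halve_and_narrow(int16x8_t v) {
    // The immediate must lie in 1..8; values outside that range are now
    // rejected by the generated range check.
    return vshrn_n_s16(v, 4);
  }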
@@ -435,7 +466,8 @@ def VLD1_X3   : WInst<"vld1_x3", "3(c*!)",
 def VLD1_X4   : WInst<"vld1_x4", "4(c*!)",
                       "cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPs">;
 def VLD1_LANE : WInst<"vld1_lane", ".(c*!).I",
-                      "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs">;
+                      "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs",
+                      [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 def VLD1_DUP  : WInst<"vld1_dup", ".(c*!)",
                       "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs">;
 def VST1      : WInst<"vst1", "v*(.!)",
@@ -447,19 +479,23 @@ def VST1_X3   : WInst<"vst1_x3", "v*(3!)",
 def VST1_X4   : WInst<"vst1_x4", "v*(4!)",
                       "cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPs">;
 def VST1_LANE : WInst<"vst1_lane", "v*(.!)I",
-                      "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs">;
+                      "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs",
+                      [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
+
 let ArchGuard = "(__ARM_FP & 2)" in {
 def VLD1_F16      : WInst<"vld1", ".(c*!)", "hQh">;
 def VLD1_X2_F16   : WInst<"vld1_x2", "2(c*!)", "hQh">;
 def VLD1_X3_F16   : WInst<"vld1_x3", "3(c*!)", "hQh">;
 def VLD1_X4_F16   : WInst<"vld1_x4", "4(c*!)", "hQh">;
-def VLD1_LANE_F16 : WInst<"vld1_lane", ".(c*!).I", "hQh">;
+def VLD1_LANE_F16 : WInst<"vld1_lane", ".(c*!).I", "hQh",
+                          [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 def VLD1_DUP_F16  : WInst<"vld1_dup", ".(c*!)", "hQh">;
 def VST1_F16      : WInst<"vst1", "v*(.!)", "hQh">;
 def VST1_X2_F16   : WInst<"vst1_x2", "v*(2!)", "hQh">;
 def VST1_X3_F16   : WInst<"vst1_x3", "v*(3!)", "hQh">;
 def VST1_X4_F16   : WInst<"vst1_x4", "v*(4!)", "hQh">;
-def VST1_LANE_F16 : WInst<"vst1_lane", "v*(.!)I", "hQh">;
+def VST1_LANE_F16 : WInst<"vst1_lane", "v*(.!)I", "hQh",
+                          [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -473,15 +509,21 @@ def VLD3_DUP  : WInst<"vld3_dup", "3(c*!)",
                       "UcUsUiUlcsilfPcPsQcQfQiQlQsQPcQPsQUcQUiQUlQUs">;
 def VLD4_DUP  : WInst<"vld4_dup", "4(c*!)",
                       "UcUsUiUlcsilfPcPsQcQfQiQlQsQPcQPsQUcQUiQUlQUs">;
-def VLD2_LANE : WInst<"vld2_lane", "2(c*!)2I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">;
-def VLD3_LANE : WInst<"vld3_lane", "3(c*!)3I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">;
-def VLD4_LANE : WInst<"vld4_lane", "4(c*!)4I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">;
+def VLD2_LANE : WInst<"vld2_lane", "2(c*!)2I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs",
+                      [ImmCheck<4, ImmCheckLaneIndex, 1>]>;
+def VLD3_LANE : WInst<"vld3_lane", "3(c*!)3I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs",
+                      [ImmCheck<5, ImmCheckLaneIndex, 1>]>;
+def VLD4_LANE : WInst<"vld4_lane", "4(c*!)4I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs",
+                      [ImmCheck<6, ImmCheckLaneIndex, 1>]>;
 def VST2 : WInst<"vst2", "v*(2!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">;
 def VST3 : WInst<"vst3", "v*(3!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">;
 def VST4 : WInst<"vst4", "v*(4!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">;
-def VST2_LANE : WInst<"vst2_lane", "v*(2!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">;
-def VST3_LANE : WInst<"vst3_lane", "v*(3!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">;
-def VST4_LANE : WInst<"vst4_lane", "v*(4!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">;
+def VST2_LANE : WInst<"vst2_lane", "v*(2!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs",
+                      [ImmCheck<3, ImmCheckLaneIndex, 1>]>;
+def VST3_LANE : WInst<"vst3_lane", "v*(3!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs",
+                      [ImmCheck<4, ImmCheckLaneIndex, 1>]>;
+def VST4_LANE : WInst<"vst4_lane", "v*(4!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs",
+                      [ImmCheck<5, ImmCheckLaneIndex, 1>]>;
 let ArchGuard = "(__ARM_FP & 2)" in {
 def VLD2_F16      : WInst<"vld2", "2(c*!)", "hQh">;
 def VLD3_F16      : WInst<"vld3", "3(c*!)", "hQh">;
@@ -489,28 +531,36 @@ def VLD4_F16      : WInst<"vld4", "4(c*!)", "hQh">;
 def VLD2_DUP_F16  : WInst<"vld2_dup", "2(c*!)", "hQh">;
 def VLD3_DUP_F16  : WInst<"vld3_dup", "3(c*!)", "hQh">;
 def VLD4_DUP_F16  : WInst<"vld4_dup", "4(c*!)", "hQh">;
-def VLD2_LANE_F16 : WInst<"vld2_lane", "2(c*!)2I", "hQh">;
-def VLD3_LANE_F16 : WInst<"vld3_lane", "3(c*!)3I", "hQh">;
-def VLD4_LANE_F16 : WInst<"vld4_lane", "4(c*!)4I", "hQh">;
+def VLD2_LANE_F16 : WInst<"vld2_lane", "2(c*!)2I", "hQh",
+                          [ImmCheck<4, ImmCheckLaneIndex, 1>]>;
+def VLD3_LANE_F16 : WInst<"vld3_lane", "3(c*!)3I", "hQh",
+                          [ImmCheck<5, ImmCheckLaneIndex, 1>]>;
+def VLD4_LANE_F16 : WInst<"vld4_lane", "4(c*!)4I", "hQh",
+                          [ImmCheck<6, ImmCheckLaneIndex, 1>]>;
 def VST2_F16      : WInst<"vst2", "v*(2!)", "hQh">;
 def VST3_F16      : WInst<"vst3", "v*(3!)", "hQh">;
 def VST4_F16      : WInst<"vst4", "v*(4!)", "hQh">;
-def VST2_LANE_F16 : WInst<"vst2_lane", "v*(2!)I", "hQh">;
-def VST3_LANE_F16 : WInst<"vst3_lane", "v*(3!)I", "hQh">;
-def VST4_LANE_F16 : WInst<"vst4_lane", "v*(4!)I", "hQh">;
+def VST2_LANE_F16 : WInst<"vst2_lane", "v*(2!)I", "hQh",
+                          [ImmCheck<3, ImmCheckLaneIndex, 1>]>;
+def VST3_LANE_F16 : WInst<"vst3_lane", "v*(3!)I", "hQh",
+                         [ImmCheck<4, ImmCheckLaneIndex, 1>]>;
+def VST4_LANE_F16 : WInst<"vst4_lane", "v*(4!)I", "hQh",
+                          [ImmCheck<5, ImmCheckLaneIndex, 1>]>;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
 // E.3.16 Extract lanes from a vector
 let InstName = "vmov" in
 def VGET_LANE : IInst<"vget_lane", "1.I",
-                      "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl">;
+                      "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl",
+                      [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // E.3.17 Set lanes within a vector
 let InstName = "vmov" in
 def VSET_LANE : IInst<"vset_lane", ".1.I",
-                      "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl">;
+                      "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl",
+                      [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // E.3.18 Initialize a vector from bit pattern
@@ -559,11 +609,12 @@ let ArchGuard = "(__ARM_FP & 2)" in {
 def VCVT_S32     : SInst<"vcvt_s32", "S.",  "fQf">;
 def VCVT_U32     : SInst<"vcvt_u32", "U.",  "fQf">;
 def VCVT_F32     : SInst<"vcvt_f32", "F(.!)",  "iUiQiQUi">;
-let isVCVT_N = 1 in {
-def VCVT_N_S32   : SInst<"vcvt_n_s32", "S.I", "fQf">;
-def VCVT_N_U32   : SInst<"vcvt_n_u32", "U.I", "fQf">;
-def VCVT_N_F32   : SInst<"vcvt_n_f32", "F(.!)I", "iUiQiQUi">;
-}
+def VCVT_N_S32   : SInst<"vcvt_n_s32", "S.I", "fQf",
+                        [ImmCheck<1, ImmCheck1_32>]>;
+def VCVT_N_U32   : SInst<"vcvt_n_u32", "U.I", "fQf",
+                        [ImmCheck<1, ImmCheck1_32>]>;
+def VCVT_N_F32   : SInst<"vcvt_n_f32", "F(.!)I", "iUiQiQUi",
+                        [ImmCheck<1, ImmCheck1_32>]>;
 
 def VMOVN        : IInst<"vmovn", "<Q",  "silUsUiUl">;
 def VMOVL        : SInst<"vmovl", "(>Q).",  "csiUcUsUi">;
@@ -610,8 +661,10 @@ def VQDMULH_LANE  : SOpInst<"vqdmulh_lane", "..qI", "siQsQi", OP_QDMULH_LN>;
 def VQRDMULH_LANE : SOpInst<"vqrdmulh_lane", "..qI", "siQsQi", OP_QRDMULH_LN>;
 }
 let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)" in {
-def A64_VQDMULH_LANE  : SInst<"vqdmulh_lane", "..(!q)I", "siQsQi">;
-def A64_VQRDMULH_LANE : SInst<"vqrdmulh_lane", "..(!q)I", "siQsQi">;
+def A64_VQDMULH_LANE  : SInst<"vqdmulh_lane", "..(!q)I", "siQsQi",
+                              [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
+def A64_VQRDMULH_LANE : SInst<"vqrdmulh_lane", "..(!q)I", "siQsQi",
+                              [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 }
 
 let TargetGuard = "v8.1a,neon" in {
@@ -629,7 +682,8 @@ def VQDMLSL_N     : SOpInst<"vqdmlsl_n", "(>Q)(>Q).1", "si", OP_QDMLSL_N>;
 ////////////////////////////////////////////////////////////////////////////////
 // E.3.26 Vector Extract
 def VEXT : WInst<"vext", "...I",
-                 "cUcPcsUsPsiUilUlfQcQUcQPcQsQUsQPsQiQUiQlQUlQf">;
+                 "cUcPcsUsPsiUilUlfQcQUcQPcQsQUsQPsQiQUiQlQUlQf",
+                 [ImmCheck<2, ImmCheckLaneIndex, 0>]>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // E.3.27 Reverse vector elements
@@ -738,14 +792,22 @@ def ST1_X2 : WInst<"vst1_x2", "v*(2!)", "dQdPlQPl">;
 def ST1_X3 : WInst<"vst1_x3", "v*(3!)", "dQdPlQPl">;
 def ST1_X4 : WInst<"vst1_x4", "v*(4!)", "dQdPlQPl">;
 
-def LD1_LANE : WInst<"vld1_lane", ".(c*!).I", "dQdPlQPl">;
-def LD2_LANE : WInst<"vld2_lane", "2(c*!)2I", "lUlQcQUcQPcQlQUldQdPlQPl">;
-def LD3_LANE : WInst<"vld3_lane", "3(c*!)3I", "lUlQcQUcQPcQlQUldQdPlQPl">;
-def LD4_LANE : WInst<"vld4_lane", "4(c*!)4I", "lUlQcQUcQPcQlQUldQdPlQPl">;
-def ST1_LANE : WInst<"vst1_lane", "v*(.!)I", "dQdPlQPl">;
-def ST2_LANE : WInst<"vst2_lane", "v*(2!)I", "lUlQcQUcQPcQlQUldQdPlQPl">;
-def ST3_LANE : WInst<"vst3_lane", "v*(3!)I", "lUlQcQUcQPcQlQUldQdPlQPl">;
-def ST4_LANE : WInst<"vst4_lane", "v*(4!)I", "lUlQcQUcQPcQlQUldQdPlQPl">;
+def LD1_LANE : WInst<"vld1_lane", ".(c*!).I", "dQdPlQPl",
+                    [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
+def LD2_LANE : WInst<"vld2_lane", "2(c*!)2I", "lUlQcQUcQPcQlQUldQdPlQPl",
+                    [ImmCheck<4, ImmCheckLaneIndex, 1>]>;
+def LD3_LANE : WInst<"vld3_lane", "3(c*!)3I", "lUlQcQUcQPcQlQUldQdPlQPl",
+                    [ImmCheck<5, ImmCheckLaneIndex, 1>]>;
+def LD4_LANE : WInst<"vld4_lane", "4(c*!)4I", "lUlQcQUcQPcQlQUldQdPlQPl",
+                    [ImmCheck<6, ImmCheckLaneIndex, 1>]>;
+def ST1_LANE : WInst<"vst1_lane", "v*(.!)I", "dQdPlQPl",
+                    [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
+def ST2_LANE : WInst<"vst2_lane", "v*(2!)I", "lUlQcQUcQPcQlQUldQdPlQPl",
+                    [ImmCheck<3, ImmCheckLaneIndex, 1>]>;
+def ST3_LANE : WInst<"vst3_lane", "v*(3!)I", "lUlQcQUcQPcQlQUldQdPlQPl",
+                    [ImmCheck<4, ImmCheckLaneIndex, 1>]>;
+def ST4_LANE : WInst<"vst4_lane", "v*(4!)I", "lUlQcQUcQPcQlQUldQdPlQPl",
+                    [ImmCheck<5, ImmCheckLaneIndex, 1>]>;
 
 def LD1_DUP  : WInst<"vld1_dup", ".(c*!)", "dQdPlQPl">;
 def LD2_DUP  : WInst<"vld2_dup", "2(c*!)", "dQdPlQPl">;
@@ -901,8 +963,8 @@ def SHLL_HIGH_N    : SOpInst<"vshll_high_n", ">.I", "HcHsHiHUcHUsHUi",
                              OP_LONG_HI>;
 
 ////////////////////////////////////////////////////////////////////////////////
-def SRI_N : WInst<"vsri_n", "...I", "PlQPl">;
-def SLI_N : WInst<"vsli_n", "...I", "PlQPl">;
+def SRI_N : WInst<"vsri_n", "...I", "PlQPl", [ImmCheck<2, ImmCheckShiftRight, 0>]>;
+def SLI_N : WInst<"vsli_n", "...I", "PlQPl", [ImmCheck<2, ImmCheckShiftLeft, 0>]>;
 
 // Right shift narrow high
 def SHRN_HIGH_N    : IOpInst<"vshrn_high_n", "<(<q).I",
@@ -923,11 +985,12 @@ def QRSHRN_HIGH_N  : SOpInst<"vqrshrn_high_n", "<(<q).I",
 // Converting vectors
 def VMOVL_HIGH   : SOpInst<"vmovl_high", ">.", "HcHsHiHUcHUsHUi", OP_MOVL_HI>;
 
-let isVCVT_N = 1 in {
-def CVTF_N_F64   : SInst<"vcvt_n_f64", "F(.!)I", "lUlQlQUl">;
-def FCVTZS_N_S64 : SInst<"vcvt_n_s64", "S.I", "dQd">;
-def FCVTZS_N_U64 : SInst<"vcvt_n_u64", "U.I", "dQd">;
-}
+def CVTF_N_F64   : SInst<"vcvt_n_f64", "F(.!)I", "lUlQlQUl",
+                        [ImmCheck<1, ImmCheck1_64>]>;
+def FCVTZS_N_S64 : SInst<"vcvt_n_s64", "S.I", "dQd",
+                        [ImmCheck<1, ImmCheck1_64>]>;
+def FCVTZS_N_U64 : SInst<"vcvt_n_u64", "U.I", "dQd",
+                        [ImmCheck<1, ImmCheck1_64>]>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // 3VDiff class using high 64-bit in operands
@@ -965,29 +1028,25 @@ let TargetGuard = "aes,neon" in {
 
 ////////////////////////////////////////////////////////////////////////////////
 // Extract or insert element from vector
-def GET_LANE : IInst<"vget_lane", "1.I", "dQdPlQPl">;
-def SET_LANE : IInst<"vset_lane", ".1.I", "dQdPlQPl">;
+def GET_LANE : IInst<"vget_lane", "1.I", "dQdPlQPl",
+                      [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
+def SET_LANE : IInst<"vset_lane", ".1.I", "dQdPlQPl",
+                      [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 def COPY_LANE : IOpInst<"vcopy_lane", "..I.I",
                         "csilUcUsUiUlPcPsPlfd", OP_COPY_LN>;
 def COPYQ_LANE : IOpInst<"vcopy_lane", "..IqI",
                         "QcQsQiQlQUcQUsQUiQUlQPcQPsQfQdQPl", OP_COPY_LN>;
 def COPY_LANEQ : IOpInst<"vcopy_laneq", "..IQI",
-                     "csilPcPsPlUcUsUiUlfd", OP_COPY_LN> {
-  let isLaneQ = 1;
-}
+                     "csilPcPsPlUcUsUiUlfd", OP_COPY_LN>;
 def COPYQ_LANEQ : IOpInst<"vcopy_laneq", "..I.I",
-                     "QcQsQiQlQUcQUsQUiQUlQPcQPsQfQdQPl", OP_COPY_LN> {
-  let isLaneQ = 1;
-}
+                     "QcQsQiQlQUcQUsQUiQUlQPcQPsQfQdQPl", OP_COPY_LN>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Set all lanes to same value
 def VDUP_LANE1: WOpInst<"vdup_lane", ".qI", "dQdPlQPl", OP_DUP_LN>;
 def VDUP_LANE2: WOpInst<"vdup_laneq", ".QI",
                   "csilUcUsUiUlPcPshfdQcQsQiQlQPcQPsQUcQUsQUiQUlQhQfQdPlQPl",
-                        OP_DUP_LN> {
-  let isLaneQ = 1;
-}
+                        OP_DUP_LN>;
 def DUP_N   : WOpInst<"vdup_n", ".1", "dQdPlQPl", OP_DUP>;
 def MOV_N   : WOpInst<"vmov_n", ".1", "dQdPlQPl", OP_DUP>;
 
@@ -1003,60 +1062,36 @@ def CREATE : NoTestOpInst<"vcreate", ".(IU>)", "dPl", OP_CAST> {
 ////////////////////////////////////////////////////////////////////////////////
 
 def VMLA_LANEQ   : IOpInst<"vmla_laneq", "...QI",
-                           "siUsUifQsQiQUsQUiQf", OP_MLA_LN> {
-  let isLaneQ = 1;
-}
+                           "siUsUifQsQiQUsQUiQf", OP_MLA_LN>;
 def VMLS_LANEQ   : IOpInst<"vmls_laneq", "...QI",
-                           "siUsUifQsQiQUsQUiQf", OP_MLS_LN> {
-  let isLaneQ = 1;
-}
-
-def VFMA_LANE    : IInst<"vfma_lane", "...qI", "fdQfQd">;
-def VFMA_LANEQ   : IInst<"vfma_laneq", "...QI", "fdQfQd"> {
-  let isLaneQ = 1;
-}
+                           "siUsUifQsQiQUsQUiQf", OP_MLS_LN>;
+def VFMA_LANE    : IInst<"vfma_lane", "...qI", "fdQfQd",
+                        [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
+def VFMA_LANEQ   : IInst<"vfma_laneq", "...QI", "fdQfQd",
+                        [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
 def VFMS_LANE    : IOpInst<"vfms_lane", "...qI", "fdQfQd", OP_FMS_LN>;
-def VFMS_LANEQ   : IOpInst<"vfms_laneq", "...QI", "fdQfQd", OP_FMS_LNQ> {
-  let isLaneQ = 1;
-}
+def VFMS_LANEQ   : IOpInst<"vfms_laneq", "...QI", "fdQfQd", OP_FMS_LNQ>;
 
-def VMLAL_LANEQ  : SOpInst<"vmlal_laneq", "(>Q)(>Q).QI", "siUsUi", OP_MLAL_LN> {
-  let isLaneQ = 1;
-}
+def VMLAL_LANEQ  : SOpInst<"vmlal_laneq", "(>Q)(>Q).QI", "siUsUi", OP_MLAL_LN>;
 def VMLAL_HIGH_LANE   : SOpInst<"vmlal_high_lane", "(>Q)(>Q)Q.I", "siUsUi",
                                 OP_MLALHi_LN>;
 def VMLAL_HIGH_LANEQ  : SOpInst<"vmlal_high_laneq", "(>Q)(>Q)QQI", "siUsUi",
-                                OP_MLALHi_LN> {
-  let isLaneQ = 1;
-}
-def VMLSL_LANEQ  : SOpInst<"vmlsl_laneq", "(>Q)(>Q).QI", "siUsUi", OP_MLSL_LN> {
-  let isLaneQ = 1;
-}
+                                OP_MLALHi_LN>;
+def VMLSL_LANEQ  : SOpInst<"vmlsl_laneq", "(>Q)(>Q).QI", "siUsUi", OP_MLSL_LN>;
 def VMLSL_HIGH_LANE   : SOpInst<"vmlsl_high_lane", "(>Q)(>Q)Q.I", "siUsUi",
                                 OP_MLSLHi_LN>;
 def VMLSL_HIGH_LANEQ  : SOpInst<"vmlsl_high_laneq", "(>Q)(>Q)QQI", "siUsUi",
-                                OP_MLSLHi_LN> {
-  let isLaneQ = 1;
-}
-
-def VQDMLAL_LANEQ  : SOpInst<"vqdmlal_laneq", "(>Q)(>Q).QI", "si", OP_QDMLAL_LN> {
-  let isLaneQ = 1;
-}
+                                OP_MLSLHi_LN>;
+def VQDMLAL_LANEQ  : SOpInst<"vqdmlal_laneq", "(>Q)(>Q).QI", "si", OP_QDMLAL_LN>;
 def VQDMLAL_HIGH_LANE   : SOpInst<"vqdmlal_high_lane", "(>Q)(>Q)Q.I", "si",
                                 OP_QDMLALHi_LN>;
 def VQDMLAL_HIGH_LANEQ  : SOpInst<"vqdmlal_high_laneq", "(>Q)(>Q)QQI", "si",
-                                OP_QDMLALHi_LN> {
-  let isLaneQ = 1;
-}
-def VQDMLSL_LANEQ  : SOpInst<"vqdmlsl_laneq", "(>Q)(>Q).QI", "si", OP_QDMLSL_LN> {
-  let isLaneQ = 1;
-}
+                                OP_QDMLALHi_LN>;
+def VQDMLSL_LANEQ  : SOpInst<"vqdmlsl_laneq", "(>Q)(>Q).QI", "si", OP_QDMLSL_LN>;
 def VQDMLSL_HIGH_LANE   : SOpInst<"vqdmlsl_high_lane", "(>Q)(>Q)Q.I", "si",
                                 OP_QDMLSLHi_LN>;
 def VQDMLSL_HIGH_LANEQ  : SOpInst<"vqdmlsl_high_laneq", "(>Q)(>Q)QQI", "si",
-                                OP_QDMLSLHi_LN> {
-  let isLaneQ = 1;
-}
+                                OP_QDMLSLHi_LN>;
 
 // Newly add double parameter for vmul_lane in aarch64
 // Note: d type is handled by SCALAR_VMUL_LANE
@@ -1064,48 +1099,31 @@ def VMUL_LANE_A64 : IOpInst<"vmul_lane", "..qI", "Qd", OP_MUL_LN>;
 
 // Note: d type is handled by SCALAR_VMUL_LANEQ
 def VMUL_LANEQ   : IOpInst<"vmul_laneq", "..QI",
-                           "sifUsUiQsQiQUsQUiQfQd", OP_MUL_LN> {
-  let isLaneQ = 1;
-}
-def VMULL_LANEQ  : SOpInst<"vmull_laneq", "(>Q).QI", "siUsUi", OP_MULL_LN> {
-  let isLaneQ = 1;
-}
+                           "sifUsUiQsQiQUsQUiQfQd", OP_MUL_LN>;
+def VMULL_LANEQ  : SOpInst<"vmull_laneq", "(>Q).QI", "siUsUi", OP_MULL_LN>;
 def VMULL_HIGH_LANE   : SOpInst<"vmull_high_lane", "(>Q)Q.I", "siUsUi",
                                 OP_MULLHi_LN>;
 def VMULL_HIGH_LANEQ  : SOpInst<"vmull_high_laneq", "(>Q)QQI", "siUsUi",
-                                OP_MULLHi_LN> {
-  let isLaneQ = 1;
-}
-
-def VQDMULL_LANEQ  : SOpInst<"vqdmull_laneq", "(>Q).QI", "si", OP_QDMULL_LN> {
-  let isLaneQ = 1;
-}
+                                OP_MULLHi_LN>;
+def VQDMULL_LANEQ  : SOpInst<"vqdmull_laneq", "(>Q).QI", "si", OP_QDMULL_LN>;
 def VQDMULL_HIGH_LANE   : SOpInst<"vqdmull_high_lane", "(>Q)Q.I", "si",
                                   OP_QDMULLHi_LN>;
 def VQDMULL_HIGH_LANEQ  : SOpInst<"vqdmull_high_laneq", "(>Q)QQI", "si",
-                                  OP_QDMULLHi_LN> {
-  let isLaneQ = 1;
-}
+                                  OP_QDMULLHi_LN>;
+def VQDMULH_LANEQ  : SInst<"vqdmulh_laneq", "..QI", "siQsQi",
+                          [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
+def VQRDMULH_LANEQ : SInst<"vqrdmulh_laneq", "..QI", "siQsQi",
+                          [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 
-let isLaneQ = 1 in {
-def VQDMULH_LANEQ  : SInst<"vqdmulh_laneq", "..QI", "siQsQi">;
-def VQRDMULH_LANEQ : SInst<"vqrdmulh_laneq", "..QI", "siQsQi">;
-}
 let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "v8.1a,neon" in {
-def VQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "...QI", "siQsQi", OP_QRDMLAH_LN> {
-  let isLaneQ = 1;
-}
-def VQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "...QI", "siQsQi", OP_QRDMLSH_LN> {
-  let isLaneQ = 1;
-}
+def VQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "...QI", "siQsQi", OP_QRDMLAH_LN>;
+def VQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "...QI", "siQsQi", OP_QRDMLSH_LN>;
 } // ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "v8.1a"
 
 // Note: d type implemented by SCALAR_VMULX_LANE
 def VMULX_LANE : IOpInst<"vmulx_lane", "..qI", "fQfQd", OP_MULX_LN>;
 // Note: d type is implemented by SCALAR_VMULX_LANEQ
-def VMULX_LANEQ : IOpInst<"vmulx_laneq", "..QI", "fQfQd", OP_MULX_LN> {
-  let isLaneQ = 1;
-}
+def VMULX_LANEQ : IOpInst<"vmulx_laneq", "..QI", "fQfQd", OP_MULX_LN>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Across vectors class
@@ -1118,7 +1136,8 @@ def FMINNMV : SInst<"vminnmv", "1.", "fQfQd">;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Newly added Vector Extract for f64
-def VEXT_A64 : WInst<"vext", "...I", "dQdPlQPl">;
+def VEXT_A64 : WInst<"vext", "...I", "dQdPlQPl",
+                    [ImmCheck<2, ImmCheckLaneIndex, 0>]>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Crypto
@@ -1147,10 +1166,7 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "s
 def BCAX : SInst<"vbcax", "....", "QUcQUsQUiQUlQcQsQiQl">;
 def EOR3 : SInst<"veor3", "....", "QUcQUsQUiQUlQcQsQiQl">;
 def RAX1 : SInst<"vrax1", "...", "QUl">;
-
-let isVXAR = 1 in {
-def XAR :  SInst<"vxar", "...I", "QUl">;
-}
+def XAR :  SInst<"vxar", "...I", "QUl", [ImmCheck<2, ImmCheck0_63>]>;
 }
 
 let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "sha3,neon" in {
@@ -1162,10 +1178,10 @@ def SHA512H2 : SInst<"vsha512h2", "....", "QUl">;
 
 let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "sm4,neon" in {
 def SM3SS1 : SInst<"vsm3ss1", "....", "QUi">;
-def SM3TT1A : SInst<"vsm3tt1a", "....I", "QUi">;
-def SM3TT1B : SInst<"vsm3tt1b", "....I", "QUi">;
-def SM3TT2A : SInst<"vsm3tt2a", "....I", "QUi">;
-def SM3TT2B : SInst<"vsm3tt2b", "....I", "QUi">;
+def SM3TT1A : SInst<"vsm3tt1a", "....I", "QUi", [ImmCheck<3, ImmCheck0_3>]>;
+def SM3TT1B : SInst<"vsm3tt1b", "....I", "QUi", [ImmCheck<3, ImmCheck0_3>]>;
+def SM3TT2A : SInst<"vsm3tt2a", "....I", "QUi", [ImmCheck<3, ImmCheck0_3>]>;
+def SM3TT2B : SInst<"vsm3tt2b", "....I", "QUi", [ImmCheck<3, ImmCheck0_3>]>;
 def SM3PARTW1 : SInst<"vsm3partw1", "....", "QUi">;
 def SM3PARTW2 : SInst<"vsm3partw2", "....", "QUi">;
 }
@@ -1327,49 +1343,68 @@ def SCALAR_RSHL: SInst<"vrshl", "11(S1)", "SlSUl">;
 // Scalar Shift (Immediate)
 let isScalarShift = 1 in {
 // Signed/Unsigned Shift Right (Immediate)
-def SCALAR_SSHR_N: SInst<"vshr_n", "11I", "SlSUl">;
+def SCALAR_SSHR_N: SInst<"vshr_n", "11I", "SlSUl",
+                        [ImmCheck<1, ImmCheckShiftRight, 0>]>;
 // Signed/Unsigned Rounding Shift Right (Immediate)
-def SCALAR_SRSHR_N: SInst<"vrshr_n", "11I", "SlSUl">;
+def SCALAR_SRSHR_N: SInst<"vrshr_n", "11I", "SlSUl",
+                          [ImmCheck<1, ImmCheckShiftRight, 0>]>;
 
 // Signed/Unsigned Shift Right and Accumulate (Immediate)
-def SCALAR_SSRA_N: SInst<"vsra_n", "111I", "SlSUl">;
+def SCALAR_SSRA_N: SInst<"vsra_n", "111I", "SlSUl",
+                        [ImmCheck<2, ImmCheckShiftRight, 0>]>;
 // Signed/Unsigned Rounding Shift Right and Accumulate (Immediate)
-def SCALAR_SRSRA_N: SInst<"vrsra_n", "111I", "SlSUl">;
+def SCALAR_SRSRA_N: SInst<"vrsra_n", "111I", "SlSUl",
+                        [ImmCheck<2, ImmCheckShiftRight, 0>]>;
 
 // Shift Left (Immediate)
-def SCALAR_SHL_N: SInst<"vshl_n", "11I", "SlSUl">;
+def SCALAR_SHL_N: SInst<"vshl_n", "11I", "SlSUl",
+                      [ImmCheck<1, ImmCheckShiftLeft, 0>]>;
 // Signed/Unsigned Saturating Shift Left (Immediate)
-def SCALAR_SQSHL_N: SInst<"vqshl_n", "11I", "ScSsSiSlSUcSUsSUiSUl">;
+def SCALAR_SQSHL_N: SInst<"vqshl_n", "11I", "ScSsSiSlSUcSUsSUiSUl",
+                      [ImmCheck<1, ImmCheckShiftLeft, 0>]>;
 // Signed Saturating Shift Left Unsigned (Immediate)
-def SCALAR_SQSHLU_N: SInst<"vqshlu_n", "11I", "ScSsSiSl">;
+def SCALAR_SQSHLU_N: SInst<"vqshlu_n", "11I", "ScSsSiSl",
+                      [ImmCheck<1, ImmCheckShiftLeft, 0>]>;
 
 // Shift Right And Insert (Immediate)
-def SCALAR_SRI_N: SInst<"vsri_n", "111I", "SlSUl">;
+def SCALAR_SRI_N: SInst<"vsri_n", "111I", "SlSUl",
+                        [ImmCheck<2, ImmCheckShiftRight, 0>]>;
 // Shift Left And Insert (Immediate)
-def SCALAR_SLI_N: SInst<"vsli_n", "111I", "SlSUl">;
+def SCALAR_SLI_N: SInst<"vsli_n", "111I", "SlSUl",
+                        [ImmCheck<2, ImmCheckShiftLeft, 0>]>;
 
 let isScalarNarrowShift = 1 in {
   // Signed/Unsigned Saturating Shift Right Narrow (Immediate)
-  def SCALAR_SQSHRN_N: SInst<"vqshrn_n", "(1<)1I", "SsSiSlSUsSUiSUl">;
+  def SCALAR_SQSHRN_N: SInst<"vqshrn_n", "(1<)1I", "SsSiSlSUsSUiSUl",
+                            [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>;
   // Signed/Unsigned Saturating Rounded Shift Right Narrow (Immediate)
-  def SCALAR_SQRSHRN_N: SInst<"vqrshrn_n", "(1<)1I", "SsSiSlSUsSUiSUl">;
+  def SCALAR_SQRSHRN_N: SInst<"vqrshrn_n", "(1<)1I", "SsSiSlSUsSUiSUl",
+                            [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>;
   // Signed Saturating Shift Right Unsigned Narrow (Immediate)
-  def SCALAR_SQSHRUN_N: SInst<"vqshrun_n", "(1<U)1I", "SsSiSl">;
+  def SCALAR_SQSHRUN_N: SInst<"vqshrun_n", "(1<U)1I", "SsSiSl",
+                            [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>;
   // Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate)
-  def SCALAR_SQRSHRUN_N: SInst<"vqrshrun_n", "(1<U)1I", "SsSiSl">;
+  def SCALAR_SQRSHRUN_N: SInst<"vqrshrun_n", "(1<U)1I", "SsSiSl",
+                            [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
 // Scalar Signed/Unsigned Fixed-point Convert To Floating-Point (Immediate)
-def SCALAR_SCVTF_N_F32: SInst<"vcvt_n_f32", "(1F)(1!)I", "SiSUi">;
-def SCALAR_SCVTF_N_F64: SInst<"vcvt_n_f64", "(1F)(1!)I", "SlSUl">;
+def SCALAR_SCVTF_N_F32: SInst<"vcvt_n_f32", "(1F)(1!)I", "SiSUi",
+                              [ImmCheck<1, ImmCheck1_32>]>;
+def SCALAR_SCVTF_N_F64: SInst<"vcvt_n_f64", "(1F)(1!)I", "SlSUl",
+                              [ImmCheck<1, ImmCheck1_64>]>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Scalar Floating-point Convert To Signed/Unsigned Fixed-point (Immediate)
-def SCALAR_FCVTZS_N_S32 : SInst<"vcvt_n_s32", "(1S)1I", "Sf">;
-def SCALAR_FCVTZU_N_U32 : SInst<"vcvt_n_u32", "(1U)1I", "Sf">;
-def SCALAR_FCVTZS_N_S64 : SInst<"vcvt_n_s64", "(1S)1I", "Sd">;
-def SCALAR_FCVTZU_N_U64 : SInst<"vcvt_n_u64", "(1U)1I", "Sd">;
+def SCALAR_FCVTZS_N_S32 : SInst<"vcvt_n_s32", "(1S)1I", "Sf",
+                                [ImmCheck<1, ImmCheck1_32>]>;
+def SCALAR_FCVTZU_N_U32 : SInst<"vcvt_n_u32", "(1U)1I", "Sf",
+                                [ImmCheck<1, ImmCheck1_32>]>;
+def SCALAR_FCVTZS_N_S64 : SInst<"vcvt_n_s64", "(1S)1I", "Sd",
+                                [ImmCheck<1, ImmCheck1_64>]>;
+def SCALAR_FCVTZU_N_U64 : SInst<"vcvt_n_u64", "(1U)1I", "Sd",
+                                [ImmCheck<1, ImmCheck1_64>]>;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -1562,94 +1597,72 @@ def SCALAR_UQXTN : SInst<"vqmovn", "(1<)1", "SUsSUiSUl">;
 
 // Scalar Floating Point  multiply (scalar, by element)
 def SCALAR_FMUL_LANE : IOpInst<"vmul_lane", "11.I", "SfSd", OP_SCALAR_MUL_LN>;
-def SCALAR_FMUL_LANEQ : IOpInst<"vmul_laneq", "11QI", "SfSd", OP_SCALAR_MUL_LN> {
-  let isLaneQ = 1;
-}
+def SCALAR_FMUL_LANEQ : IOpInst<"vmul_laneq", "11QI", "SfSd", OP_SCALAR_MUL_LN>;
 
 // Scalar Floating Point  multiply extended (scalar, by element)
 def SCALAR_FMULX_LANE : IOpInst<"vmulx_lane", "11.I", "SfSd", OP_SCALAR_MULX_LN>;
-def SCALAR_FMULX_LANEQ : IOpInst<"vmulx_laneq", "11QI", "SfSd", OP_SCALAR_MULX_LN> {
-  let isLaneQ = 1;
-}
+def SCALAR_FMULX_LANEQ : IOpInst<"vmulx_laneq", "11QI", "SfSd", OP_SCALAR_MULX_LN>;
 
 def SCALAR_VMUL_N : IInst<"vmul_n", "..1", "d">;
 
 // VMUL_LANE_A64 d type implemented using scalar mul lane
-def SCALAR_VMUL_LANE : IInst<"vmul_lane", "..qI", "d">;
+def SCALAR_VMUL_LANE : IInst<"vmul_lane", "..qI", "d",
+                            [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 
 // VMUL_LANEQ d type implemented using scalar mul lane
-def SCALAR_VMUL_LANEQ   : IInst<"vmul_laneq", "..QI", "d"> {
-  let isLaneQ = 1;
-}
-
+def SCALAR_VMUL_LANEQ : IInst<"vmul_laneq", "..QI", "d",
+                              [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 // VMULX_LANE d type implemented using scalar vmulx_lane
 def SCALAR_VMULX_LANE : IOpInst<"vmulx_lane", "..qI", "d", OP_SCALAR_VMULX_LN>;
 
 // VMULX_LANEQ d type implemented using scalar vmulx_laneq
-def SCALAR_VMULX_LANEQ : IOpInst<"vmulx_laneq", "..QI", "d", OP_SCALAR_VMULX_LNQ> {
-  let isLaneQ = 1;
-}
-
+def SCALAR_VMULX_LANEQ : IOpInst<"vmulx_laneq", "..QI", "d", OP_SCALAR_VMULX_LNQ>;
 // Scalar Floating Point fused multiply-add (scalar, by element)
-def SCALAR_FMLA_LANE : IInst<"vfma_lane", "111.I", "SfSd">;
-def SCALAR_FMLA_LANEQ : IInst<"vfma_laneq", "111QI", "SfSd"> {
-  let isLaneQ = 1;
-}
+def SCALAR_FMLA_LANE : IInst<"vfma_lane", "111.I", "SfSd",
+                            [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
+def SCALAR_FMLA_LANEQ : IInst<"vfma_laneq", "111QI", "SfSd",
+                            [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
 
 // Scalar Floating Point fused multiply-subtract (scalar, by element)
 def SCALAR_FMLS_LANE : IOpInst<"vfms_lane", "111.I", "SfSd", OP_FMS_LN>;
-def SCALAR_FMLS_LANEQ : IOpInst<"vfms_laneq", "111QI", "SfSd", OP_FMS_LNQ> {
-  let isLaneQ = 1;
-}
+def SCALAR_FMLS_LANEQ : IOpInst<"vfms_laneq", "111QI", "SfSd", OP_FMS_LNQ>;
 
 // Signed Saturating Doubling Multiply Long (scalar by element)
 def SCALAR_SQDMULL_LANE : SOpInst<"vqdmull_lane", "(1>)1.I", "SsSi", OP_SCALAR_QDMULL_LN>;
-def SCALAR_SQDMULL_LANEQ : SOpInst<"vqdmull_laneq", "(1>)1QI", "SsSi", OP_SCALAR_QDMULL_LN> {
-  let isLaneQ = 1;
-}
+def SCALAR_SQDMULL_LANEQ : SOpInst<"vqdmull_laneq", "(1>)1QI", "SsSi", OP_SCALAR_QDMULL_LN>;
 
 // Signed Saturating Doubling Multiply-Add Long (scalar by element)
-def SCALAR_SQDMLAL_LANE : SInst<"vqdmlal_lane", "(1>)(1>)1.I", "SsSi">;
-def SCALAR_SQDMLAL_LANEQ : SInst<"vqdmlal_laneq", "(1>)(1>)1QI", "SsSi"> {
-  let isLaneQ = 1;
-}
+def SCALAR_SQDMLAL_LANE : SInst<"vqdmlal_lane", "(1>)(1>)1.I", "SsSi",
+                                [ImmCheck<3, ImmCheckLaneIndex, 1>]>;
+def SCALAR_SQDMLAL_LANEQ : SInst<"vqdmlal_laneq", "(1>)(1>)1QI", "SsSi",
+                                [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
 
 // Signed Saturating Doubling Multiply-Subtract Long (scalar by element)
-def SCALAR_SQDMLS_LANE : SInst<"vqdmlsl_lane", "(1>)(1>)1.I", "SsSi">;
-def SCALAR_SQDMLS_LANEQ : SInst<"vqdmlsl_laneq", "(1>)(1>)1QI", "SsSi"> {
-  let isLaneQ = 1;
-}
-
+def SCALAR_SQDMLS_LANE : SInst<"vqdmlsl_lane", "(1>)(1>)1.I", "SsSi",
+                              [ImmCheck<3, ImmCheckLaneIndex, 1>]>;
+def SCALAR_SQDMLS_LANEQ : SInst<"vqdmlsl_laneq", "(1>)(1>)1QI", "SsSi",
+                              [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
 // Scalar Integer Saturating Doubling Multiply Half High (scalar by element)
 def SCALAR_SQDMULH_LANE : SOpInst<"vqdmulh_lane", "11.I", "SsSi", OP_SCALAR_QDMULH_LN>;
-def SCALAR_SQDMULH_LANEQ : SOpInst<"vqdmulh_laneq", "11QI", "SsSi", OP_SCALAR_QDMULH_LN> {
-  let isLaneQ = 1;
-}
+def SCALAR_SQDMULH_LANEQ : SOpInst<"vqdmulh_laneq", "11QI", "SsSi", OP_SCALAR_QDMULH_LN>;
 
 // Scalar Integer Saturating Rounding Doubling Multiply Half High
 def SCALAR_SQRDMULH_LANE : SOpInst<"vqrdmulh_lane", "11.I", "SsSi", OP_SCALAR_QRDMULH_LN>;
-def SCALAR_SQRDMULH_LANEQ : SOpInst<"vqrdmulh_laneq", "11QI", "SsSi", OP_SCALAR_QRDMULH_LN> {
-  let isLaneQ = 1;
-}
+def SCALAR_SQRDMULH_LANEQ : SOpInst<"vqrdmulh_laneq", "11QI", "SsSi", OP_SCALAR_QRDMULH_LN>;
 
 let TargetGuard = "v8.1a,neon" in {
 // Signed Saturating Rounding Doubling Multiply Accumulate Returning High Half
 def SCALAR_SQRDMLAH_LANE : SOpInst<"vqrdmlah_lane", "111.I", "SsSi", OP_SCALAR_QRDMLAH_LN>;
-def SCALAR_SQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "111QI", "SsSi", OP_SCALAR_QRDMLAH_LN> {
-  let isLaneQ = 1;
-}
-
+def SCALAR_SQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "111QI", "SsSi", OP_SCALAR_QRDMLAH_LN>;
 // Signed Saturating Rounding Doubling Multiply Subtract Returning High Half
 def SCALAR_SQRDMLSH_LANE : SOpInst<"vqrdmlsh_lane", "111.I", "SsSi", OP_SCALAR_QRDMLSH_LN>;
-def SCALAR_SQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "111QI", "SsSi", OP_SCALAR_QRDMLSH_LN> {
-  let isLaneQ = 1;
-}
+def SCALAR_SQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "111QI", "SsSi", OP_SCALAR_QRDMLSH_LN>;
 } // TargetGuard = "v8.1a"
 
-def SCALAR_VDUP_LANE : IInst<"vdup_lane", "1.I", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs">;
-def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "1QI", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs"> {
-  let isLaneQ = 1;
-}
+def SCALAR_VDUP_LANE : IInst<"vdup_lane", "1.I", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs",
+                            [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
+def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "1QI", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs",
+                            [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
 
 } // ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)"
 
@@ -1719,11 +1732,12 @@ let TargetGuard = "fullfp16,neon" in {
     def VCLTH      : SOpInst<"vclt", "U..", "hQh", OP_LT>;
 
   // Vector conversion
-  let isVCVT_N = 1 in {
-    def VCVT_N_F16 : SInst<"vcvt_n_f16", "F(.!)I", "sUsQsQUs">;
-    def VCVT_N_S16 : SInst<"vcvt_n_s16", "S.I", "hQh">;
-    def VCVT_N_U16 : SInst<"vcvt_n_u16", "U.I", "hQh">;
-  }
+    def VCVT_N_F16 : SInst<"vcvt_n_f16", "F(.!)I", "sUsQsQUs",
+                          [ImmCheck<1, ImmCheck1_16>]>;
+    def VCVT_N_S16 : SInst<"vcvt_n_s16", "S.I", "hQh",
+                          [ImmCheck<1, ImmCheck1_16>]>;
+    def VCVT_N_U16 : SInst<"vcvt_n_u16", "U.I", "hQh",
+                          [ImmCheck<1, ImmCheck1_16>]>;
 
   // Max/Min
   def VMAXH         : SInst<"vmax", "...", "hQh">;
@@ -1770,7 +1784,7 @@ def VZIPH    : WInst<"vzip", "2..", "hQh">;
 def VUZPH    : WInst<"vuzp", "2..", "hQh">;
 def VTRNH    : WInst<"vtrn", "2..", "hQh">;
 // Vector Extract
-def VEXTH      : WInst<"vext", "...I", "hQh">;
+def VEXTH      : WInst<"vext", "...I", "hQh", [ImmCheck<2, ImmCheckLaneIndex, 0>]>;
 // Reverse vector elements
 def VREV64H    : WOpInst<"vrev64", "..", "hQh", OP_REV64>;
 
@@ -1801,54 +1815,42 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "f
   // ARMv8.2-A FP16 lane vector intrinsics.
 
   // FMA lane
-  def VFMA_LANEH   : IInst<"vfma_lane", "...qI", "hQh">;
-  def VFMA_LANEQH  : IInst<"vfma_laneq", "...QI", "hQh"> {
-    let isLaneQ = 1;
-  }
+  def VFMA_LANEH   : IInst<"vfma_lane", "...qI", "hQh",
+                          [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
+  def VFMA_LANEQH  : IInst<"vfma_laneq", "...QI", "hQh",
+                          [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
 
   // FMA lane with scalar argument
   def FMLA_NH      : SOpInst<"vfma_n", "...1", "hQh", OP_FMLA_N>;
   // Scalar floating point fused multiply-add (scalar, by element)
-  def SCALAR_FMLA_LANEH  : IInst<"vfma_lane", "111.I", "Sh">;
-  def SCALAR_FMLA_LANEQH : IInst<"vfma_laneq", "111QI", "Sh"> {
-    let isLaneQ = 1;
-  }
+  def SCALAR_FMLA_LANEH  : IInst<"vfma_lane", "111.I", "Sh",
+                                [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
+  def SCALAR_FMLA_LANEQH : IInst<"vfma_laneq", "111QI", "Sh",
+                                [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
 
   // FMS lane
   def VFMS_LANEH   : IOpInst<"vfms_lane", "...qI", "hQh", OP_FMS_LN>;
-  def VFMS_LANEQH  : IOpInst<"vfms_laneq", "...QI", "hQh", OP_FMS_LNQ> {
-    let isLaneQ = 1;
-  }
+  def VFMS_LANEQH  : IOpInst<"vfms_laneq", "...QI", "hQh", OP_FMS_LNQ>;
   // FMS lane with scalar argument
   def FMLS_NH      : SOpInst<"vfms_n", "...1", "hQh", OP_FMLS_N>;
   // Scalar floating foint fused multiply-subtract (scalar, by element)
   def SCALAR_FMLS_LANEH  : IOpInst<"vfms_lane", "111.I", "Sh", OP_FMS_LN>;
-  def SCALAR_FMLS_LANEQH : IOpInst<"vfms_laneq", "111QI", "Sh", OP_FMS_LNQ> {
-    let isLaneQ = 1;
-  }
-
+  def SCALAR_FMLS_LANEQH : IOpInst<"vfms_laneq", "111QI", "Sh", OP_FMS_LNQ>;
   // Mul lane
-  def VMUL_LANEQH   : IOpInst<"vmul_laneq", "..QI", "hQh", OP_MUL_LN> {
-    let isLaneQ = 1;
-  }
+  def VMUL_LANEQH   : IOpInst<"vmul_laneq", "..QI", "hQh", OP_MUL_LN>;
   // Scalar floating point  multiply (scalar, by element)
   def SCALAR_FMUL_LANEH  : IOpInst<"vmul_lane", "11.I", "Sh", OP_SCALAR_MUL_LN>;
-  def SCALAR_FMUL_LANEQH : IOpInst<"vmul_laneq", "11QI", "Sh", OP_SCALAR_MUL_LN> {
-    let isLaneQ = 1;
-  }
+  def SCALAR_FMUL_LANEQH : IOpInst<"vmul_laneq", "11QI", "Sh", OP_SCALAR_MUL_LN>;
 
   // Mulx lane
   def VMULX_LANEH   : IOpInst<"vmulx_lane", "..qI", "hQh", OP_MULX_LN>;
-  def VMULX_LANEQH  : IOpInst<"vmulx_laneq", "..QI", "hQh", OP_MULX_LN> {
-    let isLaneQ = 1;
-  }
+  def VMULX_LANEQH  : IOpInst<"vmulx_laneq", "..QI", "hQh", OP_MULX_LN>;
   def VMULX_NH      : IOpInst<"vmulx_n", "..1", "hQh", OP_MULX_N>;
   // Scalar floating point  mulx (scalar, by element)
-  def SCALAR_FMULX_LANEH : IInst<"vmulx_lane", "11.I", "Sh">;
-  def SCALAR_FMULX_LANEQH : IInst<"vmulx_laneq", "11QI", "Sh"> {
-    let isLaneQ = 1;
-  }
-
+  def SCALAR_FMULX_LANEH : IInst<"vmulx_lane", "11.I", "Sh",
+                                [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
+  def SCALAR_FMULX_LANEQH : IInst<"vmulx_laneq", "11QI", "Sh",
+                                [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
   // ARMv8.2-A FP16 reduction vector intrinsics.
   def VMAXVH   : SInst<"vmaxv", "1.", "hQh">;
   def VMINVH   : SInst<"vminv", "1.", "hQh">;
@@ -1865,10 +1867,10 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)" in {
   def VZIP2H     : SOpInst<"vzip2", "...", "hQh", OP_ZIP2>;
   def VUZP2H     : SOpInst<"vuzp2", "...", "hQh", OP_UZP2>;
 
-  def SCALAR_VDUP_LANEH  : IInst<"vdup_lane", "1.I", "Sh">;
-  def SCALAR_VDUP_LANEQH : IInst<"vdup_laneq", "1QI", "Sh"> {
-    let isLaneQ = 1;
-  }
+  def SCALAR_VDUP_LANEH  : IInst<"vdup_lane", "1.I", "Sh",
+                                [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
+  def SCALAR_VDUP_LANEQH : IInst<"vdup_laneq", "1QI", "Sh",
+                                [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
 }
 
 // v8.2-A dot product instructions.
@@ -1878,9 +1880,7 @@ let TargetGuard = "dotprod,neon" in {
 }
 let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "dotprod,neon" in {
   // Variants indexing into a 128-bit vector are A64 only.
-  def UDOT_LANEQ : SOpInst<"vdot_laneq", "..(<<)(<<Q)I", "iUiQiQUi", OP_DOT_LNQ> {
-    let isLaneQ = 1;
-  }
+  def UDOT_LANEQ : SOpInst<"vdot_laneq", "..(<<)(<<Q)I", "iUiQiQUi", OP_DOT_LNQ>;
 }
 
 // v8.2-A FP16 fused multiply-add long instructions.
@@ -1895,18 +1895,10 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "f
   def VFMLAL_LANE_HIGH : SOpInst<"vfmlal_lane_high", "(F>)(F>)F(Fq)I", "hQh", OP_FMLAL_LN_Hi>;
   def VFMLSL_LANE_HIGH : SOpInst<"vfmlsl_lane_high", "(F>)(F>)F(Fq)I", "hQh", OP_FMLSL_LN_Hi>;
 
-  def VFMLAL_LANEQ_LOW  : SOpInst<"vfmlal_laneq_low",  "(F>)(F>)F(FQ)I", "hQh", OP_FMLAL_LN> {
-    let isLaneQ = 1;
-  }
-  def VFMLSL_LANEQ_LOW  : SOpInst<"vfmlsl_laneq_low",  "(F>)(F>)F(FQ)I", "hQh", OP_FMLSL_LN> {
-    let isLaneQ = 1;
-  }
-  def VFMLAL_LANEQ_HIGH : SOpInst<"vfmlal_laneq_high", "(F>)(F>)F(FQ)I", "hQh", OP_FMLAL_LN_Hi> {
-    let isLaneQ = 1;
-  }
-  def VFMLSL_LANEQ_HIGH : SOpInst<"vfmlsl_laneq_high", "(F>)(F>)F(FQ)I", "hQh", OP_FMLSL_LN_Hi> {
-    let isLaneQ = 1;
-  }
+  def VFMLAL_LANEQ_LOW  : SOpInst<"vfmlal_laneq_low",  "(F>)(F>)F(FQ)I", "hQh", OP_FMLAL_LN>;
+  def VFMLSL_LANEQ_LOW  : SOpInst<"vfmlsl_laneq_low",  "(F>)(F>)F(FQ)I", "hQh", OP_FMLSL_LN>;
+  def VFMLAL_LANEQ_HIGH : SOpInst<"vfmlal_laneq_high", "(F>)(F>)F(FQ)I", "hQh", OP_FMLAL_LN_Hi>;
+  def VFMLSL_LANEQ_HIGH : SOpInst<"vfmlsl_laneq_high", "(F>)(F>)F(FQ)I", "hQh", OP_FMLSL_LN_Hi>;
 }
 
 let TargetGuard = "i8mm,neon" in {
@@ -1919,19 +1911,15 @@ let TargetGuard = "i8mm,neon" in {
   def VSUDOT_LANE  : SOpInst<"vsudot_lane", "..(<<)(<<qU)I", "iQi", OP_SUDOT_LN>;
 
   let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)" in {
-    let isLaneQ = 1 in {
-      def VUSDOT_LANEQ  : SOpInst<"vusdot_laneq", "..(<<U)(<<Q)I", "iQi", OP_USDOT_LNQ>;
-      def VSUDOT_LANEQ  : SOpInst<"vsudot_laneq", "..(<<)(<<QU)I", "iQi", OP_SUDOT_LNQ>;
-    }
+    def VUSDOT_LANEQ  : SOpInst<"vusdot_laneq", "..(<<U)(<<Q)I", "iQi", OP_USDOT_LNQ>;
+    def VSUDOT_LANEQ  : SOpInst<"vsudot_laneq", "..(<<)(<<QU)I", "iQi", OP_SUDOT_LNQ>;
   }
 }
 
 let TargetGuard = "bf16,neon" in {
   def VDOT_BF : SInst<"vbfdot", "..BB", "fQf">;
   def VDOT_LANE_BF : SOpInst<"vbfdot_lane", "..B(Bq)I", "fQf", OP_BFDOT_LN>;
-  def VDOT_LANEQ_BF : SOpInst<"vbfdot_laneq", "..B(BQ)I", "fQf", OP_BFDOT_LNQ> {
-    let isLaneQ = 1;
-  }
+  def VDOT_LANEQ_BF : SOpInst<"vbfdot_laneq", "..B(BQ)I", "fQf", OP_BFDOT_LNQ>;
 
   def VFMMLA_BF : SInst<"vbfmmla", "..BB", "Qf">;
 
@@ -1952,20 +1940,16 @@ multiclass VCMLA_ROTS<string type, string lanety, string laneqty> {
     // vcmla{ROT}_lane
     def : SOpInst<"vcmla" # ROT # "_lane", "...qI", type, Op<(call "vcmla" # ROT, $p0, $p1,
            (bitcast $p0, (dup_typed lanety , (call "vget_lane", (bitcast lanety, $p2), $p3))))>>;
-
     // vcmlaq{ROT}_lane
     def : SOpInst<"vcmla" # ROT # "_lane", "...qI", "Q" # type, Op<(call "vcmla" # ROT, $p0, $p1,
            (bitcast $p0, (dup_typed laneqty , (call "vget_lane", (bitcast lanety, $p2), $p3))))>>;
 
-    let isLaneQ = 1 in  {
-      // vcmla{ROT}_laneq
-      def : SOpInst<"vcmla" # ROT # "_laneq", "...QI", type,  Op<(call "vcmla" # ROT, $p0, $p1,
-              (bitcast $p0, (dup_typed lanety, (call "vget_lane", (bitcast laneqty, $p2), $p3))))>>;
-
-      // vcmlaq{ROT}_laneq
-      def : SOpInst<"vcmla" # ROT # "_laneq", "...QI", "Q" # type, Op<(call "vcmla" # ROT, $p0, $p1,
-             (bitcast $p0, (dup_typed laneqty , (call "vget_lane", (bitcast laneqty, $p2), $p3))))>>;
-    }
+    // vcmla{ROT}_laneq
+    def : SOpInst<"vcmla" # ROT # "_laneq", "...QI", type,  Op<(call "vcmla" # ROT, $p0, $p1,
+            (bitcast $p0, (dup_typed lanety, (call "vget_lane", (bitcast laneqty, $p2), $p3))))>>;
+    // vcmlaq{ROT}_laneq
+    def : SOpInst<"vcmla" # ROT # "_laneq", "...QI", "Q" # type, Op<(call "vcmla" # ROT, $p0, $p1,
+            (bitcast $p0, (dup_typed laneqty , (call "vget_lane", (bitcast laneqty, $p2), $p3))))>>;
   }
 }
 
@@ -2002,21 +1986,21 @@ let TargetGuard = "bf16,neon" in {
   def VDUP_N_BF    : WOpInst<"vdup_n", ".1", "bQb", OP_DUP>;
 
   def VDUP_LANE_BF : WOpInst<"vdup_lane", ".qI", "bQb", OP_DUP_LN>;
-  def VDUP_LANEQ_BF: WOpInst<"vdup_laneq", ".QI", "bQb", OP_DUP_LN> {
-    let isLaneQ = 1;
-  }
+  def VDUP_LANEQ_BF: WOpInst<"vdup_laneq", ".QI", "bQb", OP_DUP_LN>;
 
   def VCOMBINE_BF  : NoTestOpInst<"vcombine", "Q..", "b", OP_CONC>;
 
   def VGET_HIGH_BF : NoTestOpInst<"vget_high", ".Q", "b", OP_HI>;
   def VGET_LOW_BF  : NoTestOpInst<"vget_low", ".Q", "b", OP_LO>;
 
-  def VGET_LANE_BF : IInst<"vget_lane", "1.I", "bQb">;
-  def VSET_LANE_BF : IInst<"vset_lane", ".1.I", "bQb">;
-  def SCALAR_VDUP_LANE_BF : IInst<"vdup_lane", "1.I", "Sb">;
-  def SCALAR_VDUP_LANEQ_BF : IInst<"vdup_laneq", "1QI", "Sb"> {
-    let isLaneQ = 1;
-  }
+  def VGET_LANE_BF : IInst<"vget_lane", "1.I", "bQb",
+                          [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
+  def VSET_LANE_BF : IInst<"vset_lane", ".1.I", "bQb",
+                          [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
+  def SCALAR_VDUP_LANE_BF : IInst<"vdup_lane", "1.I", "Sb",
+                          [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
+  def SCALAR_VDUP_LANEQ_BF : IInst<"vdup_laneq", "1QI", "Sb",
+                          [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
 
   def VLD1_BF : WInst<"vld1", ".(c*!)", "bQb">;
   def VLD2_BF : WInst<"vld2", "2(c*!)", "bQb">;
@@ -2036,14 +2020,22 @@ let TargetGuard = "bf16,neon" in {
   def VST1_X3_BF : WInst<"vst1_x3", "v*(3!)", "bQb">;
   def VST1_X4_BF : WInst<"vst1_x4", "v*(4!)", "bQb">;
 
-  def VLD1_LANE_BF : WInst<"vld1_lane", ".(c*!).I", "bQb">;
-  def VLD2_LANE_BF : WInst<"vld2_lane", "2(c*!)2I", "bQb">;
-  def VLD3_LANE_BF : WInst<"vld3_lane", "3(c*!)3I", "bQb">;
-  def VLD4_LANE_BF : WInst<"vld4_lane", "4(c*!)4I", "bQb">;
-  def VST1_LANE_BF : WInst<"vst1_lane", "v*(.!)I", "bQb">;
-  def VST2_LANE_BF : WInst<"vst2_lane", "v*(2!)I", "bQb">;
-  def VST3_LANE_BF : WInst<"vst3_lane", "v*(3!)I", "bQb">;
-  def VST4_LANE_BF : WInst<"vst4_lane", "v*(4!)I", "bQb">;
+  def VLD1_LANE_BF : WInst<"vld1_lane", ".(c*!).I", "bQb",
+                          [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
+  def VLD2_LANE_BF : WInst<"vld2_lane", "2(c*!)2I", "bQb",
+                          [ImmCheck<4, ImmCheckLaneIndex, 1>]>;
+  def VLD3_LANE_BF : WInst<"vld3_lane", "3(c*!)3I", "bQb",
+                          [ImmCheck<5, ImmCheckLaneIndex, 1>]>;
+  def VLD4_LANE_BF : WInst<"vld4_lane", "4(c*!)4I", "bQb",
+                          [ImmCheck<6, ImmCheckLaneIndex, 1>]>;
+  def VST1_LANE_BF : WInst<"vst1_lane", "v*(.!)I", "bQb",
+                          [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
+  def VST2_LANE_BF : WInst<"vst2_lane", "v*(2!)I", "bQb",
+                          [ImmCheck<3, ImmCheckLaneIndex, 1>]>;
+  def VST3_LANE_BF : WInst<"vst3_lane", "v*(3!)I", "bQb",
+                          [ImmCheck<4, ImmCheckLaneIndex, 1>]>;
+  def VST4_LANE_BF : WInst<"vst4_lane", "v*(4!)I", "bQb",
+                          [ImmCheck<5, ImmCheckLaneIndex, 1>]>;
 
   def VLD1_DUP_BF : WInst<"vld1_dup", ".(c*!)", "bQb">;
   def VLD2_DUP_BF : WInst<"vld2_dup", "2(c*!)", "bQb">;
@@ -2093,6 +2085,44 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "b
 
 // v8.9a/v9.4a LRCPC3 intrinsics
 let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "rcpc3,neon" in {
-  def VLDAP1_LANE : WInst<"vldap1_lane", ".(c*!).I", "QUlQlUlldQdPlQPl">;
-  def VSTL1_LANE  : WInst<"vstl1_lane", "v*(.!)I", "QUlQlUlldQdPlQPl">;
+  def VLDAP1_LANE : WInst<"vldap1_lane", ".(c*!).I", "QUlQlUlldQdPlQPl",
+                        [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
+  def VSTL1_LANE  : WInst<"vstl1_lane", "v*(.!)I", "QUlQlUlldQdPlQPl",
+                        [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
+}
+
+// Lookup table read with 2-bit/4-bit indices
+let ArchGuard = "defined(__aarch64__)", TargetGuard = "lut" in {
+  def VLUTI2_B    : SInst<"vluti2_lane", "Q.(qU)I", "cUcPcQcQUcQPc",
+                         [ImmCheck<2, ImmCheck0_1>]>;
+  def VLUTI2_B_Q  : SInst<"vluti2_laneq", "Q.(QU)I", "cUcPcQcQUcQPc",
+                         [ImmCheck<2, ImmCheck0_3>]>;
+  def VLUTI2_H    : SInst<"vluti2_lane", "Q.(<qU)I", "sUsPshQsQUsQPsQh",
+                         [ImmCheck<2, ImmCheck0_3>]>;
+  def VLUTI2_H_Q  : SInst<"vluti2_laneq", "Q.(<QU)I", "sUsPshQsQUsQPsQh",
+                         [ImmCheck<2, ImmCheck0_7>]>;
+  def VLUTI4_B    : SInst<"vluti4_lane", "..(qU)I", "QcQUcQPc",
+                         [ImmCheck<2, ImmCheck0_0>]>;
+  def VLUTI4_B_Q  : SInst<"vluti4_laneq", "..UI", "QcQUcQPc",
+                         [ImmCheck<2, ImmCheck0_1>]>;
+  def VLUTI4_H_X2 : SInst<"vluti4_lane_x2", ".2(<qU)I", "QsQUsQPsQh",
+                          [ImmCheck<3, ImmCheck0_1>]>;
+  def VLUTI4_H_X2_Q : SInst<"vluti4_laneq_x2", ".2(<U)I", "QsQUsQPsQh",
+                          [ImmCheck<3, ImmCheck0_3>]>;
+
+  let TargetGuard = "lut,bf16" in {
+    def VLUTI2_BF      : SInst<"vluti2_lane", "Q.(<qU)I", "bQb",
+                              [ImmCheck<2, ImmCheck0_3>]>;
+    def VLUTI2_BF_Q    : SInst<"vluti2_laneq", "Q.(<QU)I", "bQb",
+                              [ImmCheck<2, ImmCheck0_7>]>;
+    def VLUTI4_BF_X2   : SInst<"vluti4_lane_x2", ".2(<qU)I", "Qb",
+                              [ImmCheck<3, ImmCheck0_1>]>;
+    def VLUTI4_BF_X2_Q   : SInst<"vluti4_laneq_x2", ".2(<U)I", "Qb",
+                              [ImmCheck<3, ImmCheck0_3>]>;
+  }
+}
+
+let ArchGuard = "defined(__aarch64__)", TargetGuard = "neon,faminmax" in {
+  def FAMIN : WInst<"vamin", "...", "fhQdQfQh">;
+  def FAMAX : WInst<"vamax", "...", "fhQdQfQh">;
 }
diff --git clang/include/clang/Basic/arm_neon_incl.td clang/include/clang/Basic/arm_neon_incl.td
index 3b8015daee6d..b088e0794cde 100644
--- clang/include/clang/Basic/arm_neon_incl.td
+++ clang/include/clang/Basic/arm_neon_incl.td
@@ -21,6 +21,8 @@
 //
 //===----------------------------------------------------------------------===//
 
+include "arm_immcheck_incl.td"
+
 // The base Operation class. All operations must subclass this.
 class Operation<list<dag> ops=[]> {
   list<dag> Ops = ops;
@@ -260,7 +262,7 @@ def OP_UNAVAILABLE : Operation {
 
 
 // Every intrinsic subclasses Inst.
-class Inst <string n, string p, string t, Operation o> {
+class Inst <string n, string p, string t, Operation o, list<ImmCheck> ch = []>{
   string Name = n;
   string Prototype = p;
   string Types = t;
@@ -272,12 +274,7 @@ class Inst <string n, string p, string t, Operation o> {
   bit isShift = 0;
   bit isScalarShift = 0;
   bit isScalarNarrowShift = 0;
-  bit isVCVT_N = 0;
-  bit isVXAR = 0;
-  // For immediate checks: the immediate will be assumed to specify the lane of
-  // a Q register. Only used for intrinsics which end up calling polymorphic
-  // builtins.
-  bit isLaneQ = 0;
+  list<ImmCheck> ImmChecks = ch;
 
   // Certain intrinsics have different names than their representative
   // instructions. This field allows us to handle this correctly when we
@@ -300,9 +297,9 @@ class Inst <string n, string p, string t, Operation o> {
 // SInst: Instruction with signed/unsigned suffix (e.g., "s8", "u8", "p8")
 // IInst: Instruction with generic integer suffix (e.g., "i8")
 // WInst: Instruction with only bit size suffix (e.g., "8")
-class SInst<string n, string p, string t> : Inst<n, p, t, OP_NONE> {}
-class IInst<string n, string p, string t> : Inst<n, p, t, OP_NONE> {}
-class WInst<string n, string p, string t> : Inst<n, p, t, OP_NONE> {}
+class SInst<string n, string p, string t, list<ImmCheck> ch = []> : Inst<n, p, t, OP_NONE, ch> {}
+class IInst<string n, string p, string t, list<ImmCheck> ch = []> : Inst<n, p, t, OP_NONE, ch> {}
+class WInst<string n, string p, string t, list<ImmCheck> ch = []> : Inst<n, p, t, OP_NONE, ch> {}
 
 // The following instruction classes are implemented via operators
 // instead of builtins. As such these declarations are only used for
diff --git clang/include/clang/Basic/arm_sve.td clang/include/clang/Basic/arm_sve.td
index 078373823a3b..edf73d9022b0 100644
--- clang/include/clang/Basic/arm_sve.td
+++ clang/include/clang/Basic/arm_sve.td
@@ -1939,6 +1939,24 @@ def SVTBL2_BF16 : SInst<"svtbl2[_{d}]", "d2u",  "b", MergeNone, "", [VerifyRunti
 def SVTBX_BF16  : SInst<"svtbx[_{d}]",  "dddu", "b", MergeNone, "aarch64_sve_tbx", [VerifyRuntimeMode]>;
 }
 
+////////////////////////////////////////////////////////////////////////////////
+// SVE2 - Lookup table
+let SVETargetGuard = "sve2,lut", SMETargetGuard = "sme2,lut" in {
+  def SVLUTI2_B : SInst<"svluti2_lane[_{d}]", "dd[i", "cUc", MergeNone, "aarch64_sve_luti2_lane", [VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_3>]>;
+  def SVLUTI2_H : SInst<"svluti2_lane[_{d}]", "dd[i", "sUsh", MergeNone, "aarch64_sve_luti2_lane", [VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_7>]>;
+
+  def SVLUTI4_B : SInst<"svluti4_lane[_{d}]", "dd[i", "cUc", MergeNone, "aarch64_sve_luti4_lane", [VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_1>]>;
+  def SVLUTI4_H : SInst<"svluti4_lane[_{d}]", "dd[i", "sUsh", MergeNone, "aarch64_sve_luti4_lane", [VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_3>]>;
+
+  def SVLUTI4_x2 : SInst<"svluti4_lane[_{d}]_x2", "d2.d[i", "sUsh", MergeNone, "aarch64_sve_luti4_lane_x2", [VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_3>]>;
+}
+
+let SVETargetGuard = "sve2,lut,bf16", SMETargetGuard = "sme2,lut,bf16" in {
+  def SVLUTI2_BF16 : SInst<"svluti2_lane[_{d}]", "dd[i", "b", MergeNone, "aarch64_sve_luti2_lane", [VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_7>]>;
+  def SVLUTI4_BF16 : SInst<"svluti4_lane[_{d}]", "dd[i", "b", MergeNone, "aarch64_sve_luti4_lane", [VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_3>]>;
+  def SVLUTI4_BF16_x2 : SInst<"svluti4_lane[_{d}]_x2", "d2.d[i", "b", MergeNone, "aarch64_sve_luti4_lane_x2", [VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_3>]>;
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 // SVE2 - Optional
 
@@ -2235,6 +2253,13 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in {
   def SVSQDMULH_X4        : SInst<"svqdmulh[_{d}_x4]",        "444", "csil", MergeNone, "aarch64_sve_sqdmulh_vgx4",        [IsStreaming], []>;
 }
 
+let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2,faminmax" in {
+  def FAMIN_X2 : Inst<"svamin[_{d}_x2]", "222", "hfd", MergeNone, "aarch64_sme_famin_x2",  [IsStreaming], []>;
+  def FAMAX_X2 : Inst<"svamax[_{d}_x2]", "222", "hfd", MergeNone, "aarch64_sme_famax_x2",  [IsStreaming], []>;
+  def FAMIN_X4 : Inst<"svamin[_{d}_x4]", "444", "hfd", MergeNone, "aarch64_sme_famin_x4",  [IsStreaming], []>;
+  def FAMAX_X4 : Inst<"svamax[_{d}_x4]", "444", "hfd", MergeNone, "aarch64_sme_famax_x4",  [IsStreaming], []>;
+}
+
 let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
   def REINTERPRET_SVBOOL_TO_SVCOUNT : Inst<"svreinterpret[_c]", "}P", "Pc", MergeNone, "", [VerifyRuntimeMode], []>;
   def REINTERPRET_SVCOUNT_TO_SVBOOL : Inst<"svreinterpret[_b]", "P}", "Pc", MergeNone, "", [VerifyRuntimeMode], []>;
@@ -2401,3 +2426,8 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
   def SVBFMLSLB_LANE : SInst<"svbfmlslb_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslb_lane", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<3, ImmCheck0_7>]>;
   def SVBFMLSLT_LANE : SInst<"svbfmlslt_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslt_lane", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<3, ImmCheck0_7>]>;
 }
+
+let SVETargetGuard = "sve2,faminmax", SMETargetGuard = "sme2,faminmax" in {
+  defm SVAMIN : SInstZPZZ<"svamin", "hfd", "aarch64_sve_famin", "aarch64_sve_famin_u">;
+  defm SVAMAX : SInstZPZZ<"svamax", "hfd", "aarch64_sve_famax", "aarch64_sve_famax_u">;
+}
diff --git clang/include/clang/Basic/arm_sve_sme_incl.td clang/include/clang/Basic/arm_sve_sme_incl.td
index 6ec357825a13..fdf4ba55fe93 100644
--- clang/include/clang/Basic/arm_sve_sme_incl.td
+++ clang/include/clang/Basic/arm_sve_sme_incl.td
@@ -13,6 +13,8 @@
 //
 //===----------------------------------------------------------------------===//
 
+include "arm_immcheck_incl.td"
+
 //===----------------------------------------------------------------------===//
 // Instruction definitions
 //===----------------------------------------------------------------------===//
@@ -233,40 +235,6 @@ def IsInZT0                         : FlagType<0x400000000000>;
 def IsOutZT0                        : FlagType<0x800000000000>;
 def IsInOutZT0                      : FlagType<0x1000000000000>;
 
-// These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h
-class ImmCheckType<int val> {
-  int Value = val;
-}
-def ImmCheck0_31                : ImmCheckType<0>;  // 0..31 (used for e.g. predicate patterns)
-def ImmCheck1_16                : ImmCheckType<1>;  // 1..16
-def ImmCheckExtract             : ImmCheckType<2>;  // 0..(2048/sizeinbits(elt) - 1)
-def ImmCheckShiftRight          : ImmCheckType<3>;  // 1..sizeinbits(elt)
-def ImmCheckShiftRightNarrow    : ImmCheckType<4>;  // 1..sizeinbits(elt)/2
-def ImmCheckShiftLeft           : ImmCheckType<5>;  // 0..(sizeinbits(elt) - 1)
-def ImmCheck0_7                 : ImmCheckType<6>;  // 0..7
-def ImmCheckLaneIndex           : ImmCheckType<7>;  // 0..(128/(1*sizeinbits(elt)) - 1)
-def ImmCheckLaneIndexCompRotate : ImmCheckType<8>;  // 0..(128/(2*sizeinbits(elt)) - 1)
-def ImmCheckLaneIndexDot        : ImmCheckType<9>;  // 0..(128/(4*sizeinbits(elt)) - 1)
-def ImmCheckComplexRot90_270    : ImmCheckType<10>; // [90,270]
-def ImmCheckComplexRotAll90     : ImmCheckType<11>; // [0, 90, 180,270]
-def ImmCheck0_13                : ImmCheckType<12>; // 0..13
-def ImmCheck0_1                 : ImmCheckType<13>; // 0..1
-def ImmCheck0_2                 : ImmCheckType<14>; // 0..2
-def ImmCheck0_3                 : ImmCheckType<15>; // 0..3
-def ImmCheck0_0                 : ImmCheckType<16>; // 0..0
-def ImmCheck0_15                : ImmCheckType<17>; // 0..15
-def ImmCheck0_255               : ImmCheckType<18>; // 0..255
-def ImmCheck2_4_Mul2            : ImmCheckType<19>; // 2, 4
-def ImmCheck1_1                 : ImmCheckType<20>; // 1..1
-def ImmCheck1_3                 : ImmCheckType<21>; // 1..3
-def ImmCheck1_7                 : ImmCheckType<22>; // 1..7
-
-class ImmCheck<int arg, ImmCheckType kind, int eltSizeArg = -1> {
-  int Arg = arg;
-  int EltSizeArg = eltSizeArg;
-  ImmCheckType Kind = kind;
-}
-
 defvar InvalidMode = "";
 
 class Inst<string n, string p, string t, MergeType mt, string i,
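The `ImmCheck<arg, kind, eltSizeArg>` records now shared via `arm_immcheck_incl.td` describe, per intrinsic, which operand is the immediate, which range rule applies (e.g. `ImmCheck0_3` means 0..3 and `ImmCheckLaneIndex` means 0..(vector bits / element bits) - 1, per the comments above), and which operand's element type fixes the lane bound. Below is a minimal, self-contained sketch of consuming such a table; the names and signatures are illustrative assumptions, not the actual NeonEmitter output or SemaARM code.

```cpp
// Illustrative sketch only: hypothetical names, not the generated Sema code.
#include <cassert>
#include <cstdint>
#include <vector>

enum class ImmCheckKind { Range0_3, LaneIndex };

struct ImmCheck {
  unsigned ImmArg;     // index of the immediate operand
  ImmCheckKind Kind;   // which range rule applies
  unsigned EltSizeArg; // operand whose element width determines the lane count
};

bool checkImm(const std::vector<int64_t> &Args,
              const std::vector<unsigned> &EltBits, ImmCheck C,
              unsigned VecBits = 128) {
  int64_t Imm = Args[C.ImmArg];
  switch (C.Kind) {
  case ImmCheckKind::Range0_3:
    return Imm >= 0 && Imm <= 3;
  case ImmCheckKind::LaneIndex: // 0 .. (VecBits / element-bits) - 1
    return Imm >= 0 && Imm < VecBits / EltBits[C.EltSizeArg];
  }
  return false;
}

int main() {
  // A lane index into a 128-bit vector of 16-bit elements must be 0..7,
  // mirroring an ImmCheck<1, ImmCheckLaneIndex, 0> entry on a vget_lane-style call.
  ImmCheck LaneCheck{1, ImmCheckKind::LaneIndex, 0};
  assert(checkImm({0, 7}, {16, 0}, LaneCheck));
  assert(!checkImm({0, 8}, {16, 0}, LaneCheck));
}
```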
diff --git clang/include/clang/Driver/Options.td clang/include/clang/Driver/Options.td
index 53545b72d550..8c6919965c70 100644
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -949,7 +949,7 @@ def : Flag<["-"], "fident">, Group<f_Group>, Alias<Qy>,
 def : Flag<["-"], "fno-ident">, Group<f_Group>, Alias<Qn>,
   Visibility<[ClangOption, CLOption, DXCOption, CC1Option]>;
 def Qunused_arguments : Flag<["-"], "Qunused-arguments">,
-  Flags<[NoXarchOption]>, Visibility<[ClangOption, CLOption, DXCOption]>,
+  Flags<[NoXarchOption]>, Visibility<[ClangOption, CLOption, DXCOption, FlangOption]>,
   HelpText<"Don't emit warning for unused driver arguments">;
 def Q : Flag<["-"], "Q">, IgnoredGCCCompat;
 def S : Flag<["-"], "S">, Flags<[NoXarchOption]>,
@@ -1051,6 +1051,7 @@ def z : Separate<["-"], "z">, Flags<[LinkerInput]>,
 def offload_link : Flag<["--"], "offload-link">, Group<Link_Group>,
   HelpText<"Use the new offloading linker to perform the link job.">;
 def Xlinker : Separate<["-"], "Xlinker">, Flags<[LinkerInput, RenderAsInput]>,
+  Visibility<[ClangOption, CLOption, FlangOption, DXCOption]>,
   HelpText<"Pass <arg> to the linker">, MetaVarName<"<arg>">,
   Group<Link_Group>;
 def Xoffload_linker : JoinedAndSeparate<["-"], "Xoffload-linker">,
@@ -3545,6 +3546,7 @@ def fopenmp : Flag<["-"], "fopenmp">, Group<f_Group>,
   Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>,
   HelpText<"Parse OpenMP pragmas and generate parallel code.">;
 def fno_openmp : Flag<["-"], "fno-openmp">, Group<f_Group>,
+  Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>,
   Flags<[NoArgumentUnused]>;
 class OpenMPVersionHelp<string program, string default> {
   string str = !strconcat(
@@ -5989,7 +5991,9 @@ def _no_line_commands : Flag<["--"], "no-line-commands">, Alias<P>;
 def _no_standard_includes : Flag<["--"], "no-standard-includes">, Alias<nostdinc>;
 def _no_standard_libraries : Flag<["--"], "no-standard-libraries">, Alias<nostdlib>;
 def _no_undefined : Flag<["--"], "no-undefined">, Flags<[LinkerInput]>;
-def _no_warnings : Flag<["--"], "no-warnings">, Alias<w>;
+def _no_warnings : Flag<["--"], "no-warnings">,
+  Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>,
+  Alias<w>;
 def _optimize_EQ : Joined<["--"], "optimize=">, Alias<O>;
 def _optimize : Flag<["--"], "optimize">, Alias<O>;
 def _output_class_directory_EQ : Joined<["--"], "output-class-directory=">, Alias<foutput_class_dir_EQ>;
@@ -6765,6 +6769,14 @@ def fdefault_integer_8 : Flag<["-"],"fdefault-integer-8">, Group<f_Group>,
   HelpText<"Set the default integer and logical kind to an 8 byte wide type">;
 def fdefault_real_8 : Flag<["-"],"fdefault-real-8">, Group<f_Group>,
   HelpText<"Set the default real kind to an 8 byte wide type">;
+def fdisable_real_3 : Flag<["-"],"fdisable-real-3">, Group<f_Group>,
+   HelpText<"Disable real(KIND=3) from TargetCharacteristics">, Flags<[HelpHidden]>;
+def fdisable_real_10 : Flag<["-"],"fdisable-real-10">, Group<f_Group>,
+  HelpText<"Disable real(KIND=10) from TargetCharacteristics">, Flags<[HelpHidden]>;
+def fdisable_integer_2 : Flag<["-"],"fdisable-integer-2">, Group<f_Group>,
+  HelpText<"Disable integer(KIND=2) from TargetCharacteristics">, Flags<[HelpHidden]>;
+def fdisable_integer_16 : Flag<["-"],"fdisable-integer-16">, Group<f_Group>,
+  HelpText<"Disable integer(KIND=16) from TargetCharacteristics">, Flags<[HelpHidden]>;
 def flarge_sizes : Flag<["-"],"flarge-sizes">, Group<f_Group>,
   HelpText<"Use INTEGER(KIND=8) for the result type in size-related intrinsics">;
 
diff --git clang/include/clang/Driver/Types.def clang/include/clang/Driver/Types.def
index 0e0cae5fb706..af186c5df692 100644
--- clang/include/clang/Driver/Types.def
+++ clang/include/clang/Driver/Types.def
@@ -79,7 +79,17 @@ TYPE("c++-module-cpp-output",    PP_CXXModule, INVALID,         "iim",    phases
 TYPE("ada",                      Ada,          INVALID,         nullptr,  phases::Compile, phases::Backend, phases::Assemble, phases::Link)
 TYPE("assembler",                PP_Asm,       INVALID,         "s",      phases::Assemble, phases::Link)
 TYPE("assembler-with-cpp",       Asm,          PP_Asm,          "S",      phases::Preprocess, phases::Assemble, phases::Link)
-TYPE("f95",                      PP_Fortran,   INVALID,         "i",      phases::Compile, phases::Backend, phases::Assemble, phases::Link)
+
+// Note: The `phases::Preprocess` phase is added to ".i" (i.e. Fortran
+// pre-processed) files. The reason is that the pre-processor "phase" has to be
+// re-run to make sure that e.g. the include flags (i.e. `-I <dir>`) are
+// preserved. That's because these include paths will contain module files and,
+// unlike C header files, these module files wouldn't be included in the
+// pre-processed file. In particular, we need to add the search paths for these
+// modules when Flang needs to emit pre-processed files. Therefore, the
+// `PP_TYPE` is set to `PP_Fortran` so that the driver is fine with
+// "pre-processing a pre-processed file".
+TYPE("f95",                      PP_Fortran,   PP_Fortran,      "i",      phases::Preprocess, phases::Compile, phases::Backend, phases::Assemble, phases::Link)
 TYPE("f95-cpp-input",            Fortran,      PP_Fortran,      nullptr,  phases::Preprocess, phases::Compile, phases::Backend, phases::Assemble, phases::Link)
 TYPE("java",                     Java,         INVALID,         nullptr,  phases::Compile, phases::Backend, phases::Assemble, phases::Link)
 
diff --git clang/include/clang/Interpreter/Value.h clang/include/clang/Interpreter/Value.h
index d70e8f871902..a93c0841915f 100644
--- clang/include/clang/Interpreter/Value.h
+++ clang/include/clang/Interpreter/Value.h
@@ -33,6 +33,7 @@
 #ifndef LLVM_CLANG_INTERPRETER_VALUE_H
 #define LLVM_CLANG_INTERPRETER_VALUE_H
 
+#include "llvm/Config/llvm-config.h" // for LLVM_BUILD_LLVM_DYLIB, LLVM_BUILD_SHARED_LIBS
 #include "llvm/Support/Compiler.h"
 #include <cstdint>
 
diff --git clang/include/clang/Lex/Preprocessor.h clang/include/clang/Lex/Preprocessor.h
index 1307659e27d1..4643b0213815 100644
--- clang/include/clang/Lex/Preprocessor.h
+++ clang/include/clang/Lex/Preprocessor.h
@@ -1053,22 +1053,6 @@ private:
     std::optional<MacroAnnotationInfo> DeprecationInfo;
     std::optional<MacroAnnotationInfo> RestrictExpansionInfo;
     std::optional<SourceLocation> FinalAnnotationLoc;
-
-    static MacroAnnotations makeDeprecation(SourceLocation Loc,
-                                            std::string Msg) {
-      return MacroAnnotations{MacroAnnotationInfo{Loc, std::move(Msg)},
-                              std::nullopt, std::nullopt};
-    }
-
-    static MacroAnnotations makeRestrictExpansion(SourceLocation Loc,
-                                                  std::string Msg) {
-      return MacroAnnotations{
-          std::nullopt, MacroAnnotationInfo{Loc, std::move(Msg)}, std::nullopt};
-    }
-
-    static MacroAnnotations makeFinal(SourceLocation Loc) {
-      return MacroAnnotations{std::nullopt, std::nullopt, Loc};
-    }
   };
 
   /// Warning information for macro annotations.
@@ -2884,35 +2868,18 @@ public:
 
   void addMacroDeprecationMsg(const IdentifierInfo *II, std::string Msg,
                               SourceLocation AnnotationLoc) {
-    auto Annotations = AnnotationInfos.find(II);
-    if (Annotations == AnnotationInfos.end())
-      AnnotationInfos.insert(std::make_pair(
-          II,
-          MacroAnnotations::makeDeprecation(AnnotationLoc, std::move(Msg))));
-    else
-      Annotations->second.DeprecationInfo =
-          MacroAnnotationInfo{AnnotationLoc, std::move(Msg)};
+    AnnotationInfos[II].DeprecationInfo =
+        MacroAnnotationInfo{AnnotationLoc, std::move(Msg)};
   }
 
   void addRestrictExpansionMsg(const IdentifierInfo *II, std::string Msg,
                                SourceLocation AnnotationLoc) {
-    auto Annotations = AnnotationInfos.find(II);
-    if (Annotations == AnnotationInfos.end())
-      AnnotationInfos.insert(
-          std::make_pair(II, MacroAnnotations::makeRestrictExpansion(
-                                 AnnotationLoc, std::move(Msg))));
-    else
-      Annotations->second.RestrictExpansionInfo =
-          MacroAnnotationInfo{AnnotationLoc, std::move(Msg)};
+    AnnotationInfos[II].RestrictExpansionInfo =
+        MacroAnnotationInfo{AnnotationLoc, std::move(Msg)};
   }
 
   void addFinalLoc(const IdentifierInfo *II, SourceLocation AnnotationLoc) {
-    auto Annotations = AnnotationInfos.find(II);
-    if (Annotations == AnnotationInfos.end())
-      AnnotationInfos.insert(
-          std::make_pair(II, MacroAnnotations::makeFinal(AnnotationLoc)));
-    else
-      Annotations->second.FinalAnnotationLoc = AnnotationLoc;
+    AnnotationInfos[II].FinalAnnotationLoc = AnnotationLoc;
   }
 
   const MacroAnnotations &getMacroAnnotations(const IdentifierInfo *II) const {
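The `Preprocessor` hunk above replaces the find-or-insert branches with direct `AnnotationInfos[II].Field = ...` assignments, relying on the map's `operator[]` to default-construct a `MacroAnnotations` entry on first use. A small stand-alone sketch of that idiom, using `std::map` and simplified stand-in types rather than the real annotation structures:

```cpp
// Simplified stand-in types; the real code maps IdentifierInfo* to
// MacroAnnotations with optional deprecation/restrict-expansion/final fields.
#include <cassert>
#include <map>
#include <optional>
#include <string>

struct Annotations {
  std::optional<std::string> DeprecationMsg;
  std::optional<std::string> RestrictExpansionMsg;
  std::optional<int> FinalLoc;
};

int main() {
  std::map<std::string, Annotations> Infos;
  // operator[] default-constructs the entry if the key is missing, so no
  // separate find()/insert() branch is needed before setting one field.
  Infos["MY_MACRO"].DeprecationMsg = "use NEW_MACRO instead";
  Infos["MY_MACRO"].FinalLoc = 42;
  assert(Infos.size() == 1 && Infos["MY_MACRO"].DeprecationMsg.has_value());
}
```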
diff --git clang/include/clang/Parse/Parser.h clang/include/clang/Parse/Parser.h
index a7513069ff5d..47f72135c97c 100644
--- clang/include/clang/Parse/Parser.h
+++ clang/include/clang/Parse/Parser.h
@@ -2943,6 +2943,9 @@ private:
     return false;
   }
 
+  bool ParseSingleGNUAttribute(ParsedAttributes &Attrs, SourceLocation &EndLoc,
+                               LateParsedAttrList *LateAttrs = nullptr,
+                               Declarator *D = nullptr);
   void ParseGNUAttributes(ParsedAttributes &Attrs,
                           LateParsedAttrList *LateAttrs = nullptr,
                           Declarator *D = nullptr);
diff --git clang/include/clang/Sema/Overload.h clang/include/clang/Sema/Overload.h
index d6a6cee62a75..c716a25bb673 100644
--- clang/include/clang/Sema/Overload.h
+++ clang/include/clang/Sema/Overload.h
@@ -225,9 +225,6 @@ class Sema;
     /// HLSL Scalar Widening with promotion
     ICR_HLSL_Scalar_Widening_Promotion,
 
-    /// HLSL Matching Dimension Reduction
-    ICR_HLSL_Dimension_Reduction,
-
     /// Conversion
     ICR_Conversion,
 
@@ -250,6 +247,9 @@ class Sema;
     /// extension anyway.
     ICR_C_Conversion_Extension,
 
+    /// HLSL Matching Dimension Reduction
+    ICR_HLSL_Dimension_Reduction,
+
     /// HLSL Dimension reduction with promotion
     ICR_HLSL_Dimension_Reduction_Promotion,
 
diff --git clang/include/clang/Sema/Sema.h clang/include/clang/Sema/Sema.h
index 0358259945c7..99eef472223a 100644
--- clang/include/clang/Sema/Sema.h
+++ clang/include/clang/Sema/Sema.h
@@ -6403,6 +6403,9 @@ public:
     /// example, in a for-range initializer).
     bool InLifetimeExtendingContext = false;
 
+    /// Whether we should rebuild CXXDefaultArgExpr and CXXDefaultInitExpr.
+    bool RebuildDefaultArgOrDefaultInit = false;
+
     // When evaluating immediate functions in the initializer of a default
     // argument or default member initializer, this is the declaration whose
     // default initializer is being evaluated and the location of the call
@@ -7810,9 +7813,11 @@ public:
   }
 
   bool isInLifetimeExtendingContext() const {
-    assert(!ExprEvalContexts.empty() &&
-           "Must be in an expression evaluation context");
-    return ExprEvalContexts.back().InLifetimeExtendingContext;
+    return currentEvaluationContext().InLifetimeExtendingContext;
+  }
+
+  bool needsRebuildOfDefaultArgOrInit() const {
+    return currentEvaluationContext().RebuildDefaultArgOrDefaultInit;
   }
 
   bool isCheckingDefaultArgumentOrInitializer() const {
@@ -7854,18 +7859,6 @@ public:
     return Res;
   }
 
-  /// keepInLifetimeExtendingContext - Pull down InLifetimeExtendingContext
-  /// flag from previous context.
-  void keepInLifetimeExtendingContext() {
-    if (ExprEvalContexts.size() > 2 &&
-        parentEvaluationContext().InLifetimeExtendingContext) {
-      auto &LastRecord = ExprEvalContexts.back();
-      auto &PrevRecord = parentEvaluationContext();
-      LastRecord.InLifetimeExtendingContext =
-          PrevRecord.InLifetimeExtendingContext;
-    }
-  }
-
   DefaultedComparisonKind getDefaultedComparisonKind(const FunctionDecl *FD) {
     return getDefaultedFunctionKind(FD).asComparison();
   }
@@ -11733,6 +11726,9 @@ public:
   /// receive true if the cause for the error is the associated constraints of
   /// the template not being satisfied by the template arguments.
   ///
+  /// \param DefaultArgs any default arguments from template specialization
+  /// deduction.
+  ///
   /// \param PartialOrderingTTP If true, assume these template arguments are
   /// the injected template arguments for a template template parameter.
   /// This will relax the requirement that all its possible uses are valid:
@@ -11742,7 +11738,8 @@ public:
   /// \returns true if an error occurred, false otherwise.
   bool CheckTemplateArgumentList(
       TemplateDecl *Template, SourceLocation TemplateLoc,
-      TemplateArgumentListInfo &TemplateArgs, bool PartialTemplateArgs,
+      TemplateArgumentListInfo &TemplateArgs,
+      const DefaultArguments &DefaultArgs, bool PartialTemplateArgs,
       SmallVectorImpl<TemplateArgument> &SugaredConverted,
       SmallVectorImpl<TemplateArgument> &CanonicalConverted,
       bool UpdateArgsWithConversions = true,
@@ -12479,8 +12476,8 @@ public:
                                     sema::TemplateDeductionInfo &Info);
 
   bool isTemplateTemplateParameterAtLeastAsSpecializedAs(
-      TemplateParameterList *PParam, TemplateDecl *AArg, SourceLocation Loc,
-      bool IsDeduced);
+      TemplateParameterList *PParam, TemplateDecl *AArg,
+      const DefaultArguments &DefaultArgs, SourceLocation Loc, bool IsDeduced);
 
   /// Mark which template parameters are used in a given expression.
   ///
diff --git clang/include/clang/Sema/SemaARM.h clang/include/clang/Sema/SemaARM.h
index b8196a3170d6..8c4c56e22213 100644
--- clang/include/clang/Sema/SemaARM.h
+++ clang/include/clang/Sema/SemaARM.h
@@ -13,7 +13,9 @@
 #ifndef LLVM_CLANG_SEMA_SEMAARM_H
 #define LLVM_CLANG_SEMA_SEMAARM_H
 
-#include "clang/AST/ASTFwd.h"
+#include "clang/AST/DeclBase.h"
+#include "clang/AST/Expr.h"
+#include "clang/Basic/TargetInfo.h"
 #include "clang/Sema/SemaBase.h"
 #include "llvm/ADT/StringRef.h"
 #include <tuple>
@@ -40,15 +42,21 @@ public:
                             /// flags. Do Sema checks for the runtime mode.
   };
 
+  bool CheckImmediateArg(CallExpr *TheCall, unsigned CheckTy, unsigned ArgIdx,
+                         unsigned EltBitWidth, unsigned VecBitWidth);
   bool CheckARMBuiltinExclusiveCall(unsigned BuiltinID, CallExpr *TheCall,
                                     unsigned MaxWidth);
   bool CheckNeonBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
                                     CallExpr *TheCall);
+  bool PerformNeonImmChecks(
+      CallExpr *TheCall,
+      SmallVectorImpl<std::tuple<int, int, int, int>> &ImmChecks,
+      int OverloadType = -1);
+  bool
+  PerformSVEImmChecks(CallExpr *TheCall,
+                      SmallVectorImpl<std::tuple<int, int, int>> &ImmChecks);
   bool CheckMVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
   bool CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
-  bool
-  ParseSVEImmChecks(CallExpr *TheCall,
-                    llvm::SmallVector<std::tuple<int, int, int>, 3> &ImmChecks);
   bool CheckSMEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
   bool CheckCDEBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
                                    CallExpr *TheCall);
diff --git clang/include/clang/Sema/SemaHLSL.h clang/include/clang/Sema/SemaHLSL.h
index d79ca9a4fa18..64b39ca7712e 100644
--- clang/include/clang/Sema/SemaHLSL.h
+++ clang/include/clang/Sema/SemaHLSL.h
@@ -15,8 +15,10 @@
 
 #include "clang/AST/ASTFwd.h"
 #include "clang/AST/Attr.h"
+#include "clang/AST/Type.h"
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Sema/SemaBase.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/TargetParser/Triple.h"
 #include <initializer_list>
 
@@ -26,6 +28,12 @@ class IdentifierInfo;
 class ParsedAttr;
 class Scope;
 
+// FIXME: This can be hidden (as a static function in SemaHLSL.cpp) once we no

+// longer need to create builtin buffer types in HLSLExternalSemaSource.
+bool CreateHLSLAttributedResourceType(Sema &S, QualType Wrapped,
+                                      ArrayRef<const Attr *> AttrList,
+                                      QualType &ResType);
+
 class SemaHLSL : public SemaBase {
 public:
   SemaHLSL(Sema &S);
@@ -59,8 +67,6 @@ public:
   void handleSV_DispatchThreadIDAttr(Decl *D, const ParsedAttr &AL);
   void handlePackOffsetAttr(Decl *D, const ParsedAttr &AL);
   void handleShaderAttr(Decl *D, const ParsedAttr &AL);
-  void handleROVAttr(Decl *D, const ParsedAttr &AL);
-  void handleResourceClassAttr(Decl *D, const ParsedAttr &AL);
   void handleResourceBindingAttr(Decl *D, const ParsedAttr &AL);
   void handleParamModifierAttr(Decl *D, const ParsedAttr &AL);
   bool handleResourceTypeAttr(const ParsedAttr &AL);
@@ -71,12 +77,23 @@ public:
 
   // HLSL Type trait implementations
   bool IsScalarizedLayoutCompatible(QualType T1, QualType T2) const;
+  bool IsIntangibleType(QualType T1);
 
   bool CheckCompatibleParameterABI(FunctionDecl *New, FunctionDecl *Old);
 
   ExprResult ActOnOutParamExpr(ParmVarDecl *Param, Expr *Arg);
 
   QualType getInoutParameterType(QualType Ty);
+
+private:
+  // HLSL resource type attributes need to be processed all at once.
+  // This is a list to collect them.
+  llvm::SmallVector<const Attr *> HLSLResourcesTypeAttrs;
+
+  /// SourceLocations corresponding to HLSLAttributedResourceTypeLocs that we
+  /// have not yet populated.
+  llvm::DenseMap<const HLSLAttributedResourceType *, SourceLocation>
+      LocsForHLSLAttributedResources;
 };
 
 } // namespace clang
diff --git clang/include/clang/StaticAnalyzer/Core/CheckerManager.h clang/include/clang/StaticAnalyzer/Core/CheckerManager.h
index ad25d18f2807..24c5b66fd582 100644
--- clang/include/clang/StaticAnalyzer/Core/CheckerManager.h
+++ clang/include/clang/StaticAnalyzer/Core/CheckerManager.h
@@ -164,8 +164,6 @@ public:
 
   bool hasPathSensitiveCheckers() const;
 
-  void finishedCheckerRegistration();
-
   const LangOptions &getLangOpts() const { return LangOpts; }
   const AnalyzerOptions &getAnalyzerOptions() const { return AOptions; }
   const Preprocessor &getPreprocessor() const {
diff --git clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h
index 557f0e547ab4..4a343f2872d8 100644
--- clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h
+++ clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h
@@ -76,7 +76,7 @@ public:
   DependencyScanningService(
       ScanningMode Mode, ScanningOutputFormat Format,
       ScanningOptimizations OptimizeArgs = ScanningOptimizations::Default,
-      bool EagerLoadModules = false);
+      bool EagerLoadModules = false, bool TraceVFS = false);
 
   ScanningMode getMode() const { return Mode; }
 
@@ -86,6 +86,8 @@ public:
 
   bool shouldEagerLoadModules() const { return EagerLoadModules; }
 
+  bool shouldTraceVFS() const { return TraceVFS; }
+
   DependencyScanningFilesystemSharedCache &getSharedCache() {
     return SharedCache;
   }
@@ -97,6 +99,8 @@ private:
   const ScanningOptimizations OptimizeArgs;
   /// Whether to set up command-lines to load PCM files eagerly.
   const bool EagerLoadModules;
+  /// Whether to trace VFS accesses.
+  const bool TraceVFS;
   /// The global file system cache.
   DependencyScanningFilesystemSharedCache SharedCache;
 };
diff --git clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h
index cb9476d1550d..012237e0278f 100644
--- clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h
+++ clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h
@@ -144,6 +144,8 @@ public:
       StringRef CWD, const llvm::DenseSet<ModuleID> &AlreadySeen,
       LookupModuleOutputCallback LookupModuleOutput);
 
+  llvm::vfs::FileSystem &getWorkerVFS() const { return Worker.getVFS(); }
+
 private:
   DependencyScanningWorker Worker;
 };
diff --git clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h
index 0f607862194b..da6e0401411a 100644
--- clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h
+++ clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h
@@ -104,6 +104,8 @@ public:
 
   bool shouldEagerLoadModules() const { return EagerLoadModules; }
 
+  llvm::vfs::FileSystem &getVFS() const { return *BaseFS; }
+
 private:
   std::shared_ptr<PCHContainerOperations> PCHContainerOps;
   /// The file system to be used during the scan.
diff --git clang/include/clang/Tooling/Inclusions/StandardLibrary.h clang/include/clang/Tooling/Inclusions/StandardLibrary.h
index a39ceb520dcf..147f505ade05 100644
--- clang/include/clang/Tooling/Inclusions/StandardLibrary.h
+++ clang/include/clang/Tooling/Inclusions/StandardLibrary.h
@@ -21,6 +21,7 @@
 #include "llvm/Support/raw_ostream.h"
 #include <optional>
 #include <string>
+#include <vector>
 
 namespace clang {
 class Decl;
diff --git clang/lib/APINotes/APINotesWriter.cpp clang/lib/APINotes/APINotesWriter.cpp
index c452677983bb..a2b3669a3144 100644
--- clang/lib/APINotes/APINotesWriter.cpp
+++ clang/lib/APINotes/APINotesWriter.cpp
@@ -129,13 +129,9 @@ class APINotesWriter::Implementation {
     if (Identifier.empty())
       return 0;
 
-    auto Known = IdentifierIDs.find(Identifier);
-    if (Known != IdentifierIDs.end())
-      return Known->second;
-
-    // Add to the identifier table.
-    Known = IdentifierIDs.insert({Identifier, IdentifierIDs.size() + 1}).first;
-    return Known->second;
+    // Add to the identifier table if missing.
+    return IdentifierIDs.try_emplace(Identifier, IdentifierIDs.size() + 1)
+        .first->second;
   }
 
   /// Retrieve the ID for the given selector.
@@ -147,14 +143,8 @@ class APINotesWriter::Implementation {
     for (auto piece : SelectorRef.Identifiers)
       Selector.Identifiers.push_back(getIdentifier(piece));
 
-    // Look for the stored selector.
-    auto Known = SelectorIDs.find(Selector);
-    if (Known != SelectorIDs.end())
-      return Known->second;
-
-    // Add to the selector table.
-    Known = SelectorIDs.insert({Selector, SelectorIDs.size()}).first;
-    return Known->second;
+    // Look for the stored selector.  Add to the selector table if missing.
+    return SelectorIDs.try_emplace(Selector, SelectorIDs.size()).first->second;
   }
 
 private:
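Both `APINotesWriter` hunks collapse a separate lookup and insertion into a single `try_emplace(...).first->second`, which constructs a new ID only when the key is absent and otherwise returns the existing one. A minimal sketch of the same idiom, shown here with `std::map` rather than the writer's own map members:

```cpp
#include <cassert>
#include <map>
#include <string>

// Assign stable, 1-based IDs to identifiers; repeated lookups return the ID
// assigned on first insertion because try_emplace never overwrites.
unsigned getIdentifierID(std::map<std::string, unsigned> &IDs,
                         const std::string &Name) {
  if (Name.empty())
    return 0; // 0 is reserved for the empty identifier, as in the hunk above
  return IDs.try_emplace(Name, IDs.size() + 1).first->second;
}

int main() {
  std::map<std::string, unsigned> IDs;
  assert(getIdentifierID(IDs, "foo") == 1);
  assert(getIdentifierID(IDs, "bar") == 2);
  assert(getIdentifierID(IDs, "foo") == 1); // existing entry is reused
}
```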
diff --git clang/lib/AST/ASTContext.cpp clang/lib/AST/ASTContext.cpp
index c61234aa4d1a..8ece39a38304 100644
--- clang/lib/AST/ASTContext.cpp
+++ clang/lib/AST/ASTContext.cpp
@@ -881,8 +881,8 @@ ASTContext::ASTContext(LangOptions &LOpts, SourceManager &SM,
       TemplateSpecializationTypes(this_()),
       DependentTemplateSpecializationTypes(this_()), AutoTypes(this_()),
       DependentBitIntTypes(this_()), SubstTemplateTemplateParmPacks(this_()),
-      ArrayParameterTypes(this_()), CanonTemplateTemplateParms(this_()),
-      SourceMgr(SM), LangOpts(LOpts),
+      DeducedTemplates(this_()), ArrayParameterTypes(this_()),
+      CanonTemplateTemplateParms(this_()), SourceMgr(SM), LangOpts(LOpts),
       NoSanitizeL(new NoSanitizeList(LangOpts.NoSanitizeFiles, SM)),
       XRayFilter(new XRayFunctionFilter(LangOpts.XRayAlwaysInstrumentFiles,
                                         LangOpts.XRayNeverInstrumentFiles,
@@ -2203,13 +2203,12 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const {
     // Because the length is only known at runtime, we use a dummy value
     // of 0 for the static length.  The alignment values are those defined
     // by the Procedure Call Standard for the Arm Architecture.
-#define SVE_VECTOR_TYPE(Name, MangledName, Id, SingletonId, NumEls, ElBits,    \
-                        IsSigned, IsFP, IsBF)                                  \
+#define SVE_VECTOR_TYPE(Name, MangledName, Id, SingletonId)                    \
   case BuiltinType::Id:                                                        \
     Width = 0;                                                                 \
     Align = 128;                                                               \
     break;
-#define SVE_PREDICATE_TYPE(Name, MangledName, Id, SingletonId, NumEls)         \
+#define SVE_PREDICATE_TYPE(Name, MangledName, Id, SingletonId)                 \
   case BuiltinType::Id:                                                        \
     Width = 0;                                                                 \
     Align = 16;                                                                \
@@ -4284,108 +4283,27 @@ ASTContext::getBuiltinVectorTypeInfo(const BuiltinType *Ty) const {
   switch (Ty->getKind()) {
   default:
     llvm_unreachable("Unsupported builtin vector type");
-  case BuiltinType::SveInt8:
-    return SVE_INT_ELTTY(8, 16, true, 1);
-  case BuiltinType::SveUint8:
-    return SVE_INT_ELTTY(8, 16, false, 1);
-  case BuiltinType::SveInt8x2:
-    return SVE_INT_ELTTY(8, 16, true, 2);
-  case BuiltinType::SveUint8x2:
-    return SVE_INT_ELTTY(8, 16, false, 2);
-  case BuiltinType::SveInt8x3:
-    return SVE_INT_ELTTY(8, 16, true, 3);
-  case BuiltinType::SveUint8x3:
-    return SVE_INT_ELTTY(8, 16, false, 3);
-  case BuiltinType::SveInt8x4:
-    return SVE_INT_ELTTY(8, 16, true, 4);
-  case BuiltinType::SveUint8x4:
-    return SVE_INT_ELTTY(8, 16, false, 4);
-  case BuiltinType::SveInt16:
-    return SVE_INT_ELTTY(16, 8, true, 1);
-  case BuiltinType::SveUint16:
-    return SVE_INT_ELTTY(16, 8, false, 1);
-  case BuiltinType::SveInt16x2:
-    return SVE_INT_ELTTY(16, 8, true, 2);
-  case BuiltinType::SveUint16x2:
-    return SVE_INT_ELTTY(16, 8, false, 2);
-  case BuiltinType::SveInt16x3:
-    return SVE_INT_ELTTY(16, 8, true, 3);
-  case BuiltinType::SveUint16x3:
-    return SVE_INT_ELTTY(16, 8, false, 3);
-  case BuiltinType::SveInt16x4:
-    return SVE_INT_ELTTY(16, 8, true, 4);
-  case BuiltinType::SveUint16x4:
-    return SVE_INT_ELTTY(16, 8, false, 4);
-  case BuiltinType::SveInt32:
-    return SVE_INT_ELTTY(32, 4, true, 1);
-  case BuiltinType::SveUint32:
-    return SVE_INT_ELTTY(32, 4, false, 1);
-  case BuiltinType::SveInt32x2:
-    return SVE_INT_ELTTY(32, 4, true, 2);
-  case BuiltinType::SveUint32x2:
-    return SVE_INT_ELTTY(32, 4, false, 2);
-  case BuiltinType::SveInt32x3:
-    return SVE_INT_ELTTY(32, 4, true, 3);
-  case BuiltinType::SveUint32x3:
-    return SVE_INT_ELTTY(32, 4, false, 3);
-  case BuiltinType::SveInt32x4:
-    return SVE_INT_ELTTY(32, 4, true, 4);
-  case BuiltinType::SveUint32x4:
-    return SVE_INT_ELTTY(32, 4, false, 4);
-  case BuiltinType::SveInt64:
-    return SVE_INT_ELTTY(64, 2, true, 1);
-  case BuiltinType::SveUint64:
-    return SVE_INT_ELTTY(64, 2, false, 1);
-  case BuiltinType::SveInt64x2:
-    return SVE_INT_ELTTY(64, 2, true, 2);
-  case BuiltinType::SveUint64x2:
-    return SVE_INT_ELTTY(64, 2, false, 2);
-  case BuiltinType::SveInt64x3:
-    return SVE_INT_ELTTY(64, 2, true, 3);
-  case BuiltinType::SveUint64x3:
-    return SVE_INT_ELTTY(64, 2, false, 3);
-  case BuiltinType::SveInt64x4:
-    return SVE_INT_ELTTY(64, 2, true, 4);
-  case BuiltinType::SveUint64x4:
-    return SVE_INT_ELTTY(64, 2, false, 4);
-  case BuiltinType::SveBool:
-    return SVE_ELTTY(BoolTy, 16, 1);
-  case BuiltinType::SveBoolx2:
-    return SVE_ELTTY(BoolTy, 16, 2);
-  case BuiltinType::SveBoolx4:
-    return SVE_ELTTY(BoolTy, 16, 4);
-  case BuiltinType::SveFloat16:
-    return SVE_ELTTY(HalfTy, 8, 1);
-  case BuiltinType::SveFloat16x2:
-    return SVE_ELTTY(HalfTy, 8, 2);
-  case BuiltinType::SveFloat16x3:
-    return SVE_ELTTY(HalfTy, 8, 3);
-  case BuiltinType::SveFloat16x4:
-    return SVE_ELTTY(HalfTy, 8, 4);
-  case BuiltinType::SveFloat32:
-    return SVE_ELTTY(FloatTy, 4, 1);
-  case BuiltinType::SveFloat32x2:
-    return SVE_ELTTY(FloatTy, 4, 2);
-  case BuiltinType::SveFloat32x3:
-    return SVE_ELTTY(FloatTy, 4, 3);
-  case BuiltinType::SveFloat32x4:
-    return SVE_ELTTY(FloatTy, 4, 4);
-  case BuiltinType::SveFloat64:
-    return SVE_ELTTY(DoubleTy, 2, 1);
-  case BuiltinType::SveFloat64x2:
-    return SVE_ELTTY(DoubleTy, 2, 2);
-  case BuiltinType::SveFloat64x3:
-    return SVE_ELTTY(DoubleTy, 2, 3);
-  case BuiltinType::SveFloat64x4:
-    return SVE_ELTTY(DoubleTy, 2, 4);
-  case BuiltinType::SveBFloat16:
-    return SVE_ELTTY(BFloat16Ty, 8, 1);
-  case BuiltinType::SveBFloat16x2:
-    return SVE_ELTTY(BFloat16Ty, 8, 2);
-  case BuiltinType::SveBFloat16x3:
-    return SVE_ELTTY(BFloat16Ty, 8, 3);
-  case BuiltinType::SveBFloat16x4:
-    return SVE_ELTTY(BFloat16Ty, 8, 4);
+
+#define SVE_VECTOR_TYPE_INT(Name, MangledName, Id, SingletonId, NumEls,        \
+                            ElBits, NF, IsSigned)                              \
+  case BuiltinType::Id:                                                        \
+    return {getIntTypeForBitwidth(ElBits, IsSigned),                           \
+            llvm::ElementCount::getScalable(NumEls), NF};
+#define SVE_VECTOR_TYPE_FLOAT(Name, MangledName, Id, SingletonId, NumEls,      \
+                              ElBits, NF)                                      \
+  case BuiltinType::Id:                                                        \
+    return {ElBits == 16 ? HalfTy : (ElBits == 32 ? FloatTy : DoubleTy),       \
+            llvm::ElementCount::getScalable(NumEls), NF};
+#define SVE_VECTOR_TYPE_BFLOAT(Name, MangledName, Id, SingletonId, NumEls,     \
+                               ElBits, NF)                                     \
+  case BuiltinType::Id:                                                        \
+    return {BFloat16Ty, llvm::ElementCount::getScalable(NumEls), NF};
+#define SVE_PREDICATE_TYPE_ALL(Name, MangledName, Id, SingletonId, NumEls, NF) \
+  case BuiltinType::Id:                                                        \
+    return {BoolTy, llvm::ElementCount::getScalable(NumEls), NF};
+#define SVE_OPAQUE_TYPE(Name, MangledName, Id, SingletonId)
+#include "clang/Basic/AArch64SVEACLETypes.def"
+
 #define RVV_VECTOR_TYPE_INT(Name, Id, SingletonId, NumEls, ElBits, NF,         \
                             IsSigned)                                          \
   case BuiltinType::Id:                                                        \
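The rewritten SVE cases above (and in the next hunk) rely on the usual `.def`-include technique: the consumer defines only the per-entry macros it needs, and the `#include` expands one entry per builtin type. A toy, self-contained version of that X-macro pattern, with a hypothetical type list rather than the real `AArch64SVEACLETypes.def`:

```cpp
#include <cstdio>

// Stand-in for a .def file: one macro invocation per entry.
#define SVE_TYPE_LIST(VEC, PRED) \
  VEC(SveInt8, 16, 8)            \
  VEC(SveFloat32, 4, 32)         \
  PRED(SveBool, 16)

int main() {
  // The consumer defines just the handlers it needs before expanding the list;
  // categories it does not care about can be defined to expand to nothing.
#define HANDLE_VEC(Id, NumEls, ElBits)                                         \
  std::printf(#Id ": %d x %d-bit elements\n", NumEls, ElBits);
#define HANDLE_PRED(Id, NumEls)                                                \
  std::printf(#Id ": %d x 1-bit predicate elements\n", NumEls);
  SVE_TYPE_LIST(HANDLE_VEC, HANDLE_PRED)
#undef HANDLE_VEC
#undef HANDLE_PRED
}
```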
@@ -4425,22 +4343,30 @@ QualType ASTContext::getScalableVectorType(QualType EltTy, unsigned NumElts,
                                            unsigned NumFields) const {
   if (Target->hasAArch64SVETypes()) {
     uint64_t EltTySize = getTypeSize(EltTy);
-#define SVE_VECTOR_TYPE(Name, MangledName, Id, SingletonId, NumEls, ElBits,    \
-                        IsSigned, IsFP, IsBF)                                  \
-  if (!EltTy->isBooleanType() &&                                               \
-      ((EltTy->hasIntegerRepresentation() &&                                   \
-        EltTy->hasSignedIntegerRepresentation() == IsSigned) ||                \
-       (EltTy->hasFloatingRepresentation() && !EltTy->isBFloat16Type() &&      \
-        IsFP && !IsBF) ||                                                      \
-       (EltTy->hasFloatingRepresentation() && EltTy->isBFloat16Type() &&       \
-        IsBF && !IsFP)) &&                                                     \
-      EltTySize == ElBits && NumElts == NumEls) {                              \
+
+#define SVE_VECTOR_TYPE_INT(Name, MangledName, Id, SingletonId, NumEls,        \
+                            ElBits, NF, IsSigned)                              \
+  if (EltTy->hasIntegerRepresentation() && !EltTy->isBooleanType() &&          \
+      EltTy->hasSignedIntegerRepresentation() == IsSigned &&                   \
+      EltTySize == ElBits && NumElts == (NumEls * NF) && NumFields == 1) {     \
     return SingletonId;                                                        \
   }
-#define SVE_PREDICATE_TYPE(Name, MangledName, Id, SingletonId, NumEls)         \
-  if (EltTy->isBooleanType() && NumElts == NumEls)                             \
+#define SVE_VECTOR_TYPE_FLOAT(Name, MangledName, Id, SingletonId, NumEls,      \
+                              ElBits, NF)                                      \
+  if (EltTy->hasFloatingRepresentation() && !EltTy->isBFloat16Type() &&        \
+      EltTySize == ElBits && NumElts == (NumEls * NF) && NumFields == 1) {     \
+    return SingletonId;                                                        \
+  }
+#define SVE_VECTOR_TYPE_BFLOAT(Name, MangledName, Id, SingletonId, NumEls,     \
+                               ElBits, NF)                                     \
+  if (EltTy->hasFloatingRepresentation() && EltTy->isBFloat16Type() &&         \
+      EltTySize == ElBits && NumElts == (NumEls * NF) && NumFields == 1) {     \
+    return SingletonId;                                                        \
+  }
+#define SVE_PREDICATE_TYPE_ALL(Name, MangledName, Id, SingletonId, NumEls, NF) \
+  if (EltTy->isBooleanType() && NumElts == (NumEls * NF) && NumFields == 1)    \
     return SingletonId;
-#define SVE_OPAQUE_TYPE(Name, MangledName, Id, SingleTonId)
+#define SVE_OPAQUE_TYPE(Name, MangledName, Id, SingletonId)
 #include "clang/Basic/AArch64SVEACLETypes.def"
   } else if (Target->hasRISCVVTypes()) {
     uint64_t EltTySize = getTypeSize(EltTy);
@@ -5411,7 +5337,7 @@ ASTContext::getTemplateSpecializationType(TemplateName Template,
   assert(!Template.getAsDependentTemplateName() &&
          "No dependent template names here!");
 
-  const auto *TD = Template.getAsTemplateDecl();
+  const auto *TD = Template.getAsTemplateDecl(/*IgnoreDeduced=*/true);
   bool IsTypeAlias = TD && TD->isTypeAlias();
   QualType CanonType;
   if (!Underlying.isNull())
@@ -5446,7 +5372,12 @@ QualType ASTContext::getCanonicalTemplateSpecializationType(
          "No dependent template names here!");
 
   // Build the canonical template specialization type.
-  TemplateName CanonTemplate = getCanonicalTemplateName(Template);
+  // Any DeducedTemplateNames are ignored, because the effective name of a TST
+  // accounts for the TST arguments laid over any default arguments contained in
+  // its name.
+  TemplateName CanonTemplate =
+      getCanonicalTemplateName(Template, /*IgnoreDeduced=*/true);
+
   bool AnyNonCanonArgs = false;
   auto CanonArgs =
       ::getCanonicalTemplateArguments(*this, Args, AnyNonCanonArgs);
@@ -6188,11 +6119,13 @@ QualType ASTContext::getPackIndexingType(QualType Pattern, Expr *IndexExpr,
                                          ArrayRef<QualType> Expansions,
                                          int Index) const {
   QualType Canonical;
+  bool ExpandsToEmptyPack = FullySubstituted && Expansions.empty();
   if (FullySubstituted && Index != -1) {
     Canonical = getCanonicalType(Expansions[Index]);
   } else {
     llvm::FoldingSetNodeID ID;
-    PackIndexingType::Profile(ID, *this, Pattern, IndexExpr);
+    PackIndexingType::Profile(ID, *this, Pattern, IndexExpr,
+                              ExpandsToEmptyPack);
     void *InsertPos = nullptr;
     PackIndexingType *Canon =
         DependentPackIndexingTypes.FindNodeOrInsertPos(ID, InsertPos);
@@ -6200,8 +6133,8 @@ QualType ASTContext::getPackIndexingType(QualType Pattern, Expr *IndexExpr,
       void *Mem = Allocate(
           PackIndexingType::totalSizeToAlloc<QualType>(Expansions.size()),
           TypeAlignment);
-      Canon = new (Mem)
-          PackIndexingType(*this, QualType(), Pattern, IndexExpr, Expansions);
+      Canon = new (Mem) PackIndexingType(*this, QualType(), Pattern, IndexExpr,
+                                         ExpandsToEmptyPack, Expansions);
       DependentPackIndexingTypes.InsertNode(Canon, InsertPos);
     }
     Canonical = QualType(Canon, 0);
@@ -6210,8 +6143,8 @@ QualType ASTContext::getPackIndexingType(QualType Pattern, Expr *IndexExpr,
   void *Mem =
       Allocate(PackIndexingType::totalSizeToAlloc<QualType>(Expansions.size()),
                TypeAlignment);
-  auto *T = new (Mem)
-      PackIndexingType(*this, Canonical, Pattern, IndexExpr, Expansions);
+  auto *T = new (Mem) PackIndexingType(*this, Canonical, Pattern, IndexExpr,
+                                       ExpandsToEmptyPack, Expansions);
   Types.push_back(T);
   return QualType(T, 0);
 }
@@ -6751,16 +6684,41 @@ ASTContext::getNameForTemplate(TemplateName Name,
   case TemplateName::UsingTemplate:
     return DeclarationNameInfo(Name.getAsUsingShadowDecl()->getDeclName(),
                                NameLoc);
+  case TemplateName::DeducedTemplate: {
+    DeducedTemplateStorage *DTS = Name.getAsDeducedTemplateName();
+    return getNameForTemplate(DTS->getUnderlying(), NameLoc);
+  }
   }
 
   llvm_unreachable("bad template name kind!");
 }
 
-TemplateName
-ASTContext::getCanonicalTemplateName(const TemplateName &Name) const {
+static const TemplateArgument *
+getDefaultTemplateArgumentOrNone(const NamedDecl *P) {
+  auto handleParam = [](auto *TP) -> const TemplateArgument * {
+    if (!TP->hasDefaultArgument())
+      return nullptr;
+    return &TP->getDefaultArgument().getArgument();
+  };
+  switch (P->getKind()) {
+  case NamedDecl::TemplateTypeParm:
+    return handleParam(cast<TemplateTypeParmDecl>(P));
+  case NamedDecl::NonTypeTemplateParm:
+    return handleParam(cast<NonTypeTemplateParmDecl>(P));
+  case NamedDecl::TemplateTemplateParm:
+    return handleParam(cast<TemplateTemplateParmDecl>(P));
+  default:
+    llvm_unreachable("Unexpected template parameter kind");
+  }
+}
+
+TemplateName ASTContext::getCanonicalTemplateName(TemplateName Name,
+                                                  bool IgnoreDeduced) const {
+  while (std::optional<TemplateName> UnderlyingOrNone =
+             Name.desugar(IgnoreDeduced))
+    Name = *UnderlyingOrNone;
+
   switch (Name.getKind()) {
-  case TemplateName::UsingTemplate:
-  case TemplateName::QualifiedTemplate:
   case TemplateName::Template: {
     TemplateDecl *Template = Name.getAsTemplateDecl();
     if (auto *TTP  = dyn_cast<TemplateTemplateParmDecl>(Template))
@@ -6780,12 +6738,6 @@ ASTContext::getCanonicalTemplateName(const TemplateName &Name) const {
     return DTN->CanonicalTemplateName;
   }
 
-  case TemplateName::SubstTemplateTemplateParm: {
-    SubstTemplateTemplateParmStorage *subst
-      = Name.getAsSubstTemplateTemplateParm();
-    return getCanonicalTemplateName(subst->getReplacement());
-  }
-
   case TemplateName::SubstTemplateTemplateParmPack: {
     SubstTemplateTemplateParmPackStorage *subst =
         Name.getAsSubstTemplateTemplateParmPack();
@@ -6795,15 +6747,58 @@ ASTContext::getCanonicalTemplateName(const TemplateName &Name) const {
         canonArgPack, subst->getAssociatedDecl()->getCanonicalDecl(),
         subst->getFinal(), subst->getIndex());
   }
+  case TemplateName::DeducedTemplate: {
+    assert(IgnoreDeduced == false);
+    DeducedTemplateStorage *DTS = Name.getAsDeducedTemplateName();
+    DefaultArguments DefArgs = DTS->getDefaultArguments();
+    TemplateName Underlying = DTS->getUnderlying();
+
+    TemplateName CanonUnderlying =
+        getCanonicalTemplateName(Underlying, /*IgnoreDeduced=*/true);
+    bool NonCanonical = CanonUnderlying != Underlying;
+    auto CanonArgs =
+        getCanonicalTemplateArguments(*this, DefArgs.Args, NonCanonical);
+
+    ArrayRef<NamedDecl *> Params =
+        CanonUnderlying.getAsTemplateDecl()->getTemplateParameters()->asArray();
+    assert(CanonArgs.size() <= Params.size());
+    // A deduced template name which deduces the same default arguments already
+    // declared in the underlying template is the same template as the
+    // underlying template. We only need to note any arguments which differ from
+    // the corresponding declaration. If any argument differs, we must build a
+    // deduced template name.
+    for (int I = CanonArgs.size() - 1; I >= 0; --I) {
+      const TemplateArgument *A = getDefaultTemplateArgumentOrNone(Params[I]);
+      if (!A)
+        break;
+      auto CanonParamDefArg = getCanonicalTemplateArgument(*A);
+      TemplateArgument &CanonDefArg = CanonArgs[I];
+      if (CanonDefArg.structurallyEquals(CanonParamDefArg))
+        continue;
+      // Keep popping from the back any default arguments which are the same.
+      if (I == int(CanonArgs.size() - 1))
+        CanonArgs.pop_back();
+      NonCanonical = true;
+    }
+    return NonCanonical ? getDeducedTemplateName(
+                              CanonUnderlying,
+                              /*DefaultArgs=*/{DefArgs.StartPos, CanonArgs})
+                        : Name;
+  }
+  case TemplateName::UsingTemplate:
+  case TemplateName::QualifiedTemplate:
+  case TemplateName::SubstTemplateTemplateParm:
+    llvm_unreachable("always sugar node");
   }
 
   llvm_unreachable("bad template name!");
 }
 
 bool ASTContext::hasSameTemplateName(const TemplateName &X,
-                                     const TemplateName &Y) const {
-  return getCanonicalTemplateName(X).getAsVoidPointer() ==
-         getCanonicalTemplateName(Y).getAsVoidPointer();
+                                     const TemplateName &Y,
+                                     bool IgnoreDeduced) const {
+  return getCanonicalTemplateName(X, IgnoreDeduced) ==
+         getCanonicalTemplateName(Y, IgnoreDeduced);
 }
 
 bool ASTContext::isSameConstraintExpr(const Expr *XCE, const Expr *YCE) const {
@@ -7292,7 +7287,7 @@ ASTContext::getCanonicalTemplateArgument(const TemplateArgument &Arg) const {
     case TemplateArgument::StructuralValue:
       return TemplateArgument(*this,
                               getCanonicalType(Arg.getStructuralValueType()),
-                              Arg.getAsStructuralValue());
+                              Arg.getAsStructuralValue(), Arg.getIsDefaulted());
 
     case TemplateArgument::Type:
       return TemplateArgument(getCanonicalType(Arg.getAsType()),
@@ -7304,8 +7299,10 @@ ASTContext::getCanonicalTemplateArgument(const TemplateArgument &Arg) const {
           *this, Arg.pack_elements(), AnyNonCanonArgs);
       if (!AnyNonCanonArgs)
         return Arg;
-      return TemplateArgument::CreatePackCopy(const_cast<ASTContext &>(*this),
-                                              CanonArgs);
+      auto NewArg = TemplateArgument::CreatePackCopy(
+          const_cast<ASTContext &>(*this), CanonArgs);
+      NewArg.setIsDefaulted(Arg.getIsDefaulted());
+      return NewArg;
     }
   }
 
@@ -9862,6 +9859,30 @@ ASTContext::getSubstTemplateTemplateParmPack(const TemplateArgument &ArgPack,
   return TemplateName(Subst);
 }
 
+/// Retrieve the template name that represents a template name
+/// deduced from a specialization.
+TemplateName
+ASTContext::getDeducedTemplateName(TemplateName Underlying,
+                                   DefaultArguments DefaultArgs) const {
+  if (!DefaultArgs)
+    return Underlying;
+
+  llvm::FoldingSetNodeID ID;
+  DeducedTemplateStorage::Profile(ID, *this, Underlying, DefaultArgs);
+
+  void *InsertPos = nullptr;
+  DeducedTemplateStorage *DTS =
+      DeducedTemplates.FindNodeOrInsertPos(ID, InsertPos);
+  if (!DTS) {
+    void *Mem = Allocate(sizeof(DeducedTemplateStorage) +
+                             sizeof(TemplateArgument) * DefaultArgs.Args.size(),
+                         alignof(DeducedTemplateStorage));
+    DTS = new (Mem) DeducedTemplateStorage(Underlying, DefaultArgs);
+    DeducedTemplates.InsertNode(DTS, InsertPos);
+  }
+  return TemplateName(DTS);
+}
+
 /// getFromTargetType - Given one of the integer types provided by
 /// TargetInfo, produce the corresponding type. The unsigned @p Type
 /// is actually a value of type @c TargetInfo::IntType.
@@ -13018,22 +13039,24 @@ static T *getCommonDeclChecked(T *X, T *Y) {
 }
 
 static TemplateName getCommonTemplateName(ASTContext &Ctx, TemplateName X,
-                                          TemplateName Y) {
+                                          TemplateName Y,
+                                          bool IgnoreDeduced = false) {
   if (X.getAsVoidPointer() == Y.getAsVoidPointer())
     return X;
   // FIXME: There are cases here where we could find a common template name
   //        with more sugar. For example one could be a SubstTemplateTemplate*
   //        replacing the other.
-  TemplateName CX = Ctx.getCanonicalTemplateName(X);
+  TemplateName CX = Ctx.getCanonicalTemplateName(X, IgnoreDeduced);
   if (CX.getAsVoidPointer() !=
       Ctx.getCanonicalTemplateName(Y).getAsVoidPointer())
     return TemplateName();
   return CX;
 }
 
-static TemplateName
-getCommonTemplateNameChecked(ASTContext &Ctx, TemplateName X, TemplateName Y) {
-  TemplateName R = getCommonTemplateName(Ctx, X, Y);
+static TemplateName getCommonTemplateNameChecked(ASTContext &Ctx,
+                                                 TemplateName X, TemplateName Y,
+                                                 bool IgnoreDeduced) {
+  TemplateName R = getCommonTemplateName(Ctx, X, Y, IgnoreDeduced);
   assert(R.getAsVoidPointer() != nullptr);
   return R;
 }
@@ -13520,7 +13543,8 @@ static QualType getCommonNonSugarTypeNode(ASTContext &Ctx, const Type *X,
                                          TY->template_arguments());
     return Ctx.getTemplateSpecializationType(
         ::getCommonTemplateNameChecked(Ctx, TX->getTemplateName(),
-                                       TY->getTemplateName()),
+                                       TY->getTemplateName(),
+                                       /*IgnoreDeduced=*/true),
         As, X->getCanonicalTypeInternal());
   }
   case Type::Decltype: {
@@ -13749,8 +13773,9 @@ static QualType getCommonSugarTypeNode(ASTContext &Ctx, const Type *X,
   case Type::TemplateSpecialization: {
     const auto *TX = cast<TemplateSpecializationType>(X),
                *TY = cast<TemplateSpecializationType>(Y);
-    TemplateName CTN = ::getCommonTemplateName(Ctx, TX->getTemplateName(),
-                                               TY->getTemplateName());
+    TemplateName CTN =
+        ::getCommonTemplateName(Ctx, TX->getTemplateName(),
+                                TY->getTemplateName(), /*IgnoreDeduced=*/true);
     if (!CTN.getAsVoidPointer())
       return QualType();
     SmallVector<TemplateArgument, 8> Args;
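
A minimal sketch of the kind of source the DeducedTemplate name kind above is meant to model, as inferred from the new getCanonicalTemplateName / getDeducedTemplateName code (the example is illustrative and not taken from the patch's tests):

```cpp
template <class T, class U = int> struct Pair {};

template <template <class> class TT> struct Apply {
  // Pair binds to TT under the relaxed template template parameter matching
  // rules (P0522); TT's missing second argument falls back to U's default,
  // so the template name deduced for TT has to carry that default along.
  using type = TT<float>; // effectively Pair<float, int>
};

Apply<Pair>::type P;
```
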
diff --git clang/lib/AST/ASTDiagnostic.cpp clang/lib/AST/ASTDiagnostic.cpp
index 0680ff5e3a38..15c3efe42127 100644
--- clang/lib/AST/ASTDiagnostic.cpp
+++ clang/lib/AST/ASTDiagnostic.cpp
@@ -1114,8 +1114,8 @@ class TemplateDiff {
   // These functions build up the template diff tree, including functions to
   // retrieve and compare template arguments.
 
-  static const TemplateSpecializationType *GetTemplateSpecializationType(
-      ASTContext &Context, QualType Ty) {
+  static const TemplateSpecializationType *
+  GetTemplateSpecializationType(ASTContext &Context, QualType Ty) {
     if (const TemplateSpecializationType *TST =
             Ty->getAs<TemplateSpecializationType>())
       return TST;
@@ -1159,7 +1159,7 @@ class TemplateDiff {
     if (!FromArgTST || !ToArgTST)
       return true;
 
-    if (!hasSameTemplate(FromArgTST, ToArgTST))
+    if (!hasSameTemplate(Context, FromArgTST, ToArgTST))
       return true;
 
     return false;
@@ -1371,11 +1371,17 @@ class TemplateDiff {
   /// argument info into a tree.
   void DiffTemplate(const TemplateSpecializationType *FromTST,
                     const TemplateSpecializationType *ToTST) {
+    // FIXME: With P3310R0, a TST formed from a DeducedTemplateName might
+    // differ in template arguments which were not written.
     // Begin descent into diffing template tree.
     TemplateParameterList *ParamsFrom =
-        FromTST->getTemplateName().getAsTemplateDecl()->getTemplateParameters();
+        FromTST->getTemplateName()
+            .getAsTemplateDecl(/*IgnoreDeduced=*/true)
+            ->getTemplateParameters();
     TemplateParameterList *ParamsTo =
-        ToTST->getTemplateName().getAsTemplateDecl()->getTemplateParameters();
+        ToTST->getTemplateName()
+            .getAsTemplateDecl(/*IgnoreDeduced=*/true)
+            ->getTemplateParameters();
     unsigned TotalArgs = 0;
     for (TSTiterator FromIter(Context, FromTST), ToIter(Context, ToTST);
          !FromIter.isEnd() || !ToIter.isEnd(); ++TotalArgs) {
@@ -1427,20 +1433,24 @@ class TemplateDiff {
 
   /// hasSameBaseTemplate - Returns true when the base templates are the same,
   /// even if the template arguments are not.
-  static bool hasSameBaseTemplate(const TemplateSpecializationType *FromTST,
+  static bool hasSameBaseTemplate(ASTContext &Context,
+                                  const TemplateSpecializationType *FromTST,
                                   const TemplateSpecializationType *ToTST) {
-    return FromTST->getTemplateName().getAsTemplateDecl()->getCanonicalDecl() ==
-           ToTST->getTemplateName().getAsTemplateDecl()->getCanonicalDecl();
+    return Context.getCanonicalTemplateName(FromTST->getTemplateName(),
+                                            /*IgnoreDeduced=*/true) ==
+           Context.getCanonicalTemplateName(ToTST->getTemplateName(),
+                                            /*IgnoreDeduced=*/true);
   }
 
   /// hasSameTemplate - Returns true if both types are specialized from the
   /// same template declaration.  If they come from different template aliases,
   /// do a parallel ascension search to determine the highest template alias in
   /// common and set the arguments to them.
-  static bool hasSameTemplate(const TemplateSpecializationType *&FromTST,
+  static bool hasSameTemplate(ASTContext &Context,
+                              const TemplateSpecializationType *&FromTST,
                               const TemplateSpecializationType *&ToTST) {
     // Check the top templates if they are the same.
-    if (hasSameBaseTemplate(FromTST, ToTST))
+    if (hasSameBaseTemplate(Context, FromTST, ToTST))
       return true;
 
     // Create vectors of template aliases.
@@ -1455,14 +1465,14 @@ class TemplateDiff {
         ToIter = ToTemplateList.rbegin(), ToEnd = ToTemplateList.rend();
 
     // Check if the lowest template types are the same.  If not, return.
-    if (!hasSameBaseTemplate(*FromIter, *ToIter))
+    if (!hasSameBaseTemplate(Context, *FromIter, *ToIter))
       return false;
 
     // Begin searching up the template aliases.  The bottom most template
     // matches so move up until one pair does not match.  Use the template
     // right before that one.
     for (; FromIter != FromEnd && ToIter != ToEnd; ++FromIter, ++ToIter) {
-      if (!hasSameBaseTemplate(*FromIter, *ToIter))
+      if (!hasSameBaseTemplate(Context, *FromIter, *ToIter))
         break;
     }
 
@@ -2123,7 +2133,7 @@ public:
       return;
 
     // Different base templates.
-    if (!hasSameTemplate(FromOrigTST, ToOrigTST)) {
+    if (!hasSameTemplate(Context, FromOrigTST, ToOrigTST)) {
       return;
     }
 
@@ -2131,10 +2141,11 @@ public:
     ToQual -= QualType(ToOrigTST, 0).getQualifiers();
 
     // Same base template, but different arguments.
-    Tree.SetTemplateDiff(FromOrigTST->getTemplateName().getAsTemplateDecl(),
-                         ToOrigTST->getTemplateName().getAsTemplateDecl(),
-                         FromQual, ToQual, false /*FromDefault*/,
-                         false /*ToDefault*/);
+    Tree.SetTemplateDiff(
+        FromOrigTST->getTemplateName().getAsTemplateDecl(
+            /*IgnoreDeduced=*/true),
+        ToOrigTST->getTemplateName().getAsTemplateDecl(/*IgnoreDeduced=*/true),
+        FromQual, ToQual, false /*FromDefault*/, false /*ToDefault*/);
 
     DiffTemplate(FromOrigTST, ToOrigTST);
   }
diff --git clang/lib/AST/ASTImporter.cpp clang/lib/AST/ASTImporter.cpp
index fa850409ba12..c2fb7dddcfc6 100644
--- clang/lib/AST/ASTImporter.cpp
+++ clang/lib/AST/ASTImporter.cpp
@@ -360,51 +360,42 @@ namespace clang {
     }
 
     template <typename TemplateParmDeclT>
-    void tryUpdateTemplateParmDeclInheritedFrom(NamedDecl *RecentParm,
-                                                NamedDecl *NewParm) {
-      if (auto *ParmT = dyn_cast<TemplateParmDeclT>(RecentParm)) {
-        if (ParmT->hasDefaultArgument()) {
-          auto *P = cast<TemplateParmDeclT>(NewParm);
-          P->removeDefaultArgument();
-          P->setInheritedDefaultArgument(Importer.ToContext, ParmT);
+    Error importTemplateParameterDefaultArgument(const TemplateParmDeclT *D,
+                                                 TemplateParmDeclT *ToD) {
+      Error Err = Error::success();
+      if (D->hasDefaultArgument()) {
+        if (D->defaultArgumentWasInherited()) {
+          auto *ToInheritedFrom = const_cast<TemplateParmDeclT *>(
+              importChecked(Err, D->getDefaultArgStorage().getInheritedFrom()));
+          if (Err)
+            return Err;
+          if (!ToInheritedFrom->hasDefaultArgument()) {
+            // Resolve a possible circular dependency between the default value
+            // of the template argument and the template declaration.
+            const auto ToInheritedDefaultArg =
+                importChecked(Err, D->getDefaultArgStorage()
+                                       .getInheritedFrom()
+                                       ->getDefaultArgument());
+            if (Err)
+              return Err;
+            ToInheritedFrom->setDefaultArgument(Importer.getToContext(),
+                                                ToInheritedDefaultArg);
+          }
+          ToD->setInheritedDefaultArgument(ToD->getASTContext(),
+                                           ToInheritedFrom);
+        } else {
+          Expected<TemplateArgumentLoc> ToDefaultArgOrErr =
+              import(D->getDefaultArgument());
+          if (!ToDefaultArgOrErr)
+            return ToDefaultArgOrErr.takeError();
+          // Default argument could have been set in the
+          // '!ToInheritedFrom->hasDefaultArgument()' branch above.
+          if (!ToD->hasDefaultArgument())
+            ToD->setDefaultArgument(Importer.getToContext(),
+                                    *ToDefaultArgOrErr);
         }
       }
-    }
-
-    // Update the parameter list `NewParams` of a template declaration
-    // by "inheriting" default argument values from `RecentParams`,
-    // which is the parameter list of an earlier declaration of the
-    // same template. (Note that "inheriting" default argument values
-    // is not related to object-oriented inheritance.)
-    //
-    // In the clang AST template parameters (NonTypeTemplateParmDec,
-    // TemplateTypeParmDecl, TemplateTemplateParmDecl) have a reference to the
-    // default value, if one is specified at the first declaration. The default
-    // value can be specified only once. The template parameters of the
-    // following declarations have a reference to the original default value
-    // through the "inherited" value. This value should be set for all imported
-    // template parameters that have a previous declaration (also a previous
-    // template declaration).
-    //
-    // In the `Visit*ParmDecl` functions the default value of these template
-    // arguments is always imported. At that location the previous declaration
-    // is not easily accessible, it is not possible to call
-    // `setInheritedDefaultArgument` at that place.
-    // `updateTemplateParametersInheritedFrom` is called later when the already
-    // imported default value is erased and changed to "inherited".
-    // It is important to change the mode to "inherited" otherwise false
-    // structural in-equivalences could be detected.
-    void updateTemplateParametersInheritedFrom(
-        const TemplateParameterList &RecentParams,
-        TemplateParameterList &NewParams) {
-      for (auto [Idx, Param] : enumerate(RecentParams)) {
-        tryUpdateTemplateParmDeclInheritedFrom<NonTypeTemplateParmDecl>(
-            Param, NewParams.getParam(Idx));
-        tryUpdateTemplateParmDeclInheritedFrom<TemplateTypeParmDecl>(
-            Param, NewParams.getParam(Idx));
-        tryUpdateTemplateParmDeclInheritedFrom<TemplateTemplateParmDecl>(
-            Param, NewParams.getParam(Idx));
-      }
+      return Err;
     }
 
   public:
@@ -5955,8 +5946,8 @@ ASTNodeImporter::VisitObjCPropertyImplDecl(ObjCPropertyImplDecl *D) {
 ExpectedDecl
 ASTNodeImporter::VisitTemplateTypeParmDecl(TemplateTypeParmDecl *D) {
   // For template arguments, we adopt the translation unit as our declaration
-  // context. This context will be fixed when the actual template declaration
-  // is created.
+  // context. This context will be fixed during creation of the actual
+  // template declaration.
 
   ExpectedSLoc BeginLocOrErr = import(D->getBeginLoc());
   if (!BeginLocOrErr)
@@ -5988,13 +5979,8 @@ ASTNodeImporter::VisitTemplateTypeParmDecl(TemplateTypeParmDecl *D) {
     ToD->setTypeConstraint(ToConceptRef, ToIDC);
   }
 
-  if (D->hasDefaultArgument()) {
-    Expected<TemplateArgumentLoc> ToDefaultArgOrErr =
-        import(D->getDefaultArgument());
-    if (!ToDefaultArgOrErr)
-      return ToDefaultArgOrErr.takeError();
-    ToD->setDefaultArgument(ToD->getASTContext(), *ToDefaultArgOrErr);
-  }
+  if (Error Err = importTemplateParameterDefaultArgument(D, ToD))
+    return Err;
 
   return ToD;
 }
@@ -6020,13 +6006,9 @@ ASTNodeImporter::VisitNonTypeTemplateParmDecl(NonTypeTemplateParmDecl *D) {
                               D->isParameterPack(), ToTypeSourceInfo))
     return ToD;
 
-  if (D->hasDefaultArgument()) {
-    Expected<TemplateArgumentLoc> ToDefaultArgOrErr =
-        import(D->getDefaultArgument());
-    if (!ToDefaultArgOrErr)
-      return ToDefaultArgOrErr.takeError();
-    ToD->setDefaultArgument(Importer.getToContext(), *ToDefaultArgOrErr);
-  }
+  Err = importTemplateParameterDefaultArgument(D, ToD);
+  if (Err)
+    return Err;
 
   return ToD;
 }
@@ -6057,13 +6039,8 @@ ASTNodeImporter::VisitTemplateTemplateParmDecl(TemplateTemplateParmDecl *D) {
           *TemplateParamsOrErr))
     return ToD;
 
-  if (D->hasDefaultArgument()) {
-    Expected<TemplateArgumentLoc> ToDefaultArgOrErr =
-        import(D->getDefaultArgument());
-    if (!ToDefaultArgOrErr)
-      return ToDefaultArgOrErr.takeError();
-    ToD->setDefaultArgument(Importer.getToContext(), *ToDefaultArgOrErr);
-  }
+  if (Error Err = importTemplateParameterDefaultArgument(D, ToD))
+    return Err;
 
   return ToD;
 }
@@ -6201,9 +6178,6 @@ ExpectedDecl ASTNodeImporter::VisitClassTemplateDecl(ClassTemplateDecl *D) {
     }
 
     D2->setPreviousDecl(Recent);
-
-    updateTemplateParametersInheritedFrom(*(Recent->getTemplateParameters()),
-                                          **TemplateParamsOrErr);
   }
 
   return D2;
@@ -6518,9 +6492,6 @@ ExpectedDecl ASTNodeImporter::VisitVarTemplateDecl(VarTemplateDecl *D) {
         ToTemplated->setPreviousDecl(PrevTemplated);
     }
     ToVarTD->setPreviousDecl(Recent);
-
-    updateTemplateParametersInheritedFrom(*(Recent->getTemplateParameters()),
-                                          **TemplateParamsOrErr);
   }
 
   return ToVarTD;
@@ -6793,9 +6764,6 @@ ASTNodeImporter::VisitFunctionTemplateDecl(FunctionTemplateDecl *D) {
         TemplatedFD->setPreviousDecl(PrevTemplated);
     }
     ToFunc->setPreviousDecl(Recent);
-
-    updateTemplateParametersInheritedFrom(*(Recent->getTemplateParameters()),
-                                          *Params);
   }
 
   return ToFunc;
@@ -9967,6 +9935,8 @@ Expected<TemplateName> ASTImporter::Import(TemplateName From) {
       return UsingOrError.takeError();
     return TemplateName(cast<UsingShadowDecl>(*UsingOrError));
   }
+  case TemplateName::DeducedTemplate:
+    llvm_unreachable("Unexpected DeducedTemplate");
   }
 
   llvm_unreachable("Invalid template name kind");
diff --git clang/lib/AST/ASTStructuralEquivalence.cpp clang/lib/AST/ASTStructuralEquivalence.cpp
index 0b791700aa48..f13ca2d08d76 100644
--- clang/lib/AST/ASTStructuralEquivalence.cpp
+++ clang/lib/AST/ASTStructuralEquivalence.cpp
@@ -645,6 +645,9 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context,
      // It is sufficient to check value of getAsTemplateDecl.
      break;
 
+   case TemplateName::DeducedTemplate:
+     // FIXME: We can't reach here.
+     llvm_unreachable("unimplemented");
   }
 
   return true;
diff --git clang/lib/AST/ByteCode/ByteCodeEmitter.h clang/lib/AST/ByteCode/ByteCodeEmitter.h
index 7cbbe651699b..ac728830527a 100644
--- clang/lib/AST/ByteCode/ByteCodeEmitter.h
+++ clang/lib/AST/ByteCode/ByteCodeEmitter.h
@@ -46,7 +46,7 @@ protected:
 
   /// Methods implemented by the compiler.
   virtual bool visitFunc(const FunctionDecl *E) = 0;
-  virtual bool visitExpr(const Expr *E) = 0;
+  virtual bool visitExpr(const Expr *E, bool DestroyToplevelScope) = 0;
   virtual bool visitDeclAndReturn(const VarDecl *E, bool ConstantContext) = 0;
 
   /// Emits jumps.
diff --git clang/lib/AST/ByteCode/Compiler.cpp clang/lib/AST/ByteCode/Compiler.cpp
index a831f196abdc..265350e44d95 100644
--- clang/lib/AST/ByteCode/Compiler.cpp
+++ clang/lib/AST/ByteCode/Compiler.cpp
@@ -114,19 +114,27 @@ public:
 
   LoopScope(Compiler<Emitter> *Ctx, LabelTy BreakLabel, LabelTy ContinueLabel)
       : LabelScope<Emitter>(Ctx), OldBreakLabel(Ctx->BreakLabel),
-        OldContinueLabel(Ctx->ContinueLabel) {
+        OldContinueLabel(Ctx->ContinueLabel),
+        OldBreakVarScope(Ctx->BreakVarScope),
+        OldContinueVarScope(Ctx->ContinueVarScope) {
     this->Ctx->BreakLabel = BreakLabel;
     this->Ctx->ContinueLabel = ContinueLabel;
+    this->Ctx->BreakVarScope = this->Ctx->VarScope;
+    this->Ctx->ContinueVarScope = this->Ctx->VarScope;
   }
 
   ~LoopScope() {
     this->Ctx->BreakLabel = OldBreakLabel;
     this->Ctx->ContinueLabel = OldContinueLabel;
+    this->Ctx->ContinueVarScope = OldContinueVarScope;
+    this->Ctx->BreakVarScope = OldBreakVarScope;
   }
 
 private:
   OptLabelTy OldBreakLabel;
   OptLabelTy OldContinueLabel;
+  VariableScope<Emitter> *OldBreakVarScope;
+  VariableScope<Emitter> *OldContinueVarScope;
 };
 
 // Sets the context for a switch scope, mapping labels.
@@ -140,22 +148,26 @@ public:
               OptLabelTy DefaultLabel)
       : LabelScope<Emitter>(Ctx), OldBreakLabel(Ctx->BreakLabel),
         OldDefaultLabel(this->Ctx->DefaultLabel),
-        OldCaseLabels(std::move(this->Ctx->CaseLabels)) {
+        OldCaseLabels(std::move(this->Ctx->CaseLabels)),
+        OldLabelVarScope(Ctx->BreakVarScope) {
     this->Ctx->BreakLabel = BreakLabel;
     this->Ctx->DefaultLabel = DefaultLabel;
     this->Ctx->CaseLabels = std::move(CaseLabels);
+    this->Ctx->BreakVarScope = this->Ctx->VarScope;
   }
 
   ~SwitchScope() {
     this->Ctx->BreakLabel = OldBreakLabel;
     this->Ctx->DefaultLabel = OldDefaultLabel;
     this->Ctx->CaseLabels = std::move(OldCaseLabels);
+    this->Ctx->BreakVarScope = OldLabelVarScope;
   }
 
 private:
   OptLabelTy OldBreakLabel;
   OptLabelTy OldDefaultLabel;
   CaseMap OldCaseLabels;
+  VariableScope<Emitter> *OldLabelVarScope;
 };
 
 template <class Emitter> class StmtExprScope final {
@@ -687,6 +699,8 @@ bool Compiler<Emitter>::VisitParenExpr(const ParenExpr *E) {
 template <class Emitter>
 bool Compiler<Emitter>::VisitBinaryOperator(const BinaryOperator *BO) {
   // Need short-circuiting for these.
+  if (BO->getType()->isVectorType())
+    return this->VisitVectorBinOp(BO);
   if (BO->isLogicalOp())
     return this->VisitLogicalBinOp(BO);
 
@@ -1222,6 +1236,123 @@ bool Compiler<Emitter>::VisitComplexBinOp(const BinaryOperator *E) {
   return true;
 }
 
+template <class Emitter>
+bool Compiler<Emitter>::VisitVectorBinOp(const BinaryOperator *E) {
+  assert(E->getType()->isVectorType());
+  assert(E->getLHS()->getType()->isVectorType());
+  assert(E->getRHS()->getType()->isVectorType());
+
+  // FIXME: Currently we only support comparison and logical binary operators;
+  // add support for the other binary operators.
+  if (!E->isComparisonOp() && !E->isLogicalOp())
+    return this->emitInvalid(E);
+  // Prepare storage for result.
+  if (!Initializing) {
+    unsigned LocalIndex = allocateTemporary(E);
+    if (!this->emitGetPtrLocal(LocalIndex, E))
+      return false;
+  }
+
+  const Expr *LHS = E->getLHS();
+  const Expr *RHS = E->getRHS();
+  const auto *VecTy = E->getType()->getAs<VectorType>();
+
+  // The LHS and RHS of a comparison operator must have the same type. So we
+  // just use the LHS vector element type here.
+  PrimType ElemT = this->classifyVectorElementType(LHS->getType());
+  PrimType ResultElemT = this->classifyVectorElementType(E->getType());
+
+  // Evaluate LHS and save value to LHSOffset.
+  unsigned LHSOffset = this->allocateLocalPrimitive(LHS, PT_Ptr, true, false);
+  if (!this->visit(LHS))
+    return false;
+  if (!this->emitSetLocal(PT_Ptr, LHSOffset, E))
+    return false;
+
+  // Evaluate RHS and save value to RHSOffset.
+  unsigned RHSOffset = this->allocateLocalPrimitive(RHS, PT_Ptr, true, false);
+  if (!this->visit(RHS))
+    return false;
+  if (!this->emitSetLocal(PT_Ptr, RHSOffset, E))
+    return false;
+
+  auto getElem = [=](unsigned Offset, unsigned Index) {
+    if (!this->emitGetLocal(PT_Ptr, Offset, E))
+      return false;
+    if (!this->emitArrayElemPop(ElemT, Index, E))
+      return false;
+    if (E->isLogicalOp()) {
+      if (!this->emitPrimCast(ElemT, PT_Bool, Ctx.getASTContext().BoolTy, E))
+        return false;
+      if (!this->emitPrimCast(PT_Bool, ResultElemT, VecTy->getElementType(), E))
+        return false;
+    }
+    return true;
+  };
+
+  for (unsigned I = 0; I != VecTy->getNumElements(); ++I) {
+    if (!getElem(LHSOffset, I))
+      return false;
+    if (!getElem(RHSOffset, I))
+      return false;
+    switch (E->getOpcode()) {
+    case BO_EQ:
+      if (!this->emitEQ(ElemT, E))
+        return false;
+      break;
+    case BO_NE:
+      if (!this->emitNE(ElemT, E))
+        return false;
+      break;
+    case BO_LE:
+      if (!this->emitLE(ElemT, E))
+        return false;
+      break;
+    case BO_LT:
+      if (!this->emitLT(ElemT, E))
+        return false;
+      break;
+    case BO_GE:
+      if (!this->emitGE(ElemT, E))
+        return false;
+      break;
+    case BO_GT:
+      if (!this->emitGT(ElemT, E))
+        return false;
+      break;
+    case BO_LAnd:
+      // a && b is equivalent to a!=0 & b!=0
+      if (!this->emitBitAnd(ResultElemT, E))
+        return false;
+      break;
+    case BO_LOr:
+      // a || b is equivalent to a!=0 | b!=0
+      if (!this->emitBitOr(ResultElemT, E))
+        return false;
+      break;
+    default:
+      llvm_unreachable("Unsupported binary operator");
+    }
+
+    // The result of the comparison is a vector of the same width and number
+    // of elements as the comparison operands with a signed integral element
+    // type.
+    //
+    // https://gcc.gnu.org/onlinedocs/gcc/Vector-Extensions.html
+    if (E->isComparisonOp()) {
+      if (!this->emitPrimCast(PT_Bool, ResultElemT, VecTy->getElementType(), E))
+        return false;
+      if (!this->emitNeg(ResultElemT, E))
+        return false;
+    }
+
+    // Initialize array element with the value we just computed.
+    if (!this->emitInitElem(ResultElemT, I, E))
+      return false;
+  }
+  return true;
+}
+
 template <class Emitter>
 bool Compiler<Emitter>::VisitImplicitValueInitExpr(
     const ImplicitValueInitExpr *E) {
@@ -1773,8 +1904,12 @@ bool Compiler<Emitter>::VisitMemberExpr(const MemberExpr *E) {
     return false;
   }
 
-  if (!isa<FieldDecl>(Member))
-    return this->discard(Base) && this->visitDeclRef(Member, E);
+  if (!isa<FieldDecl>(Member)) {
+    if (!this->discard(Base) && !this->emitSideEffect(E))
+      return false;
+
+    return this->visitDeclRef(Member, E);
+  }
 
   if (Initializing) {
     if (!this->delegate(Base))
@@ -2478,8 +2613,15 @@ bool Compiler<Emitter>::VisitCXXReinterpretCastExpr(
     const CXXReinterpretCastExpr *E) {
   const Expr *SubExpr = E->getSubExpr();
 
-  bool TypesMatch = classify(E) == classify(SubExpr);
-  if (!this->emitInvalidCast(CastKind::Reinterpret, /*Fatal=*/!TypesMatch, E))
+  bool Fatal = false;
+  std::optional<PrimType> FromT = classify(SubExpr);
+  std::optional<PrimType> ToT = classify(E);
+  if (!FromT || !ToT)
+    Fatal = true;
+  else
+    Fatal = (ToT != FromT);
+
+  if (!this->emitInvalidCast(CastKind::Reinterpret, Fatal, E))
     return false;
 
   return this->delegate(SubExpr);
@@ -2565,8 +2707,14 @@ bool Compiler<Emitter>::VisitCXXConstructExpr(const CXXConstructExpr *E) {
       if (!this->emitCallVar(Func, VarArgSize, E))
         return false;
     } else {
-      if (!this->emitCall(Func, 0, E))
+      if (!this->emitCall(Func, 0, E)) {
+        // When discarding, we don't need the result anyway, so clean up
+        // the instance dup we did earlier in case surrounding code wants
+        // to keep evaluating.
+        if (DiscardResult)
+          (void)this->emitPopPtr(E);
         return false;
+      }
     }
 
     if (DiscardResult)
@@ -3187,7 +3335,11 @@ bool Compiler<Emitter>::VisitObjCBoxedExpr(const ObjCBoxedExpr *E) {
   if (!E->isExpressibleAsConstantInitializer())
     return this->discard(SubExpr) && this->emitInvalid(E);
 
-  return this->delegate(SubExpr);
+  assert(classifyPrim(E) == PT_Ptr);
+  if (std::optional<unsigned> I = P.getOrCreateDummy(E))
+    return this->emitGetPtrGlobal(*I, E);
+
+  return false;
 }
 
 template <class Emitter>
@@ -3202,6 +3354,10 @@ bool Compiler<Emitter>::VisitCXXStdInitializerListExpr(
 
   if (!this->visit(SubExpr))
     return false;
+  if (!this->emitConstUint8(0, E))
+    return false;
+  if (!this->emitArrayElemPtrPopUint8(E))
+    return false;
   if (!this->emitInitFieldPtr(R->getField(0u)->Offset, E))
     return false;
 
@@ -3216,6 +3372,8 @@ bool Compiler<Emitter>::VisitCXXStdInitializerListExpr(
 
   if (!this->emitGetFieldPtr(R->getField(0u)->Offset, E))
     return false;
+  if (!this->emitExpandPtr(E))
+    return false;
   if (!this->emitConst(static_cast<APSInt>(ArrayType->getSize()), PT_Uint64, E))
     return false;
   if (!this->emitArrayElemPtrPop(PT_Uint64, E))
@@ -3615,20 +3773,29 @@ const Function *Compiler<Emitter>::getFunction(const FunctionDecl *FD) {
   return Ctx.getOrCreateFunction(FD);
 }
 
-template <class Emitter> bool Compiler<Emitter>::visitExpr(const Expr *E) {
+template <class Emitter>
+bool Compiler<Emitter>::visitExpr(const Expr *E, bool DestroyToplevelScope) {
   LocalScope<Emitter> RootScope(this);
+
+  auto maybeDestroyLocals = [&]() -> bool {
+    if (DestroyToplevelScope)
+      return RootScope.destroyLocals();
+    return true;
+  };
+
   // Void expressions.
   if (E->getType()->isVoidType()) {
     if (!visit(E))
       return false;
-    return this->emitRetVoid(E) && RootScope.destroyLocals();
+    return this->emitRetVoid(E) && maybeDestroyLocals();
   }
 
   // Expressions with a primitive return type.
   if (std::optional<PrimType> T = classify(E)) {
     if (!visit(E))
       return false;
-    return this->emitRet(*T, E) && RootScope.destroyLocals();
+
+    return this->emitRet(*T, E) && maybeDestroyLocals();
   }
 
   // Expressions with a composite return type.
@@ -3646,10 +3813,10 @@ template <class Emitter> bool Compiler<Emitter>::visitExpr(const Expr *E) {
     // We are destroying the locals AFTER the Ret op.
     // The Ret op needs to copy the (alive) values, but the
     // destructors may still turn the entire expression invalid.
-    return this->emitRetValue(E) && RootScope.destroyLocals();
+    return this->emitRetValue(E) && maybeDestroyLocals();
   }
 
-  RootScope.destroyLocals();
+  (void)maybeDestroyLocals();
   return false;
 }
 
@@ -3943,19 +4110,19 @@ bool Compiler<Emitter>::visitAPValueInitializer(const APValue &Val,
 }
 
 template <class Emitter>
-bool Compiler<Emitter>::VisitBuiltinCallExpr(const CallExpr *E) {
+bool Compiler<Emitter>::VisitBuiltinCallExpr(const CallExpr *E,
+                                             unsigned BuiltinID) {
   const Function *Func = getFunction(E->getDirectCallee());
   if (!Func)
     return false;
 
   // For these, we're expected to ultimately return an APValue pointing
   // to the CallExpr. This is needed to get the correct codegen.
-  unsigned Builtin = E->getBuiltinCallee();
-  if (Builtin == Builtin::BI__builtin___CFStringMakeConstantString ||
-      Builtin == Builtin::BI__builtin___NSStringMakeConstantString ||
-      Builtin == Builtin::BI__builtin_ptrauth_sign_constant ||
-      Builtin == Builtin::BI__builtin_function_start) {
-    if (std::optional<unsigned> GlobalOffset = P.createGlobal(E)) {
+  if (BuiltinID == Builtin::BI__builtin___CFStringMakeConstantString ||
+      BuiltinID == Builtin::BI__builtin___NSStringMakeConstantString ||
+      BuiltinID == Builtin::BI__builtin_ptrauth_sign_constant ||
+      BuiltinID == Builtin::BI__builtin_function_start) {
+    if (std::optional<unsigned> GlobalOffset = P.getOrCreateDummy(E)) {
       if (!this->emitGetPtrGlobal(*GlobalOffset, E))
         return false;
 
@@ -3986,7 +4153,7 @@ bool Compiler<Emitter>::VisitBuiltinCallExpr(const CallExpr *E) {
     }
   }
 
-  if (!this->emitCallBI(Func, E, E))
+  if (!this->emitCallBI(Func, E, BuiltinID, E))
     return false;
 
   if (DiscardResult && !ReturnType->isVoidType()) {
@@ -3999,13 +4166,24 @@ bool Compiler<Emitter>::VisitBuiltinCallExpr(const CallExpr *E) {
 
 template <class Emitter>
 bool Compiler<Emitter>::VisitCallExpr(const CallExpr *E) {
-  if (E->getBuiltinCallee())
-    return VisitBuiltinCallExpr(E);
+  if (unsigned BuiltinID = E->getBuiltinCallee())
+    return VisitBuiltinCallExpr(E, BuiltinID);
+
+  const FunctionDecl *FuncDecl = E->getDirectCallee();
+  // Calls to replaceable operator new/operator delete.
+  if (FuncDecl && FuncDecl->isReplaceableGlobalAllocationFunction()) {
+    if (FuncDecl->getDeclName().getCXXOverloadedOperator() == OO_New ||
+        FuncDecl->getDeclName().getCXXOverloadedOperator() == OO_Array_New) {
+      return VisitBuiltinCallExpr(E, Builtin::BI__builtin_operator_new);
+    } else {
+      assert(FuncDecl->getDeclName().getCXXOverloadedOperator() == OO_Delete);
+      return VisitBuiltinCallExpr(E, Builtin::BI__builtin_operator_delete);
+    }
+  }
 
   QualType ReturnType = E->getCallReturnType(Ctx.getASTContext());
   std::optional<PrimType> T = classify(ReturnType);
   bool HasRVO = !ReturnType->isVoidType() && !T;
-  const FunctionDecl *FuncDecl = E->getDirectCallee();
 
   if (HasRVO) {
     if (DiscardResult) {
@@ -4435,18 +4613,23 @@ bool Compiler<Emitter>::visitWhileStmt(const WhileStmt *S) {
   this->fallthrough(CondLabel);
   this->emitLabel(CondLabel);
 
-  if (const DeclStmt *CondDecl = S->getConditionVariableDeclStmt())
-    if (!visitDeclStmt(CondDecl))
-      return false;
+  {
+    LocalScope<Emitter> CondScope(this);
+    if (const DeclStmt *CondDecl = S->getConditionVariableDeclStmt())
+      if (!visitDeclStmt(CondDecl))
+        return false;
 
-  if (!this->visitBool(Cond))
-    return false;
-  if (!this->jumpFalse(EndLabel))
-    return false;
+    if (!this->visitBool(Cond))
+      return false;
+    if (!this->jumpFalse(EndLabel))
+      return false;
 
-  if (!this->visitStmt(Body))
-    return false;
+    if (!this->visitStmt(Body))
+      return false;
 
+    if (!CondScope.destroyLocals())
+      return false;
+  }
   if (!this->jump(CondLabel))
     return false;
   this->fallthrough(EndLabel);
@@ -4466,13 +4649,18 @@ template <class Emitter> bool Compiler<Emitter>::visitDoStmt(const DoStmt *S) {
 
   this->fallthrough(StartLabel);
   this->emitLabel(StartLabel);
+
   {
+    LocalScope<Emitter> CondScope(this);
     if (!this->visitStmt(Body))
       return false;
     this->fallthrough(CondLabel);
     this->emitLabel(CondLabel);
     if (!this->visitBool(Cond))
       return false;
+
+    if (!CondScope.destroyLocals())
+      return false;
   }
   if (!this->jumpTrue(StartLabel))
     return false;
@@ -4501,18 +4689,19 @@ bool Compiler<Emitter>::visitForStmt(const ForStmt *S) {
   this->fallthrough(CondLabel);
   this->emitLabel(CondLabel);
 
-  if (const DeclStmt *CondDecl = S->getConditionVariableDeclStmt())
-    if (!visitDeclStmt(CondDecl))
-      return false;
+  {
+    LocalScope<Emitter> CondScope(this);
+    if (const DeclStmt *CondDecl = S->getConditionVariableDeclStmt())
+      if (!visitDeclStmt(CondDecl))
+        return false;
 
-  if (Cond) {
-    if (!this->visitBool(Cond))
-      return false;
-    if (!this->jumpFalse(EndLabel))
-      return false;
-  }
+    if (Cond) {
+      if (!this->visitBool(Cond))
+        return false;
+      if (!this->jumpFalse(EndLabel))
+        return false;
+    }
 
-  {
     if (Body && !this->visitStmt(Body))
       return false;
 
@@ -4520,10 +4709,13 @@ bool Compiler<Emitter>::visitForStmt(const ForStmt *S) {
     this->emitLabel(IncLabel);
     if (Inc && !this->discard(Inc))
       return false;
-  }
 
+    if (!CondScope.destroyLocals())
+      return false;
+  }
   if (!this->jump(CondLabel))
     return false;
+
   this->fallthrough(EndLabel);
   this->emitLabel(EndLabel);
   return true;
@@ -4590,7 +4782,9 @@ bool Compiler<Emitter>::visitBreakStmt(const BreakStmt *S) {
   if (!BreakLabel)
     return false;
 
-  this->emitCleanup();
+  for (VariableScope<Emitter> *C = VarScope; C != BreakVarScope;
+       C = C->getParent())
+    C->emitDestruction();
   return this->jump(*BreakLabel);
 }
 
@@ -4599,7 +4793,9 @@ bool Compiler<Emitter>::visitContinueStmt(const ContinueStmt *S) {
   if (!ContinueLabel)
     return false;
 
-  this->emitCleanup();
+  for (VariableScope<Emitter> *C = VarScope;
+       C && C->getParent() != ContinueVarScope; C = C->getParent())
+    C->emitDestruction();
   return this->jump(*ContinueLabel);
 }
 
@@ -4607,6 +4803,7 @@ template <class Emitter>
 bool Compiler<Emitter>::visitSwitchStmt(const SwitchStmt *S) {
   const Expr *Cond = S->getCond();
   PrimType CondT = this->classifyPrim(Cond->getType());
+  LocalScope<Emitter> LS(this);
 
   LabelTy EndLabel = this->getLabel();
   OptLabelTy DefaultLabel = std::nullopt;
@@ -4670,7 +4867,8 @@ bool Compiler<Emitter>::visitSwitchStmt(const SwitchStmt *S) {
   if (!this->visitStmt(S->getBody()))
     return false;
   this->emitLabel(EndLabel);
-  return true;
+
+  return LS.destroyLocals();
 }
 
 template <class Emitter>
@@ -5507,11 +5705,18 @@ bool Compiler<Emitter>::visitDeclRef(const ValueDecl *D, const Expr *E) {
         if (isa<DecompositionDecl>(VD))
           return revisit(VD);
 
-        // Visit local const variables like normal.
-        if ((VD->hasGlobalStorage() || VD->isLocalVarDecl() ||
-             VD->isStaticDataMember()) &&
+        if ((VD->hasGlobalStorage() || VD->isStaticDataMember()) &&
             typeShouldBeVisited(VD->getType()))
           return revisit(VD);
+
+        // FIXME: The evaluateValue() check here is a little ridiculous, since
+        // it will ultimately call into Context::evaluateAsInitializer(). In
+        // other words, we're evaluating the initializer, just to know if we can
+        // evaluate the initializer.
+        if (VD->isLocalVarDecl() && typeShouldBeVisited(VD->getType()) &&
+            VD->getInit() && !VD->getInit()->isValueDependent() &&
+            VD->evaluateValue())
+          return revisit(VD);
       }
     } else {
       if (const auto *VD = dyn_cast<VarDecl>(D);
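
The lowering in VisitVectorBinOp follows the GCC vector extension semantics cited in its comments: comparisons yield a signed integer vector holding -1 where the relation holds and 0 otherwise, and && / || act elementwise after comparing each operand against zero. A small sketch in plain C++ using the vector_size extension (illustrative only):

```cpp
typedef int v4si __attribute__((vector_size(16)));

v4si lessThan(v4si A, v4si B) {
  // Each result element is -1 (all bits set) where A[i] < B[i] holds and 0
  // where it does not, keeping a signed integer element type of the operand
  // width; this is why the bytecode above casts to bool and then negates.
  return A < B;
}
```
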
diff --git clang/lib/AST/ByteCode/Compiler.h clang/lib/AST/ByteCode/Compiler.h
index b18afacdb2e4..2dfa187713a8 100644
--- clang/lib/AST/ByteCode/Compiler.h
+++ clang/lib/AST/ByteCode/Compiler.h
@@ -130,9 +130,10 @@ public:
   bool VisitLogicalBinOp(const BinaryOperator *E);
   bool VisitPointerArithBinOp(const BinaryOperator *E);
   bool VisitComplexBinOp(const BinaryOperator *E);
+  bool VisitVectorBinOp(const BinaryOperator *E);
   bool VisitCXXDefaultArgExpr(const CXXDefaultArgExpr *E);
   bool VisitCallExpr(const CallExpr *E);
-  bool VisitBuiltinCallExpr(const CallExpr *E);
+  bool VisitBuiltinCallExpr(const CallExpr *E, unsigned BuiltinID);
   bool VisitCXXDefaultInitExpr(const CXXDefaultInitExpr *E);
   bool VisitCXXBoolLiteralExpr(const CXXBoolLiteralExpr *E);
   bool VisitCXXNullPtrLiteralExpr(const CXXNullPtrLiteralExpr *E);
@@ -221,7 +222,7 @@ public:
 
 protected:
   bool visitStmt(const Stmt *S);
-  bool visitExpr(const Expr *E) override;
+  bool visitExpr(const Expr *E, bool DestroyToplevelScope) override;
   bool visitFunc(const FunctionDecl *F) override;
 
   bool visitDeclAndReturn(const VarDecl *VD, bool ConstantContext) override;
@@ -363,7 +364,6 @@ private:
   bool emitComplexBoolCast(const Expr *E);
   bool emitComplexComparison(const Expr *LHS, const Expr *RHS,
                              const BinaryOperator *E);
-
   bool emitRecordDestruction(const Record *R);
   bool emitDestruction(const Descriptor *Desc);
   unsigned collectBaseOffset(const QualType BaseType,
@@ -409,8 +409,12 @@ protected:
   /// Switch case mapping.
   CaseMap CaseLabels;
 
+  /// The scope to clean up to when we see a break statement.
+  VariableScope<Emitter> *BreakVarScope = nullptr;
   /// Point to break to.
   OptLabelTy BreakLabel;
+  /// The scope to clean up to when we see a continue statement.
+  VariableScope<Emitter> *ContinueVarScope = nullptr;
   /// Point to continue to.
   OptLabelTy ContinueLabel;
   /// Default case label.
@@ -531,7 +535,7 @@ public:
       return true;
     // Emit destructor calls for local variables of record
     // type with a destructor.
-    for (Scope::Local &Local : this->Ctx->Descriptors[*Idx]) {
+    for (Scope::Local &Local : llvm::reverse(this->Ctx->Descriptors[*Idx])) {
       if (!Local.Desc->isPrimitive() && !Local.Desc->isPrimitiveArray()) {
         if (!this->Ctx->emitGetPtrLocal(Local.Offset, E))
           return false;
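
The llvm::reverse change in the scope cleanup loop above emits destructor calls in reverse declaration order, matching the usual C++ rule for local objects. A self-contained constexpr check of that rule (illustrative only; requires C++20 constexpr destructors):

```cpp
struct Guard {
  int *Log;
  int Id;
  constexpr Guard(int *Log, int Id) : Log(Log), Id(Id) {}
  constexpr ~Guard() { *Log = *Log * 10 + Id; } // append our Id to the log
};

constexpr int destructionOrder() {
  int Log = 0;
  {
    Guard A(&Log, 1), B(&Log, 2);
  } // destroyed as B, then A
  return Log;
}
static_assert(destructionOrder() == 21, "locals are destroyed in reverse order");
```
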
diff --git clang/lib/AST/ByteCode/Context.cpp clang/lib/AST/ByteCode/Context.cpp
index e682d87b703a..8661acf53665 100644
--- clang/lib/AST/ByteCode/Context.cpp
+++ clang/lib/AST/ByteCode/Context.cpp
@@ -44,13 +44,14 @@ bool Context::isPotentialConstantExpr(State &Parent, const FunctionDecl *FD) {
 bool Context::evaluateAsRValue(State &Parent, const Expr *E, APValue &Result) {
   ++EvalID;
   bool Recursing = !Stk.empty();
+  size_t StackSizeBefore = Stk.size();
   Compiler<EvalEmitter> C(*this, *P, Parent, Stk);
 
   auto Res = C.interpretExpr(E, /*ConvertResultToRValue=*/E->isGLValue());
 
   if (Res.isInvalid()) {
     C.cleanup();
-    Stk.clear();
+    Stk.clearTo(StackSizeBefore);
     return false;
   }
 
@@ -60,7 +61,7 @@ bool Context::evaluateAsRValue(State &Parent, const Expr *E, APValue &Result) {
 #ifndef NDEBUG
     // Make sure we don't rely on some value being still alive in
     // InterpStack memory.
-    Stk.clear();
+    Stk.clearTo(StackSizeBefore);
 #endif
   }
 
@@ -69,15 +70,19 @@ bool Context::evaluateAsRValue(State &Parent, const Expr *E, APValue &Result) {
   return true;
 }
 
-bool Context::evaluate(State &Parent, const Expr *E, APValue &Result) {
+bool Context::evaluate(State &Parent, const Expr *E, APValue &Result,
+                       ConstantExprKind Kind) {
   ++EvalID;
   bool Recursing = !Stk.empty();
+  size_t StackSizeBefore = Stk.size();
   Compiler<EvalEmitter> C(*this, *P, Parent, Stk);
 
-  auto Res = C.interpretExpr(E);
+  auto Res = C.interpretExpr(E, /*ConvertResultToRValue=*/false,
+                             /*DestroyToplevelScope=*/Kind ==
+                                 ConstantExprKind::ClassTemplateArgument);
   if (Res.isInvalid()) {
     C.cleanup();
-    Stk.clear();
+    Stk.clearTo(StackSizeBefore);
     return false;
   }
 
@@ -87,7 +92,7 @@ bool Context::evaluate(State &Parent, const Expr *E, APValue &Result) {
 #ifndef NDEBUG
     // Make sure we don't rely on some value being still alive in
     // InterpStack memory.
-    Stk.clear();
+    Stk.clearTo(StackSizeBefore);
 #endif
   }
 
@@ -99,6 +104,7 @@ bool Context::evaluateAsInitializer(State &Parent, const VarDecl *VD,
                                     APValue &Result) {
   ++EvalID;
   bool Recursing = !Stk.empty();
+  size_t StackSizeBefore = Stk.size();
   Compiler<EvalEmitter> C(*this, *P, Parent, Stk);
 
   bool CheckGlobalInitialized =
@@ -107,7 +113,8 @@ bool Context::evaluateAsInitializer(State &Parent, const VarDecl *VD,
   auto Res = C.interpretDecl(VD, CheckGlobalInitialized);
   if (Res.isInvalid()) {
     C.cleanup();
-    Stk.clear();
+    Stk.clearTo(StackSizeBefore);
+
     return false;
   }
 
@@ -117,7 +124,7 @@ bool Context::evaluateAsInitializer(State &Parent, const VarDecl *VD,
 #ifndef NDEBUG
     // Make sure we don't rely on some value being still alive in
     // InterpStack memory.
-    Stk.clear();
+    Stk.clearTo(StackSizeBefore);
 #endif
   }
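
The switch from Stk.clear() to Stk.clearTo(StackSizeBefore) keeps a failing nested evaluation from wiping operands an enclosing evaluation still needs. A toy version of the pattern, using illustrative names rather than the interpreter's real API:

```cpp
#include <cstddef>
#include <vector>

// Truncate back to the depth recorded on entry instead of clearing the whole
// stack, so only what this evaluation pushed is discarded on failure.
bool nestedEval(std::vector<int> &Stk) {
  std::size_t StackSizeBefore = Stk.size();
  Stk.push_back(42);             // the nested evaluation pushes temporaries...
  bool Failed = true;            // ...and then bails out
  if (Failed) {
    Stk.resize(StackSizeBefore); // analogous to Stk.clearTo(StackSizeBefore)
    return false;
  }
  return true;
}
```
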
 
diff --git clang/lib/AST/ByteCode/Context.h clang/lib/AST/ByteCode/Context.h
index b8ea4ad6b3b4..e0d4bafdebaf 100644
--- clang/lib/AST/ByteCode/Context.h
+++ clang/lib/AST/ByteCode/Context.h
@@ -52,7 +52,8 @@ public:
   bool evaluateAsRValue(State &Parent, const Expr *E, APValue &Result);
 
   /// Like evaluateAsRvalue(), but does no implicit lvalue-to-rvalue conversion.
-  bool evaluate(State &Parent, const Expr *E, APValue &Result);
+  bool evaluate(State &Parent, const Expr *E, APValue &Result,
+                ConstantExprKind Kind);
 
   /// Evaluates a toplevel initializer.
   bool evaluateAsInitializer(State &Parent, const VarDecl *VD, APValue &Result);
diff --git clang/lib/AST/ByteCode/DynamicAllocator.cpp clang/lib/AST/ByteCode/DynamicAllocator.cpp
index a51599774078..819fbdb8b070 100644
--- clang/lib/AST/ByteCode/DynamicAllocator.cpp
+++ clang/lib/AST/ByteCode/DynamicAllocator.cpp
@@ -40,27 +40,30 @@ void DynamicAllocator::cleanup() {
 }
 
 Block *DynamicAllocator::allocate(const Expr *Source, PrimType T,
-                                  size_t NumElements, unsigned EvalID) {
+                                  size_t NumElements, unsigned EvalID,
+                                  Form AllocForm) {
   // Create a new descriptor for an array of the specified size and
   // element type.
   const Descriptor *D = allocateDescriptor(
       Source, T, Descriptor::InlineDescMD, NumElements, /*IsConst=*/false,
       /*IsTemporary=*/false, /*IsMutable=*/false);
 
-  return allocate(D, EvalID);
+  return allocate(D, EvalID, AllocForm);
 }
 
 Block *DynamicAllocator::allocate(const Descriptor *ElementDesc,
-                                  size_t NumElements, unsigned EvalID) {
+                                  size_t NumElements, unsigned EvalID,
+                                  Form AllocForm) {
   // Create a new descriptor for an array of the specified size and
   // element type.
   const Descriptor *D = allocateDescriptor(
       ElementDesc->asExpr(), ElementDesc, Descriptor::InlineDescMD, NumElements,
       /*IsConst=*/false, /*IsTemporary=*/false, /*IsMutable=*/false);
-  return allocate(D, EvalID);
+  return allocate(D, EvalID, AllocForm);
 }
 
-Block *DynamicAllocator::allocate(const Descriptor *D, unsigned EvalID) {
+Block *DynamicAllocator::allocate(const Descriptor *D, unsigned EvalID,
+                                  Form AllocForm) {
   assert(D);
   assert(D->asExpr());
 
@@ -84,7 +87,7 @@ Block *DynamicAllocator::allocate(const Descriptor *D, unsigned EvalID) {
     It->second.Allocations.emplace_back(std::move(Memory));
   else
     AllocationSites.insert(
-        {D->asExpr(), AllocationSite(std::move(Memory), D->isArray())});
+        {D->asExpr(), AllocationSite(std::move(Memory), AllocForm)});
   return B;
 }
 
diff --git clang/lib/AST/ByteCode/DynamicAllocator.h clang/lib/AST/ByteCode/DynamicAllocator.h
index a84600aa54cc..1ed5dc843e4c 100644
--- clang/lib/AST/ByteCode/DynamicAllocator.h
+++ clang/lib/AST/ByteCode/DynamicAllocator.h
@@ -31,6 +31,14 @@ class InterpState;
 /// For all array allocations, we need to allocate new Descriptor instances,
 /// so the DynamicAllocator has a llvm::BumpPtrAllocator similar to Program.
 class DynamicAllocator final {
+public:
+  enum class Form : uint8_t {
+    NonArray,
+    Array,
+    Operator,
+  };
+
+private:
   struct Allocation {
     std::unique_ptr<std::byte[]> Memory;
     Allocation(std::unique_ptr<std::byte[]> Memory)
@@ -39,10 +47,10 @@ class DynamicAllocator final {
 
   struct AllocationSite {
     llvm::SmallVector<Allocation> Allocations;
-    bool IsArrayAllocation = false;
+    Form AllocForm;
 
-    AllocationSite(std::unique_ptr<std::byte[]> Memory, bool Array)
-        : IsArrayAllocation(Array) {
+    AllocationSite(std::unique_ptr<std::byte[]> Memory, Form AllocForm)
+        : AllocForm(AllocForm) {
       Allocations.push_back({std::move(Memory)});
     }
 
@@ -58,12 +66,13 @@ public:
   unsigned getNumAllocations() const { return AllocationSites.size(); }
 
   /// Allocate ONE element of the given descriptor.
-  Block *allocate(const Descriptor *D, unsigned EvalID);
+  Block *allocate(const Descriptor *D, unsigned EvalID, Form AllocForm);
   /// Allocate \p NumElements primitive elements of the given type.
   Block *allocate(const Expr *Source, PrimType T, size_t NumElements,
-                  unsigned EvalID);
+                  unsigned EvalID, Form AllocForm);
   /// Allocate \p NumElements elements of the given descriptor.
-  Block *allocate(const Descriptor *D, size_t NumElements, unsigned EvalID);
+  Block *allocate(const Descriptor *D, size_t NumElements, unsigned EvalID,
+                  Form AllocForm);
 
   /// Deallocate the given source+block combination.
   /// Returns \c true if anything has been deallocatd, \c false otherwise.
@@ -72,10 +81,10 @@ public:
 
   /// Checks whether the allocation done at the given source is an array
   /// allocation.
-  bool isArrayAllocation(const Expr *Source) const {
+  std::optional<Form> getAllocationForm(const Expr *Source) const {
     if (auto It = AllocationSites.find(Source); It != AllocationSites.end())
-      return It->second.IsArrayAllocation;
-    return false;
+      return It->second.AllocForm;
+    return std::nullopt;
   }
 
   /// Allocation site iterator.
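
The new Form enum lets CheckNewDeleteForms distinguish the three allocation forms (new, new[], and the __builtin_operator_new family) when checking that allocation and deallocation match during constant evaluation. An illustrative example of what gets diagnosed (requires C++20 constexpr allocation):

```cpp
// Allocation and deallocation forms must match inside a constant expression:
// new/delete, new[]/delete[], or __builtin_operator_new/_delete.
constexpr bool release(bool AsArray) {
  int *P = new int(1);            // non-array allocation
  if (AsArray)
    delete[] P;                   // array deallocation: form mismatch, diagnosed
  else
    delete P;                     // matching form
  return true;
}
static_assert(release(false));    // OK
// static_assert(release(true));  // error: allocation/deallocation form mismatch
```
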
diff --git clang/lib/AST/ByteCode/EvalEmitter.cpp clang/lib/AST/ByteCode/EvalEmitter.cpp
index 3b9e5f9f9f69..7eecee25bb3c 100644
--- clang/lib/AST/ByteCode/EvalEmitter.cpp
+++ clang/lib/AST/ByteCode/EvalEmitter.cpp
@@ -38,13 +38,14 @@ EvalEmitter::~EvalEmitter() {
 void EvalEmitter::cleanup() { S.cleanup(); }
 
 EvaluationResult EvalEmitter::interpretExpr(const Expr *E,
-                                            bool ConvertResultToRValue) {
+                                            bool ConvertResultToRValue,
+                                            bool DestroyToplevelScope) {
   S.setEvalLocation(E->getExprLoc());
   this->ConvertResultToRValue = ConvertResultToRValue && !isa<ConstantExpr>(E);
   this->CheckFullyInitialized = isa<ConstantExpr>(E);
   EvalResult.setSource(E);
 
-  if (!this->visitExpr(E)) {
+  if (!this->visitExpr(E, DestroyToplevelScope)) {
     // EvalResult may already have a result set, but something failed
     // after that (e.g. evaluating destructors).
     EvalResult.setInvalid();
diff --git clang/lib/AST/ByteCode/EvalEmitter.h clang/lib/AST/ByteCode/EvalEmitter.h
index 338786d3dea9..e7c9e80d75d9 100644
--- clang/lib/AST/ByteCode/EvalEmitter.h
+++ clang/lib/AST/ByteCode/EvalEmitter.h
@@ -35,7 +35,8 @@ public:
   using Local = Scope::Local;
 
   EvaluationResult interpretExpr(const Expr *E,
-                                 bool ConvertResultToRValue = false);
+                                 bool ConvertResultToRValue = false,
+                                 bool DestroyToplevelScope = false);
   EvaluationResult interpretDecl(const VarDecl *VD, bool CheckFullyInitialized);
 
   /// Clean up all resources.
@@ -54,7 +55,7 @@ protected:
   LabelTy getLabel();
 
   /// Methods implemented by the compiler.
-  virtual bool visitExpr(const Expr *E) = 0;
+  virtual bool visitExpr(const Expr *E, bool DestroyToplevelScope) = 0;
   virtual bool visitDeclAndReturn(const VarDecl *VD, bool ConstantContext) = 0;
   virtual bool visitFunc(const FunctionDecl *F) = 0;
 
diff --git clang/lib/AST/ByteCode/EvaluationResult.cpp clang/lib/AST/ByteCode/EvaluationResult.cpp
index bdebd19af9f9..627d4b2f65be 100644
--- clang/lib/AST/ByteCode/EvaluationResult.cpp
+++ clang/lib/AST/ByteCode/EvaluationResult.cpp
@@ -178,8 +178,8 @@ bool EvaluationResult::checkFullyInitialized(InterpState &S,
 static void collectBlocks(const Pointer &Ptr,
                           llvm::SetVector<const Block *> &Blocks) {
   auto isUsefulPtr = [](const Pointer &P) -> bool {
-    return P.isLive() && !P.isZero() && !P.isDummy() &&
-           !P.isUnknownSizeArray() && !P.isOnePastEnd() && P.isBlockPointer();
+    return P.isLive() && !P.isZero() && !P.isDummy() && P.isDereferencable() &&
+           !P.isUnknownSizeArray() && !P.isOnePastEnd();
   };
 
   if (!isUsefulPtr(Ptr))
diff --git clang/lib/AST/ByteCode/Function.h clang/lib/AST/ByteCode/Function.h
index b21fa8497130..640bfa65644f 100644
--- clang/lib/AST/ByteCode/Function.h
+++ clang/lib/AST/ByteCode/Function.h
@@ -103,10 +103,10 @@ public:
   /// Returns the name of the function decl this code
   /// was generated for.
   const std::string getName() const {
-    if (!Source)
+    if (!Source || !getDecl())
       return "<<expr>>";
 
-    return Source.get<const FunctionDecl *>()->getQualifiedNameAsString();
+    return getDecl()->getQualifiedNameAsString();
   }
 
   /// Returns a parameter descriptor.
diff --git clang/lib/AST/ByteCode/Interp.cpp clang/lib/AST/ByteCode/Interp.cpp
index 6777ac150abf..827a177f9bf8 100644
--- clang/lib/AST/ByteCode/Interp.cpp
+++ clang/lib/AST/ByteCode/Interp.cpp
@@ -221,17 +221,17 @@ static void popArg(InterpState &S, const Expr *Arg) {
   TYPE_SWITCH(Ty, S.Stk.discard<T>());
 }
 
-void cleanupAfterFunctionCall(InterpState &S, CodePtr OpPC) {
+void cleanupAfterFunctionCall(InterpState &S, CodePtr OpPC,
+                              const Function *Func) {
   assert(S.Current);
-  const Function *CurFunc = S.Current->getFunction();
-  assert(CurFunc);
+  assert(Func);
 
-  if (CurFunc->isUnevaluatedBuiltin())
+  if (Func->isUnevaluatedBuiltin())
     return;
 
   // Some builtin functions require us to only look at the call site, since
   // the classified parameter types do not match.
-  if (unsigned BID = CurFunc->getBuiltinID();
+  if (unsigned BID = Func->getBuiltinID();
       BID && S.getASTContext().BuiltinInfo.hasCustomTypechecking(BID)) {
     const auto *CE =
         cast<CallExpr>(S.Current->Caller->getExpr(S.Current->getRetPC()));
@@ -242,7 +242,7 @@ void cleanupAfterFunctionCall(InterpState &S, CodePtr OpPC) {
     return;
   }
 
-  if (S.Current->Caller && CurFunc->isVariadic()) {
+  if (S.Current->Caller && Func->isVariadic()) {
     // CallExpr we're look for is at the return PC of the current function, i.e.
     // in the caller.
     // This code path should be executed very rarely.
@@ -259,8 +259,8 @@ void cleanupAfterFunctionCall(InterpState &S, CodePtr OpPC) {
     } else
       assert(false && "Can't get arguments from that expression type");
 
-    assert(NumArgs >= CurFunc->getNumWrittenParams());
-    NumVarArgs = NumArgs - (CurFunc->getNumWrittenParams() +
+    assert(NumArgs >= Func->getNumWrittenParams());
+    NumVarArgs = NumArgs - (Func->getNumWrittenParams() +
                             isa<CXXOperatorCallExpr>(CallSite));
     for (unsigned I = 0; I != NumVarArgs; ++I) {
       const Expr *A = Args[NumArgs - 1 - I];
@@ -270,7 +270,8 @@ void cleanupAfterFunctionCall(InterpState &S, CodePtr OpPC) {
 
   // And in any case, remove the fixed parameters (the non-variadic ones)
   // at the end.
-  S.Current->popArgs();
+  for (PrimType Ty : Func->args_reverse())
+    TYPE_SWITCH(Ty, S.Stk.discard<T>());
 }
 
 bool CheckExtern(InterpState &S, CodePtr OpPC, const Pointer &Ptr) {
@@ -811,10 +812,11 @@ bool CheckDynamicMemoryAllocation(InterpState &S, CodePtr OpPC) {
   return true;
 }
 
-bool CheckNewDeleteForms(InterpState &S, CodePtr OpPC, bool NewWasArray,
-                         bool DeleteIsArray, const Descriptor *D,
+bool CheckNewDeleteForms(InterpState &S, CodePtr OpPC,
+                         DynamicAllocator::Form AllocForm,
+                         DynamicAllocator::Form DeleteForm, const Descriptor *D,
                          const Expr *NewExpr) {
-  if (NewWasArray == DeleteIsArray)
+  if (AllocForm == DeleteForm)
     return true;
 
   QualType TypeToDiagnose;
@@ -831,7 +833,8 @@ bool CheckNewDeleteForms(InterpState &S, CodePtr OpPC, bool NewWasArray,
 
   const SourceInfo &E = S.Current->getSource(OpPC);
   S.FFDiag(E, diag::note_constexpr_new_delete_mismatch)
-      << DeleteIsArray << 0 << TypeToDiagnose;
+      << static_cast<int>(DeleteForm) << static_cast<int>(AllocForm)
+      << TypeToDiagnose;
   S.Note(NewExpr->getExprLoc(), diag::note_constexpr_dynamic_alloc_here)
       << NewExpr->getSourceRange();
   return false;
@@ -839,7 +842,12 @@ bool CheckNewDeleteForms(InterpState &S, CodePtr OpPC, bool NewWasArray,
 
 bool CheckDeleteSource(InterpState &S, CodePtr OpPC, const Expr *Source,
                        const Pointer &Ptr) {
-  if (Source && isa<CXXNewExpr>(Source))
+  // The two sources we currently allow are new expressions and
+  // __builtin_operator_new calls.
+  if (isa_and_nonnull<CXXNewExpr>(Source))
+    return true;
+  if (const CallExpr *CE = dyn_cast_if_present<CallExpr>(Source);
+      CE && CE->getBuiltinCallee() == Builtin::BI__builtin_operator_new)
     return true;
 
   // Whatever this is, we didn't heap allocate it.
@@ -875,7 +883,7 @@ bool CheckDummy(InterpState &S, CodePtr OpPC, const Pointer &Ptr,
     return diagnoseUnknownDecl(S, OpPC, D);
 
   assert(AK == AK_Assign);
-  if (S.getLangOpts().CPlusPlus11) {
+  if (S.getLangOpts().CPlusPlus14) {
     const SourceInfo &E = S.Current->getSource(OpPC);
     S.FFDiag(E, diag::note_constexpr_modify_global);
   }
@@ -1036,6 +1044,12 @@ bool CallVar(InterpState &S, CodePtr OpPC, const Function *Func,
 
 bool Call(InterpState &S, CodePtr OpPC, const Function *Func,
           uint32_t VarArgSize) {
+  assert(Func);
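+  // If one of the pre-call checks below fails, the arguments pushed for this
+  // call are still on the stack and need to be discarded.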
+  auto cleanup = [&]() -> bool {
+    cleanupAfterFunctionCall(S, OpPC, Func);
+    return false;
+  };
+
   if (Func->hasThisPointer()) {
     size_t ArgSize = Func->getArgSize() + VarArgSize;
     size_t ThisOffset = ArgSize - (Func->hasRVO() ? primSize(PT_Ptr) : 0);
@@ -1052,22 +1066,22 @@ bool Call(InterpState &S, CodePtr OpPC, const Function *Func,
       assert(ThisPtr.isZero());
     } else {
       if (!CheckInvoke(S, OpPC, ThisPtr))
-        return false;
+        return cleanup();
     }
   }
 
   if (!CheckCallable(S, OpPC, Func))
-    return false;
+    return cleanup();
 
   // FIXME: The isConstructor() check here is not always right. The current
   // constant evaluator is somewhat inconsistent in when it allows a function
   // call when checking for a constant expression.
   if (Func->hasThisPointer() && S.checkingPotentialConstantExpression() &&
       !Func->isConstructor())
-    return false;
+    return cleanup();
 
   if (!CheckCallDepth(S, OpPC))
-    return false;
+    return cleanup();
 
   auto NewFrame = std::make_unique<InterpFrame>(S, Func, OpPC, VarArgSize);
   InterpFrame *FrameBefore = S.Current;
@@ -1163,14 +1177,16 @@ bool CallVirt(InterpState &S, CodePtr OpPC, const Function *Func,
   return true;
 }
 
-bool CallBI(InterpState &S, CodePtr &PC, const Function *Func,
-            const CallExpr *CE) {
-  auto NewFrame = std::make_unique<InterpFrame>(S, Func, PC);
+bool CallBI(InterpState &S, CodePtr OpPC, const Function *Func,
+            const CallExpr *CE, uint32_t BuiltinID) {
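+  // When merely checking for a potential constant expression, don't bother
+  // evaluating the builtin; just bail out.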
+  if (S.checkingPotentialConstantExpression())
+    return false;
+  auto NewFrame = std::make_unique<InterpFrame>(S, Func, OpPC);
 
   InterpFrame *FrameBefore = S.Current;
   S.Current = NewFrame.get();
 
-  if (InterpretBuiltin(S, PC, Func, CE)) {
+  if (InterpretBuiltin(S, OpPC, Func, CE, BuiltinID)) {
     NewFrame.release();
     return true;
   }
diff --git clang/lib/AST/ByteCode/Interp.h clang/lib/AST/ByteCode/Interp.h
index be900769f258..3d507e2e2ba7 100644
--- clang/lib/AST/ByteCode/Interp.h
+++ clang/lib/AST/ByteCode/Interp.h
@@ -130,8 +130,9 @@ bool CheckNonNullArgs(InterpState &S, CodePtr OpPC, const Function *F,
 bool CheckDynamicMemoryAllocation(InterpState &S, CodePtr OpPC);
 
 /// Diagnose mismatched new[]/delete or new/delete[] pairs.
-bool CheckNewDeleteForms(InterpState &S, CodePtr OpPC, bool NewWasArray,
-                         bool DeleteIsArray, const Descriptor *D,
+bool CheckNewDeleteForms(InterpState &S, CodePtr OpPC,
+                         DynamicAllocator::Form AllocForm,
+                         DynamicAllocator::Form DeleteForm, const Descriptor *D,
                          const Expr *NewExpr);
 
 /// Check the source of the pointer passed to delete/delete[] has actually
@@ -153,8 +154,8 @@ bool Call(InterpState &S, CodePtr OpPC, const Function *Func,
           uint32_t VarArgSize);
 bool CallVirt(InterpState &S, CodePtr OpPC, const Function *Func,
               uint32_t VarArgSize);
-bool CallBI(InterpState &S, CodePtr &PC, const Function *Func,
-            const CallExpr *CE);
+bool CallBI(InterpState &S, CodePtr OpPC, const Function *Func,
+            const CallExpr *CE, uint32_t BuiltinID);
 bool CallPtr(InterpState &S, CodePtr OpPC, uint32_t ArgSize,
              const CallExpr *CE);
 
@@ -267,7 +268,7 @@ bool Interpret(InterpState &S, APValue &Result);
 
 /// Interpret a builtin function.
 bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F,
-                      const CallExpr *Call);
+                      const CallExpr *Call, uint32_t BuiltinID);
 
 /// Interpret an offsetof operation.
 bool InterpretOffsetOf(InterpState &S, CodePtr OpPC, const OffsetOfExpr *E,
@@ -281,7 +282,8 @@ enum class ArithOp { Add, Sub };
 // Returning values
 //===----------------------------------------------------------------------===//
 
-void cleanupAfterFunctionCall(InterpState &S, CodePtr OpPC);
+void cleanupAfterFunctionCall(InterpState &S, CodePtr OpPC,
+                              const Function *Func);
 
 template <PrimType Name, class T = typename PrimConv<Name>::T>
 bool Ret(InterpState &S, CodePtr &PC, APValue &Result) {
@@ -302,7 +304,7 @@ bool Ret(InterpState &S, CodePtr &PC, APValue &Result) {
   assert(S.Current);
   assert(S.Current->getFrameOffset() == S.Stk.size() && "Invalid frame");
   if (!S.checkingPotentialConstantExpression() || S.Current->Caller)
-    cleanupAfterFunctionCall(S, PC);
+    cleanupAfterFunctionCall(S, PC, S.Current->getFunction());
 
   if (InterpFrame *Caller = S.Current->Caller) {
     PC = S.Current->getRetPC();
@@ -322,7 +324,7 @@ inline bool RetVoid(InterpState &S, CodePtr &PC, APValue &Result) {
   assert(S.Current->getFrameOffset() == S.Stk.size() && "Invalid frame");
 
   if (!S.checkingPotentialConstantExpression() || S.Current->Caller)
-    cleanupAfterFunctionCall(S, PC);
+    cleanupAfterFunctionCall(S, PC, S.Current->getFunction());
 
   if (InterpFrame *Caller = S.Current->Caller) {
     PC = S.Current->getRetPC();
@@ -1609,6 +1611,12 @@ inline bool GetPtrDerivedPop(InterpState &S, CodePtr OpPC, uint32_t Off) {
 
 inline bool GetPtrBase(InterpState &S, CodePtr OpPC, uint32_t Off) {
   const Pointer &Ptr = S.Stk.peek<Pointer>();
+
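+  // Integer (non-block) pointers carry no inline descriptors; compute the
+  // base class offset from the record layout instead.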
+  if (!Ptr.isBlockPointer()) {
+    S.Stk.push<Pointer>(Ptr.asIntPointer().baseCast(S.getASTContext(), Off));
+    return true;
+  }
+
   if (!CheckNull(S, OpPC, Ptr, CSK_Base))
     return false;
   if (!CheckSubobject(S, OpPC, Ptr, CSK_Base))
@@ -1622,6 +1630,12 @@ inline bool GetPtrBase(InterpState &S, CodePtr OpPC, uint32_t Off) {
 
 inline bool GetPtrBasePop(InterpState &S, CodePtr OpPC, uint32_t Off) {
   const Pointer &Ptr = S.Stk.pop<Pointer>();
+
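+  // As in GetPtrBase: adjust integer (non-block) pointers via the record
+  // layout.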
+  if (!Ptr.isBlockPointer()) {
+    S.Stk.push<Pointer>(Ptr.asIntPointer().baseCast(S.getASTContext(), Off));
+    return true;
+  }
+
   if (!CheckNull(S, OpPC, Ptr, CSK_Base))
     return false;
   if (!CheckSubobject(S, OpPC, Ptr, CSK_Base))
@@ -1991,7 +2005,9 @@ bool OffsetHelper(InterpState &S, CodePtr OpPC, const T &Offset,
 template <PrimType Name, class T = typename PrimConv<Name>::T>
 bool AddOffset(InterpState &S, CodePtr OpPC) {
   const T &Offset = S.Stk.pop<T>();
-  const Pointer &Ptr = S.Stk.pop<Pointer>();
+  Pointer Ptr = S.Stk.pop<Pointer>();
+  if (Ptr.isBlockPointer())
+    Ptr = Ptr.expand();
   return OffsetHelper<T, ArithOp::Add>(S, OpPC, Offset, Ptr);
 }
 
@@ -2658,6 +2674,9 @@ inline bool Unsupported(InterpState &S, CodePtr OpPC) {
 
 /// Do nothing and just abort execution.
 inline bool Error(InterpState &S, CodePtr OpPC) { return false; }
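+/// Record a side effect we could not model and ask the evaluation state
+/// whether to keep evaluating.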
+inline bool SideEffect(InterpState &S, CodePtr OpPC) {
+  return S.noteSideEffect();
+}
 
 /// Same here, but only for casts.
 inline bool InvalidCast(InterpState &S, CodePtr OpPC, CastKind Kind,
@@ -2793,10 +2812,11 @@ inline bool Alloc(InterpState &S, CodePtr OpPC, const Descriptor *Desc) {
     return false;
 
   DynamicAllocator &Allocator = S.getAllocator();
-  Block *B = Allocator.allocate(Desc, S.Ctx.getEvalID());
+  Block *B = Allocator.allocate(Desc, S.Ctx.getEvalID(),
+                                DynamicAllocator::Form::NonArray);
   assert(B);
 
-  S.Stk.push<Pointer>(B, sizeof(InlineDescriptor));
+  S.Stk.push<Pointer>(B);
 
   return true;
 }
@@ -2818,8 +2838,9 @@ inline bool AllocN(InterpState &S, CodePtr OpPC, PrimType T, const Expr *Source,
   }
 
   DynamicAllocator &Allocator = S.getAllocator();
-  Block *B = Allocator.allocate(Source, T, static_cast<size_t>(NumElements),
-                                S.Ctx.getEvalID());
+  Block *B =
+      Allocator.allocate(Source, T, static_cast<size_t>(NumElements),
+                         S.Ctx.getEvalID(), DynamicAllocator::Form::Array);
   assert(B);
   S.Stk.push<Pointer>(B, sizeof(InlineDescriptor));
 
@@ -2844,8 +2865,9 @@ inline bool AllocCN(InterpState &S, CodePtr OpPC, const Descriptor *ElementDesc,
   }
 
   DynamicAllocator &Allocator = S.getAllocator();
-  Block *B = Allocator.allocate(ElementDesc, static_cast<size_t>(NumElements),
-                                S.Ctx.getEvalID());
+  Block *B =
+      Allocator.allocate(ElementDesc, static_cast<size_t>(NumElements),
+                         S.Ctx.getEvalID(), DynamicAllocator::Form::Array);
   assert(B);
 
   S.Stk.push<Pointer>(B, sizeof(InlineDescriptor));
@@ -2890,8 +2912,9 @@ static inline bool Free(InterpState &S, CodePtr OpPC, bool DeleteIsArrayForm) {
     return false;
 
   DynamicAllocator &Allocator = S.getAllocator();
-  bool WasArrayAlloc = Allocator.isArrayAllocation(Source);
   const Descriptor *BlockDesc = BlockToDelete->getDescriptor();
+  std::optional<DynamicAllocator::Form> AllocForm =
+      Allocator.getAllocationForm(Source);
 
   if (!Allocator.deallocate(Source, BlockToDelete, S)) {
     // Nothing has been deallocated, this must be a double-delete.
@@ -2899,8 +2922,13 @@ static inline bool Free(InterpState &S, CodePtr OpPC, bool DeleteIsArrayForm) {
     S.FFDiag(Loc, diag::note_constexpr_double_delete);
     return false;
   }
-  return CheckNewDeleteForms(S, OpPC, WasArrayAlloc, DeleteIsArrayForm,
-                             BlockDesc, Source);
+
+  assert(AllocForm);
+  DynamicAllocator::Form DeleteForm = DeleteIsArrayForm
+                                          ? DynamicAllocator::Form::Array
+                                          : DynamicAllocator::Form::NonArray;
+  return CheckNewDeleteForms(S, OpPC, *AllocForm, DeleteForm, BlockDesc,
+                             Source);
 }
 
 static inline bool IsConstantContext(InterpState &S, CodePtr OpPC) {
diff --git clang/lib/AST/ByteCode/InterpBuiltin.cpp clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 81e49f203524..51c77b7da1a6 100644
--- clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -1244,14 +1244,164 @@ static bool interp__builtin_constant_p(InterpState &S, CodePtr OpPC,
   return returnInt(false);
 }
 
+static bool interp__builtin_operator_new(InterpState &S, CodePtr OpPC,
+                                         const InterpFrame *Frame,
+                                         const Function *Func,
+                                         const CallExpr *Call) {
+  // A call to __builtin_operator_new is only valid within
+  // std::allocator<T>::allocate. Walk up the call stack to find the
+  // appropriate caller and get the element type from it.
+  QualType ElemType;
+
+  for (const InterpFrame *F = Frame; F; F = F->Caller) {
+    const Function *Func = F->getFunction();
+    if (!Func)
+      continue;
+    const auto *MD = dyn_cast_if_present<CXXMethodDecl>(Func->getDecl());
+    if (!MD)
+      continue;
+    const IdentifierInfo *FnII = MD->getIdentifier();
+    if (!FnII || !FnII->isStr("allocate"))
+      continue;
+
+    const auto *CTSD =
+        dyn_cast<ClassTemplateSpecializationDecl>(MD->getParent());
+    if (!CTSD)
+      continue;
+
+    const IdentifierInfo *ClassII = CTSD->getIdentifier();
+    const TemplateArgumentList &TAL = CTSD->getTemplateArgs();
+    if (CTSD->isInStdNamespace() && ClassII && ClassII->isStr("allocator") &&
+        TAL.size() >= 1 && TAL[0].getKind() == TemplateArgument::Type) {
+      ElemType = TAL[0].getAsType();
+      break;
+    }
+  }
+
+  if (ElemType.isNull()) {
+    S.FFDiag(Call, S.getLangOpts().CPlusPlus20
+                       ? diag::note_constexpr_new_untyped
+                       : diag::note_constexpr_new);
+    return false;
+  }
+
+  if (ElemType->isIncompleteType() || ElemType->isFunctionType()) {
+    S.FFDiag(Call, diag::note_constexpr_new_not_complete_object_type)
+        << (ElemType->isIncompleteType() ? 0 : 1) << ElemType;
+    return false;
+  }
+
+  APSInt Bytes = peekToAPSInt(S.Stk, *S.getContext().classify(Call->getArg(0)));
+  CharUnits ElemSize = S.getASTContext().getTypeSizeInChars(ElemType);
+  assert(!ElemSize.isZero());
+  // Divide the number of bytes by sizeof(ElemType), so we get the number of
+  // elements we should allocate.
+  APInt NumElems, Remainder;
+  APInt ElemSizeAP(Bytes.getBitWidth(), ElemSize.getQuantity());
+  APInt::udivrem(Bytes, ElemSizeAP, NumElems, Remainder);
+  if (Remainder != 0) {
+    // This likely indicates a bug in the implementation of 'std::allocator'.
+    S.FFDiag(Call, diag::note_constexpr_operator_new_bad_size)
+        << Bytes << APSInt(ElemSizeAP, true) << ElemType;
+    return false;
+  }
+
+  // FIXME: CheckArraySize for NumElems?
+
+  std::optional<PrimType> ElemT = S.getContext().classify(ElemType);
+  DynamicAllocator &Allocator = S.getAllocator();
+  if (ElemT) {
+    if (NumElems.ule(1)) {
+      const Descriptor *Desc =
+          S.P.createDescriptor(Call, *ElemT, Descriptor::InlineDescMD,
+                               /*IsConst=*/false, /*IsTemporary=*/false,
+                               /*IsMutable=*/false);
+      Block *B = Allocator.allocate(Desc, S.getContext().getEvalID(),
+                                    DynamicAllocator::Form::Operator);
+      assert(B);
+
+      S.Stk.push<Pointer>(B);
+      return true;
+    }
+    assert(NumElems.ugt(1));
+
+    Block *B =
+        Allocator.allocate(Call, *ElemT, NumElems.getZExtValue(),
+                           S.Ctx.getEvalID(), DynamicAllocator::Form::Operator);
+    assert(B);
+    S.Stk.push<Pointer>(B);
+    return true;
+  }
+
+  assert(!ElemT);
+  // Structs etc.
+  const Descriptor *Desc = S.P.createDescriptor(
+      Call, ElemType.getTypePtr(),
+      NumElems.ule(1) ? std::nullopt : Descriptor::InlineDescMD,
+      /*IsConst=*/false, /*IsTemporary=*/false, /*IsMutable=*/false,
+      /*Init=*/nullptr);
+
+  if (NumElems.ule(1)) {
+    Block *B = Allocator.allocate(Desc, S.getContext().getEvalID(),
+                                  DynamicAllocator::Form::Operator);
+    assert(B);
+    S.Stk.push<Pointer>(B);
+    return true;
+  }
+
+  Block *B =
+      Allocator.allocate(Desc, NumElems.getZExtValue(), S.Ctx.getEvalID(),
+                         DynamicAllocator::Form::Operator);
+  assert(B);
+  S.Stk.push<Pointer>(B);
+  return true;
+}
+
+static bool interp__builtin_operator_delete(InterpState &S, CodePtr OpPC,
+                                            const InterpFrame *Frame,
+                                            const Function *Func,
+                                            const CallExpr *Call) {
+  const Expr *Source = nullptr;
+  const Block *BlockToDelete = nullptr;
+
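+  // Copy out the source expression and the block first; the peeked Pointer
+  // must not be used once the allocation has been deallocated.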
+  {
+    const Pointer &Ptr = S.Stk.peek<Pointer>();
+
+    if (Ptr.isZero()) {
+      S.CCEDiag(Call, diag::note_constexpr_deallocate_null);
+      return true;
+    }
+
+    Source = Ptr.getDeclDesc()->asExpr();
+    BlockToDelete = Ptr.block();
+  }
+  assert(BlockToDelete);
+
+  DynamicAllocator &Allocator = S.getAllocator();
+  const Descriptor *BlockDesc = BlockToDelete->getDescriptor();
+  std::optional<DynamicAllocator::Form> AllocForm =
+      Allocator.getAllocationForm(Source);
+
+  if (!Allocator.deallocate(Source, BlockToDelete, S)) {
+    // Nothing has been deallocated, this must be a double-delete.
+    const SourceInfo &Loc = S.Current->getSource(OpPC);
+    S.FFDiag(Loc, diag::note_constexpr_double_delete);
+    return false;
+  }
+  assert(AllocForm);
+
+  return CheckNewDeleteForms(
+      S, OpPC, *AllocForm, DynamicAllocator::Form::Operator, BlockDesc, Source);
+}
+
 bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F,
-                      const CallExpr *Call) {
+                      const CallExpr *Call, uint32_t BuiltinID) {
   const InterpFrame *Frame = S.Current;
   APValue Dummy;
 
   std::optional<PrimType> ReturnT = S.getContext().classify(Call);
 
-  switch (F->getBuiltinID()) {
+  switch (BuiltinID) {
   case Builtin::BI__builtin_is_constant_evaluated:
     if (!interp__builtin_is_constant_evaluated(S, OpPC, Frame, Call))
       return false;
@@ -1597,6 +1747,16 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F,
     pushInteger(S, 0, Call->getType());
     break;
 
+  case Builtin::BI__builtin_operator_new:
+    if (!interp__builtin_operator_new(S, OpPC, Frame, F, Call))
+      return false;
+    break;
+
+  case Builtin::BI__builtin_operator_delete:
+    if (!interp__builtin_operator_delete(S, OpPC, Frame, F, Call))
+      return false;
+    break;
+
   default:
     S.FFDiag(S.Current->getLocation(OpPC),
              diag::note_invalid_subexpr_in_const_expr)
diff --git clang/lib/AST/ByteCode/InterpFrame.cpp clang/lib/AST/ByteCode/InterpFrame.cpp
index c75386eaeb4c..28e189bb339e 100644
--- clang/lib/AST/ByteCode/InterpFrame.cpp
+++ clang/lib/AST/ByteCode/InterpFrame.cpp
@@ -96,11 +96,6 @@ void InterpFrame::destroy(unsigned Idx) {
   }
 }
 
-void InterpFrame::popArgs() {
-  for (PrimType Ty : Func->args_reverse())
-    TYPE_SWITCH(Ty, S.Stk.discard<T>());
-}
-
 template <typename T>
 static void print(llvm::raw_ostream &OS, const T &V, ASTContext &ASTCtx,
                   QualType Ty) {
@@ -212,31 +207,40 @@ Pointer InterpFrame::getParamPointer(unsigned Off) {
   return Pointer(B);
 }
 
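+// Whether \p F has a body whose source we can point diagnostics at.
+// Implicitly created functions other than constructors and destructors don't.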
+static bool funcHasUsableBody(const Function *F) {
+  assert(F);
+
+  if (F->isConstructor() || F->isDestructor())
+    return true;
+
+  return !F->getDecl()->isImplicit();
+}
+
 SourceInfo InterpFrame::getSource(CodePtr PC) const {
   // Implicitly created functions don't have any code we could point at,
   // so return the call site.
-  if (Func && (!Func->hasBody() || Func->getDecl()->isImplicit()) && Caller)
+  if (Func && !funcHasUsableBody(Func) && Caller)
     return Caller->getSource(RetPC);
 
   return S.getSource(Func, PC);
 }
 
 const Expr *InterpFrame::getExpr(CodePtr PC) const {
-  if (Func && (!Func->hasBody() || Func->getDecl()->isImplicit()) && Caller)
-    return Caller->getExpr(RetPC);
+  if (Func && !funcHasUsableBody(Func) && Caller)
+    return Caller->getExpr(PC);
 
   return S.getExpr(Func, PC);
 }
 
 SourceLocation InterpFrame::getLocation(CodePtr PC) const {
-  if (Func && (!Func->hasBody() || Func->getDecl()->isImplicit()) && Caller)
+  if (Func && !funcHasUsableBody(Func) && Caller)
     return Caller->getLocation(RetPC);
 
   return S.getLocation(Func, PC);
 }
 
 SourceRange InterpFrame::getRange(CodePtr PC) const {
-  if (Func && (!Func->hasBody() || Func->getDecl()->isImplicit()) && Caller)
+  if (Func && !funcHasUsableBody(Func) && Caller)
     return Caller->getRange(RetPC);
 
   return S.getRange(Func, PC);
diff --git clang/lib/AST/ByteCode/InterpFrame.h clang/lib/AST/ByteCode/InterpFrame.h
index 1e0d2b1d4424..802777a523d9 100644
--- clang/lib/AST/ByteCode/InterpFrame.h
+++ clang/lib/AST/ByteCode/InterpFrame.h
@@ -46,9 +46,6 @@ public:
   void destroy(unsigned Idx);
   void initScope(unsigned Idx);
 
-  /// Pops the arguments off the stack.
-  void popArgs();
-
   /// Describes the frame with arguments for diagnostic purposes.
   void describe(llvm::raw_ostream &OS) const override;
 
diff --git clang/lib/AST/ByteCode/InterpStack.cpp clang/lib/AST/ByteCode/InterpStack.cpp
index b8cdaeee7216..ae3721e98374 100644
--- clang/lib/AST/ByteCode/InterpStack.cpp
+++ clang/lib/AST/ByteCode/InterpStack.cpp
@@ -32,6 +32,16 @@ void InterpStack::clear() {
 #endif
 }
 
+void InterpStack::clearTo(size_t NewSize) {
+  assert(NewSize <= size());
+  size_t ToShrink = size() - NewSize;
+  if (ToShrink == 0)
+    return;
+
+  shrink(ToShrink);
+  assert(size() == NewSize);
+}
+
 void *InterpStack::grow(size_t Size) {
   assert(Size < ChunkSize - sizeof(StackChunk) && "Object too large");
 
@@ -81,6 +91,21 @@ void InterpStack::shrink(size_t Size) {
 
   Chunk->End -= Size;
   StackSize -= Size;
+
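+  // Debug only: keep the ItemTypes bookkeeping in sync by dropping the types
+  // that no longer fit into the shrunken stack.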
+#ifndef NDEBUG
+  size_t TypesSize = 0;
+  for (PrimType T : ItemTypes)
+    TYPE_SWITCH(T, { TypesSize += aligned_size<T>(); });
+
+  size_t StackSize = size();
+  while (TypesSize > StackSize) {
+    TYPE_SWITCH(ItemTypes.back(), {
+      TypesSize -= aligned_size<T>();
+      ItemTypes.pop_back();
+    });
+  }
+  assert(TypesSize == StackSize);
+#endif
 }
 
 void InterpStack::dump() const {
diff --git clang/lib/AST/ByteCode/InterpStack.h clang/lib/AST/ByteCode/InterpStack.h
index 153d17f0d70e..43988bb680d1 100644
--- clang/lib/AST/ByteCode/InterpStack.h
+++ clang/lib/AST/ByteCode/InterpStack.h
@@ -86,6 +86,7 @@ public:
 
   /// Clears the stack without calling any destructors.
   void clear();
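+  /// Shrinks the stack to exactly \p NewSize, without calling any
+  /// destructors.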
+  void clearTo(size_t NewSize);
 
   /// Returns whether the stack is empty.
   bool empty() const { return StackSize == 0; }
diff --git clang/lib/AST/ByteCode/InterpState.h clang/lib/AST/ByteCode/InterpState.h
index 961ba5f5c28a..4b7371450cc9 100644
--- clang/lib/AST/ByteCode/InterpState.h
+++ clang/lib/AST/ByteCode/InterpState.h
@@ -68,6 +68,9 @@ public:
   bool keepEvaluatingAfterFailure() const override {
     return Parent.keepEvaluatingAfterFailure();
   }
+  bool keepEvaluatingAfterSideEffect() const override {
+    return Parent.keepEvaluatingAfterSideEffect();
+  }
   bool checkingPotentialConstantExpression() const override {
     return Parent.checkingPotentialConstantExpression();
   }
@@ -83,6 +86,7 @@ public:
     Parent.setFoldFailureDiagnostic(Flag);
   }
   bool hasPriorDiagnostic() override { return Parent.hasPriorDiagnostic(); }
+  bool noteSideEffect() override { return Parent.noteSideEffect(); }
 
   /// Reports overflow and return true if evaluation should continue.
   bool reportOverflow(const Expr *E, const llvm::APSInt &Value);
diff --git clang/lib/AST/ByteCode/Opcodes.td clang/lib/AST/ByteCode/Opcodes.td
index 67350abe5401..e3a88c069847 100644
--- clang/lib/AST/ByteCode/Opcodes.td
+++ clang/lib/AST/ByteCode/Opcodes.td
@@ -202,7 +202,7 @@ def CallVirt : Opcode {
 }
 
 def CallBI : Opcode {
-  let Args = [ArgFunction, ArgCallExpr];
+  let Args = [ArgFunction, ArgCallExpr, ArgUint32];
 }
 
 def CallPtr : Opcode {
@@ -732,6 +732,7 @@ def Flip : Opcode {
 def Invalid : Opcode {}
 def Unsupported : Opcode {}
 def Error : Opcode {}
+def SideEffect : Opcode {}
 def InvalidCast : Opcode {
   let Args = [ArgCastKind, ArgBool];
 }
diff --git clang/lib/AST/ByteCode/Pointer.cpp clang/lib/AST/ByteCode/Pointer.cpp
index 9eaf0db45c74..282953eb991a 100644
--- clang/lib/AST/ByteCode/Pointer.cpp
+++ clang/lib/AST/ByteCode/Pointer.cpp
@@ -667,3 +667,26 @@ IntPointer IntPointer::atOffset(const ASTContext &ASTCtx,
           .getQuantity();
   return IntPointer{this->Desc, this->Value + FieldOffset};
 }
+
+IntPointer IntPointer::baseCast(const ASTContext &ASTCtx,
+                                unsigned BaseOffset) const {
+  const Record *R = Desc->ElemRecord;
+  const Descriptor *BaseDesc = nullptr;
+
+  // This iterates over the bases and checks for the matching offset. That is
+  // potentially slow, but this case should be rare.
+  for (const Record::Base &B : R->bases()) {
+    if (B.Offset == BaseOffset) {
+      BaseDesc = B.Desc;
+      break;
+    }
+  }
+  assert(BaseDesc);
+
+  // Adjust the offset value based on the information from the record layout.
+  const ASTRecordLayout &Layout = ASTCtx.getASTRecordLayout(R->getDecl());
+  CharUnits BaseLayoutOffset =
+      Layout.getBaseClassOffset(cast<CXXRecordDecl>(BaseDesc->asDecl()));
+
+  return {BaseDesc, Value + BaseLayoutOffset.getQuantity()};
+}
diff --git clang/lib/AST/ByteCode/Pointer.h clang/lib/AST/ByteCode/Pointer.h
index d05d8e9bc1f3..ac9b9ed4091b 100644
--- clang/lib/AST/ByteCode/Pointer.h
+++ clang/lib/AST/ByteCode/Pointer.h
@@ -46,6 +46,7 @@ struct IntPointer {
   uint64_t Value;
 
   IntPointer atOffset(const ASTContext &ASTCtx, unsigned Offset) const;
+  IntPointer baseCast(const ASTContext &ASTCtx, unsigned BaseOffset) const;
 };
 
 enum class Storage { Block, Int, Fn };
@@ -241,9 +242,8 @@ public:
     if (asBlockPointer().Base != Offset)
       return *this;
 
-    // If at base, point to an array of base types.
     if (isRoot())
-      return Pointer(Pointee, RootPtrMark, 0);
+      return Pointer(Pointee, asBlockPointer().Base, asBlockPointer().Base);
 
     // Step into the containing array, if inside one.
     unsigned Next = asBlockPointer().Base - getInlineDesc()->Offset;
@@ -711,8 +711,10 @@ private:
 
   /// Returns the embedded descriptor preceding a field.
   InlineDescriptor *getInlineDesc() const {
+    assert(isBlockPointer());
     assert(asBlockPointer().Base != sizeof(GlobalInlineDescriptor));
     assert(asBlockPointer().Base <= asBlockPointer().Pointee->getSize());
+    assert(asBlockPointer().Base >= sizeof(InlineDescriptor));
     return getDescriptor(asBlockPointer().Base);
   }
 
diff --git clang/lib/AST/ByteCode/Program.cpp clang/lib/AST/ByteCode/Program.cpp
index a4f0df8bf646..bd5860beabae 100644
--- clang/lib/AST/ByteCode/Program.cpp
+++ clang/lib/AST/ByteCode/Program.cpp
@@ -144,22 +144,33 @@ std::optional<unsigned> Program::getOrCreateGlobal(const ValueDecl *VD,
   return std::nullopt;
 }
 
-std::optional<unsigned> Program::getOrCreateDummy(const ValueDecl *VD) {
+std::optional<unsigned> Program::getOrCreateDummy(const DeclTy &D) {
+  assert(D);
   // Dedup blocks since they are immutable and pointers cannot be compared.
-  if (auto It = DummyVariables.find(VD); It != DummyVariables.end())
+  if (auto It = DummyVariables.find(D.getOpaqueValue());
+      It != DummyVariables.end())
     return It->second;
 
-  QualType QT = VD->getType();
-  if (const auto *RT = QT->getAs<ReferenceType>())
-    QT = RT->getPointeeType();
+  QualType QT;
+  if (const auto *E = D.dyn_cast<const Expr *>()) {
+    QT = E->getType();
+  } else {
+    const ValueDecl *VD = cast<ValueDecl>(D.get<const Decl *>());
+    QT = VD->getType();
+    if (const auto *RT = QT->getAs<ReferenceType>())
+      QT = RT->getPointeeType();
+  }
+  assert(!QT.isNull());
 
   Descriptor *Desc;
   if (std::optional<PrimType> T = Ctx.classify(QT))
-    Desc = createDescriptor(VD, *T, std::nullopt, true, false);
+    Desc = createDescriptor(D, *T, std::nullopt, /*IsTemporary=*/true,
+                            /*IsMutable=*/false);
   else
-    Desc = createDescriptor(VD, QT.getTypePtr(), std::nullopt, true, false);
+    Desc = createDescriptor(D, QT.getTypePtr(), std::nullopt,
+                            /*IsTemporary=*/true, /*IsMutable=*/false);
   if (!Desc)
-    Desc = allocateDescriptor(VD);
+    Desc = allocateDescriptor(D);
 
   assert(Desc);
   Desc->makeDummy();
@@ -175,7 +186,7 @@ std::optional<unsigned> Program::getOrCreateDummy(const ValueDecl *VD) {
   G->block()->invokeCtor();
 
   Globals.push_back(G);
-  DummyVariables[VD] = I;
+  DummyVariables[D.getOpaqueValue()] = I;
   return I;
 }
 
diff --git clang/lib/AST/ByteCode/Program.h clang/lib/AST/ByteCode/Program.h
index 7f69d9790fc7..bd2672a762b8 100644
--- clang/lib/AST/ByteCode/Program.h
+++ clang/lib/AST/ByteCode/Program.h
@@ -84,7 +84,7 @@ public:
                                             const Expr *Init = nullptr);
 
   /// Returns or creates a dummy value for unknown declarations.
-  std::optional<unsigned> getOrCreateDummy(const ValueDecl *VD);
+  std::optional<unsigned> getOrCreateDummy(const DeclTy &D);
 
   /// Creates a global and returns its index.
   std::optional<unsigned> createGlobal(const ValueDecl *VD, const Expr *Init);
@@ -209,7 +209,7 @@ private:
   llvm::DenseMap<const RecordDecl *, Record *> Records;
 
   /// Dummy parameter to generate pointers from.
-  llvm::DenseMap<const ValueDecl *, unsigned> DummyVariables;
+  llvm::DenseMap<const void *, unsigned> DummyVariables;
 
   /// Creates a new descriptor.
   template <typename... Ts> Descriptor *allocateDescriptor(Ts &&...Args) {
diff --git clang/lib/AST/ByteCode/State.h clang/lib/AST/ByteCode/State.h
index 2cffce4bc2ae..9a81fa6b7d22 100644
--- clang/lib/AST/ByteCode/State.h
+++ clang/lib/AST/ByteCode/State.h
@@ -34,6 +34,7 @@ enum AccessKinds {
   AK_TypeId,
   AK_Construct,
   AK_Destroy,
+  AK_IsWithinLifetime,
 };
 
 /// The order of this enum is important for diagnostics.
@@ -61,6 +62,7 @@ public:
   virtual bool checkingPotentialConstantExpression() const = 0;
   virtual bool noteUndefinedBehavior() = 0;
   virtual bool keepEvaluatingAfterFailure() const = 0;
+  virtual bool keepEvaluatingAfterSideEffect() const = 0;
   virtual Frame *getCurrentFrame() = 0;
   virtual const Frame *getBottomFrame() const = 0;
   virtual bool hasActiveDiagnostic() = 0;
@@ -70,6 +72,7 @@ public:
   virtual ASTContext &getASTContext() const = 0;
   virtual bool hasPriorDiagnostic() = 0;
   virtual unsigned getCallStackDepth() = 0;
+  virtual bool noteSideEffect() = 0;
 
 public:
   State() = default;
diff --git clang/lib/AST/Decl.cpp clang/lib/AST/Decl.cpp
index 1a0712581583..a14b1b33d35e 100644
--- clang/lib/AST/Decl.cpp
+++ clang/lib/AST/Decl.cpp
@@ -350,7 +350,8 @@ LinkageComputer::getLVForTemplateArgumentList(ArrayRef<TemplateArgument> Args,
     case TemplateArgument::Template:
     case TemplateArgument::TemplateExpansion:
       if (TemplateDecl *Template =
-              Arg.getAsTemplateOrTemplatePattern().getAsTemplateDecl())
+              Arg.getAsTemplateOrTemplatePattern().getAsTemplateDecl(
+                  /*IgnoreDeduced=*/true))
         LV.merge(getLVForDecl(Template, computation));
       continue;
 
diff --git clang/lib/AST/DeclCXX.cpp clang/lib/AST/DeclCXX.cpp
index 9a3ede426e91..01143391edab 100644
--- clang/lib/AST/DeclCXX.cpp
+++ clang/lib/AST/DeclCXX.cpp
@@ -109,7 +109,7 @@ CXXRecordDecl::DefinitionData::DefinitionData(CXXRecordDecl *D)
       ImplicitCopyAssignmentHasConstParam(true),
       HasDeclaredCopyConstructorWithConstParam(false),
       HasDeclaredCopyAssignmentWithConstParam(false),
-      IsAnyDestructorNoReturn(false), IsLambda(false),
+      IsAnyDestructorNoReturn(false), IsHLSLIntangible(false), IsLambda(false),
       IsParsingBaseSpecifiers(false), ComputedVisibleConversions(false),
       HasODRHash(false), Definition(D) {}
 
@@ -431,6 +431,9 @@ CXXRecordDecl::setBases(CXXBaseSpecifier const * const *Bases,
     if (BaseClassDecl->isAnyDestructorNoReturn())
       data().IsAnyDestructorNoReturn = true;
 
+    if (BaseClassDecl->isHLSLIntangible())
+      data().IsHLSLIntangible = true;
+
     // C++11 [class.copy]p18:
     //   The implicitly-declared copy assignment operator for a class X will
     //   have the form 'X& X::operator=(const X&)' if each direct base class B
@@ -1401,6 +1404,18 @@ void CXXRecordDecl::addedMember(Decl *D) {
     //   than subobjects of zero size
     if (data().Empty && !IsZeroSize)
       data().Empty = false;
+
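+    // A record is HLSL-intangible if any of its fields is of an intangible
+    // builtin or record type, looking through constant arrays.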
+    if (getLangOpts().HLSL) {
+      const Type *Ty = Field->getType()->getUnqualifiedDesugaredType();
+      while (isa<ConstantArrayType>(Ty))
+        Ty = Ty->getArrayElementTypeNoTypeQual();
+
+      Ty = Ty->getUnqualifiedDesugaredType();
+      if (Ty->isBuiltinType())
+        data().IsHLSLIntangible |= Ty->isHLSLIntangibleType();
+      else if (const RecordType *RT = dyn_cast<RecordType>(Ty))
+        data().IsHLSLIntangible |= RT->getAsCXXRecordDecl()->isHLSLIntangible();
+    }
   }
 
   // Handle using declarations of conversion functions.
diff --git clang/lib/AST/Expr.cpp clang/lib/AST/Expr.cpp
index 96c6276f3f34..e10142eff8ec 100644
--- clang/lib/AST/Expr.cpp
+++ clang/lib/AST/Expr.cpp
@@ -13,6 +13,7 @@
 #include "clang/AST/Expr.h"
 #include "clang/AST/APValue.h"
 #include "clang/AST/ASTContext.h"
+#include "clang/AST/ASTLambda.h"
 #include "clang/AST/Attr.h"
 #include "clang/AST/ComputeDependence.h"
 #include "clang/AST/DeclCXX.h"
@@ -1474,6 +1475,7 @@ CallExpr::CallExpr(StmtClass SC, Expr *Fn, ArrayRef<Expr *> PreArgs,
   this->computeDependence();
 
   CallExprBits.HasFPFeatures = FPFeatures.requiresTrailingStorage();
+  CallExprBits.IsCoroElideSafe = false;
   if (hasStoredFPFeatures())
     setStoredFPFeatures(FPFeatures);
 }
@@ -1489,6 +1491,7 @@ CallExpr::CallExpr(StmtClass SC, unsigned NumPreArgs, unsigned NumArgs,
   assert((CallExprBits.OffsetToTrailingObjects == OffsetToTrailingObjects) &&
          "OffsetToTrailingObjects overflow!");
   CallExprBits.HasFPFeatures = HasFPFeatures;
+  CallExprBits.IsCoroElideSafe = false;
 }
 
 CallExpr *CallExpr::Create(const ASTContext &Ctx, Expr *Fn,
@@ -1921,7 +1924,6 @@ bool CastExpr::CastConsistency() const {
   case CK_FixedPointToIntegral:
   case CK_IntegralToFixedPoint:
   case CK_MatrixCast:
-  case CK_HLSLVectorTruncation:
     assert(!getType()->isBooleanType() && "unheralded conversion to bool");
     goto CheckNoBasePath;
 
@@ -1942,6 +1944,7 @@ bool CastExpr::CastConsistency() const {
   case CK_BuiltinFnToFnPtr:
   case CK_FixedPointToBoolean:
   case CK_HLSLArrayRValue:
+  case CK_HLSLVectorTruncation:
   CheckNoBasePath:
     assert(path_empty() && "Cast kind should not have a base path!");
     break;
@@ -2287,6 +2290,15 @@ APValue SourceLocExpr::EvaluateInContext(const ASTContext &Ctx,
     Context = getParentContext();
   }
 
+  // If we are currently parsing a lambda declarator, we might not have a
+  // fully formed call operator declaration yet, so we cannot form a function
+  // name for it. Because we do not have access to Sema/function scopes here,
+  // we detect this case by relying on the fact that such a method does not
+  // yet have a type.
+  if (const auto *D = dyn_cast<CXXMethodDecl>(Context);
+      D && D->getFunctionTypeLoc().isNull() && isLambdaCallOperator(D))
+    Context = D->getParent()->getParent();
+
   PresumedLoc PLoc = Ctx.getSourceManager().getPresumedLoc(
       Ctx.getSourceManager().getExpansionRange(Loc).getEnd());
 
diff --git clang/lib/AST/ExprConstant.cpp clang/lib/AST/ExprConstant.cpp
index 3dc13c14c003..6387e375dda7 100644
--- clang/lib/AST/ExprConstant.cpp
+++ clang/lib/AST/ExprConstant.cpp
@@ -1222,7 +1222,7 @@ namespace {
   public:
     /// Should we continue evaluation after encountering a side-effect that we
     /// couldn't model?
-    bool keepEvaluatingAfterSideEffect() {
+    bool keepEvaluatingAfterSideEffect() const override {
       switch (EvalMode) {
       case EM_IgnoreSideEffects:
         return true;
@@ -1240,7 +1240,7 @@ namespace {
 
     /// Note that we have had a side-effect, and determine whether we should
     /// keep evaluating.
-    bool noteSideEffect() {
+    bool noteSideEffect() override {
       EvalStatus.HasSideEffects = true;
       return keepEvaluatingAfterSideEffect();
     }
@@ -1522,7 +1522,8 @@ CallStackFrame::~CallStackFrame() {
 }
 
 static bool isRead(AccessKinds AK) {
-  return AK == AK_Read || AK == AK_ReadObjectRepresentation;
+  return AK == AK_Read || AK == AK_ReadObjectRepresentation ||
+         AK == AK_IsWithinLifetime;
 }
 
 static bool isModification(AccessKinds AK) {
@@ -1532,6 +1533,7 @@ static bool isModification(AccessKinds AK) {
   case AK_MemberCall:
   case AK_DynamicCast:
   case AK_TypeId:
+  case AK_IsWithinLifetime:
     return false;
   case AK_Assign:
   case AK_Increment:
@@ -1549,7 +1551,8 @@ static bool isAnyAccess(AccessKinds AK) {
 
 /// Is this an access per the C++ definition?
 static bool isFormalAccess(AccessKinds AK) {
-  return isAnyAccess(AK) && AK != AK_Construct && AK != AK_Destroy;
+  return isAnyAccess(AK) && AK != AK_Construct && AK != AK_Destroy &&
+         AK != AK_IsWithinLifetime;
 }
 
 /// Is this kind of access valid on an indeterminate object value?
@@ -1561,6 +1564,7 @@ static bool isValidIndeterminateAccess(AccessKinds AK) {
     // These need the object's value.
     return false;
 
+  case AK_IsWithinLifetime:
   case AK_ReadObjectRepresentation:
   case AK_Assign:
   case AK_Construct:
@@ -3707,7 +3711,8 @@ struct CompleteObject {
     // In C++14 onwards, it is permitted to read a mutable member whose
     // lifetime began within the evaluation.
     // FIXME: Should we also allow this in C++11?
-    if (!Info.getLangOpts().CPlusPlus14)
+    if (!Info.getLangOpts().CPlusPlus14 &&
+        AK != AccessKinds::AK_IsWithinLifetime)
       return false;
     return lifetimeStartedInEvaluation(Info, Base, /*MutableSubobject*/true);
   }
@@ -3760,6 +3765,12 @@ findSubobject(EvalInfo &Info, const Expr *E, const CompleteObject &Obj,
     if ((O->isAbsent() && !(handler.AccessKind == AK_Construct && I == N)) ||
         (O->isIndeterminate() &&
          !isValidIndeterminateAccess(handler.AccessKind))) {
+      // The object's lifetime has ended.
+      // If I is non-zero, some subobject (member or array element) of the
+      // complete object has ended its lifetime, so this is valid for
+      // IsWithinLifetime and the result is false.
+      if (I != 0 && handler.AccessKind == AK_IsWithinLifetime)
+        return false;
       if (!Info.checkingPotentialConstantExpression())
         Info.FFDiag(E, diag::note_constexpr_access_uninit)
             << handler.AccessKind << O->isIndeterminate()
@@ -3927,6 +3938,9 @@ findSubobject(EvalInfo &Info, const Expr *E, const CompleteObject &Obj,
             // Placement new onto an inactive union member makes it active.
             O->setUnion(Field, APValue());
           } else {
+            // Pointer to/into inactive union member: Not within lifetime
+            if (handler.AccessKind == AK_IsWithinLifetime)
+              return false;
             // FIXME: If O->getUnionValue() is absent, report that there's no
             // active union member rather than reporting the prior active union
             // member. We'll need to fix nullptr_t to not use APValue() as its
@@ -10921,6 +10935,15 @@ bool VectorExprEvaluator::VisitCastExpr(const CastExpr *E) {
 
     return true;
   }
+  case CK_HLSLVectorTruncation: {
+    APValue Val;
+    SmallVector<APValue, 4> Elements;
+    if (!EvaluateVector(SE, Val, Info))
+      return Error(E);
+    for (unsigned I = 0; I < NElts; I++)
+      Elements.push_back(Val.getVectorElt(I));
+    return Success(Elements, E);
+  }
   default:
     return ExprEvaluatorBaseTy::VisitCastExpr(E);
   }
@@ -11684,6 +11707,9 @@ public:
 
   bool ZeroInitialization(const Expr *E) { return Success(0, E); }
 
+  friend std::optional<bool> EvaluateBuiltinIsWithinLifetime(IntExprEvaluator &,
+                                                             const CallExpr *);
+
   //===--------------------------------------------------------------------===//
   //                            Visitor Methods
   //===--------------------------------------------------------------------===//
@@ -12743,6 +12769,11 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
     return Success(Info.InConstantContext, E);
   }
 
+  case Builtin::BI__builtin_is_within_lifetime:
+    if (auto result = EvaluateBuiltinIsWithinLifetime(*this, E))
+      return Success(*result, E);
+    return false;
+
   case Builtin::BI__builtin_ctz:
   case Builtin::BI__builtin_ctzl:
   case Builtin::BI__builtin_ctzll:
@@ -13895,16 +13926,6 @@ EvaluateComparisonBinaryOperator(EvalInfo &Info, const BinaryOperator *E,
     SubobjectDesignator &LHSDesignator = LHSValue.getLValueDesignator();
     SubobjectDesignator &RHSDesignator = RHSValue.getLValueDesignator();
 
-    // C++11 [expr.rel]p3:
-    //   Pointers to void (after pointer conversions) can be compared, with a
-    //   result defined as follows: If both pointers represent the same
-    //   address or are both the null pointer value, the result is true if the
-    //   operator is <= or >= and false otherwise; otherwise the result is
-    //   unspecified.
-    // We interpret this as applying to pointers to *cv* void.
-    if (LHSTy->isVoidPointerType() && LHSOffset != RHSOffset && IsRelational)
-      Info.CCEDiag(E, diag::note_constexpr_void_comparison);
-
     // C++11 [expr.rel]p2:
     // - If two pointers point to non-static data members of the same object,
     //   or to subobjects or array elements of such members, recursively, the
@@ -14466,7 +14487,6 @@ bool IntExprEvaluator::VisitCastExpr(const CastExpr *E) {
   case CK_FixedPointCast:
   case CK_IntegralToFixedPoint:
   case CK_MatrixCast:
-  case CK_HLSLVectorTruncation:
     llvm_unreachable("invalid cast kind for integral value");
 
   case CK_BitCast:
@@ -14639,6 +14659,12 @@ bool IntExprEvaluator::VisitCastExpr(const CastExpr *E) {
       return false;
     return Success(Value, E);
   }
+  case CK_HLSLVectorTruncation: {
+    APValue Val;
+    if (!EvaluateVector(SubExpr, Val, Info))
+      return Error(E);
+    return Success(Val.getVectorElt(0), E);
+  }
   }
 
   llvm_unreachable("unknown cast resulting in integral value");
@@ -15165,6 +15191,12 @@ bool FloatExprEvaluator::VisitCastExpr(const CastExpr *E) {
     Result = V.getComplexFloatReal();
     return true;
   }
+  case CK_HLSLVectorTruncation: {
+    APValue Val;
+    if (!EvaluateVector(SubExpr, Val, Info))
+      return Error(E);
+    return Success(Val.getVectorElt(0), E);
+  }
   }
 }
 
@@ -16269,7 +16301,7 @@ bool Expr::EvaluateAsConstantExpr(EvalResult &Result, const ASTContext &Ctx,
   Info.InConstantContext = true;
 
   if (Info.EnableNewConstInterp) {
-    if (!Info.Ctx.getInterpContext().evaluate(Info, this, Result.Val))
+    if (!Info.Ctx.getInterpContext().evaluate(Info, this, Result.Val, Kind))
       return false;
     return CheckConstantExpression(Info, getExprLoc(),
                                    getStorageType(Ctx, this), Result.Val, Kind);
@@ -17332,3 +17364,84 @@ bool Expr::tryEvaluateStrLen(uint64_t &Result, ASTContext &Ctx) const {
   EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantFold);
   return EvaluateBuiltinStrLen(this, Result, Info);
 }
+
+namespace {
+struct IsWithinLifetimeHandler {
+  EvalInfo &Info;
+  static constexpr AccessKinds AccessKind = AccessKinds::AK_IsWithinLifetime;
+  using result_type = std::optional<bool>;
+  std::optional<bool> failed() { return std::nullopt; }
+  template <typename T>
+  std::optional<bool> found(T &Subobj, QualType SubobjType) {
+    return true;
+  }
+};
+
+std::optional<bool> EvaluateBuiltinIsWithinLifetime(IntExprEvaluator &IEE,
+                                                    const CallExpr *E) {
+  EvalInfo &Info = IEE.Info;
+  // This is sometimes called during constant folding or other early
+  // evaluation of non-constant expressions. Such evaluations are unnecessary
+  // here, since this consteval builtin can never be evaluated at runtime, so
+  // simply fail to evaluate outside of a constant context.
+  if (!Info.InConstantContext)
+    return std::nullopt;
+  assert(E->getBuiltinCallee() == Builtin::BI__builtin_is_within_lifetime);
+  const Expr *Arg = E->getArg(0);
+  if (Arg->isValueDependent())
+    return std::nullopt;
+  LValue Val;
+  if (!EvaluatePointer(Arg, Val, Info))
+    return std::nullopt;
+
+  auto Error = [&](int Diag) {
+    bool CalledFromStd = false;
+    const auto *Callee = Info.CurrentCall->getCallee();
+    if (Callee && Callee->isInStdNamespace()) {
+      const IdentifierInfo *Identifier = Callee->getIdentifier();
+      CalledFromStd = Identifier && Identifier->isStr("is_within_lifetime");
+    }
+    Info.CCEDiag(CalledFromStd ? Info.CurrentCall->getCallRange().getBegin()
+                               : E->getExprLoc(),
+                 diag::err_invalid_is_within_lifetime)
+        << (CalledFromStd ? "std::is_within_lifetime"
+                          : "__builtin_is_within_lifetime")
+        << Diag;
+    return std::nullopt;
+  };
+  // C++2c [meta.const.eval]p4:
+  //   During the evaluation of an expression E as a core constant expression, a
+  //   call to this function is ill-formed unless p points to an object that is
+  //   usable in constant expressions or whose complete object's lifetime began
+  //   within E.
+
+  // Make sure it points to an object
+  // nullptr does not point to an object
+  if (Val.isNullPointer() || Val.getLValueBase().isNull())
+    return Error(0);
+  QualType T = Val.getLValueBase().getType();
+  assert(!T->isFunctionType() &&
+         "Pointers to functions should have been typed as function pointers "
+         "which would have been rejected earlier");
+  assert(T->isObjectType());
+  // Hypothetical array element is not an object
+  if (Val.getLValueDesignator().isOnePastTheEnd())
+    return Error(1);
+  assert(Val.getLValueDesignator().isValidSubobject() &&
+         "Unchecked case for valid subobject");
+  // All other ill-formed values should have failed EvaluatePointer, so the
+  // value should be a pointer to an object that is usable in a constant
+  // expression or whose complete object's lifetime began within the
+  // expression.
+  CompleteObject CO =
+      findCompleteObject(Info, E, AccessKinds::AK_IsWithinLifetime, Val, T);
+  // The lifetime hasn't begun yet if we are still evaluating the
+  // initializer ([basic.life]p(1.2))
+  if (Info.EvaluatingDeclValue && CO.Value == Info.EvaluatingDeclValue)
+    return Error(2);
+
+  if (!CO)
+    return false;
+  IsWithinLifetimeHandler handler{Info};
+  return findSubobject(Info, E, CO, Val.getLValueDesignator(), handler);
+}
+} // namespace
diff --git clang/lib/AST/ItaniumMangle.cpp clang/lib/AST/ItaniumMangle.cpp
index 1a47caac5a50..b6e1da0c3192 100644
--- clang/lib/AST/ItaniumMangle.cpp
+++ clang/lib/AST/ItaniumMangle.cpp
@@ -2384,6 +2384,8 @@ void CXXNameMangler::mangleType(TemplateName TN) {
     Out << "_SUBSTPACK_";
     break;
   }
+  case TemplateName::DeducedTemplate:
+    llvm_unreachable("Unexpected DeducedTemplate");
   }
 
   addSubstitution(TN);
@@ -2502,6 +2504,7 @@ bool CXXNameMangler::mangleUnresolvedTypeOrSimpleId(QualType Ty,
     case TemplateName::OverloadedTemplate:
     case TemplateName::AssumedTemplate:
     case TemplateName::DependentTemplate:
+    case TemplateName::DeducedTemplate:
       llvm_unreachable("invalid base for a template specialization type");
 
     case TemplateName::SubstTemplateTemplateParm: {
@@ -3381,8 +3384,7 @@ void CXXNameMangler::mangleType(const BuiltinType *T) {
   // The SVE types are effectively target-specific.  The mangling scheme
   // is defined in the appendices to the Procedure Call Standard for the
   // Arm Architecture.
-#define SVE_VECTOR_TYPE(InternalName, MangledName, Id, SingletonId, NumEls,    \
-                        ElBits, IsSigned, IsFP, IsBF)                          \
+#define SVE_VECTOR_TYPE(Name, MangledName, Id, SingletonId)                    \
   case BuiltinType::Id:                                                        \
     if (T->getKind() == BuiltinType::SveBFloat16 &&                            \
         isCompatibleWith(LangOptions::ClangABI::Ver17)) {                      \
@@ -3391,21 +3393,18 @@ void CXXNameMangler::mangleType(const BuiltinType *T) {
       Out << "u" << type_name.size() << type_name;                             \
     } else {                                                                   \
       type_name = MangledName;                                                 \
-      Out << (type_name == InternalName ? "u" : "") << type_name.size()        \
-          << type_name;                                                        \
+      Out << (type_name == Name ? "u" : "") << type_name.size() << type_name;  \
     }                                                                          \
     break;
-#define SVE_PREDICATE_TYPE(InternalName, MangledName, Id, SingletonId, NumEls) \
+#define SVE_PREDICATE_TYPE(Name, MangledName, Id, SingletonId)                 \
   case BuiltinType::Id:                                                        \
     type_name = MangledName;                                                   \
-    Out << (type_name == InternalName ? "u" : "") << type_name.size()          \
-        << type_name;                                                          \
+    Out << (type_name == Name ? "u" : "") << type_name.size() << type_name;    \
     break;
-#define SVE_OPAQUE_TYPE(InternalName, MangledName, Id, SingletonId)            \
+#define SVE_OPAQUE_TYPE(Name, MangledName, Id, SingletonId)                    \
   case BuiltinType::Id:                                                        \
     type_name = MangledName;                                                   \
-    Out << (type_name == InternalName ? "u" : "") << type_name.size()          \
-        << type_name;                                                          \
+    Out << (type_name == Name ? "u" : "") << type_name.size() << type_name;    \
     break;
 #include "clang/Basic/AArch64SVEACLETypes.def"
 #define PPC_VECTOR_TYPE(Name, Id, Size) \
@@ -5936,7 +5935,10 @@ struct CXXNameMangler::TemplateArgManglingInfo {
     // that of the template.
     auto *TTP = cast<TemplateTemplateParmDecl>(Param);
     TemplateName ArgTemplateName = Arg.getAsTemplateOrTemplatePattern();
-    const TemplateDecl *ArgTemplate = ArgTemplateName.getAsTemplateDecl();
+    assert(!ArgTemplateName.getTemplateDeclAndDefaultArgs().second &&
+           "A DeducedTemplateName shouldn't escape partial ordering");
+    const TemplateDecl *ArgTemplate =
+        ArgTemplateName.getAsTemplateDecl(/*IgnoreDeduced=*/true);
     if (!ArgTemplate)
       return true;
 
diff --git clang/lib/AST/ODRHash.cpp clang/lib/AST/ODRHash.cpp
index b748093831e3..192931436381 100644
--- clang/lib/AST/ODRHash.cpp
+++ clang/lib/AST/ODRHash.cpp
@@ -162,6 +162,8 @@ void ODRHash::AddTemplateName(TemplateName Name) {
   case TemplateName::SubstTemplateTemplateParmPack:
   case TemplateName::UsingTemplate:
     break;
+  case TemplateName::DeducedTemplate:
+    llvm_unreachable("Unexpected DeducedTemplate");
   }
 }
 
diff --git clang/lib/AST/OpenMPClause.cpp clang/lib/AST/OpenMPClause.cpp
index 7e73c0762394..eb15aa844069 100644
--- clang/lib/AST/OpenMPClause.cpp
+++ clang/lib/AST/OpenMPClause.cpp
@@ -1125,16 +1125,12 @@ unsigned OMPClauseMappableExprCommon::getComponentsTotalNumber(
 
 unsigned OMPClauseMappableExprCommon::getUniqueDeclarationsTotalNumber(
     ArrayRef<const ValueDecl *> Declarations) {
-  unsigned TotalNum = 0u;
-  llvm::SmallPtrSet<const ValueDecl *, 8> Cache;
+  llvm::SmallPtrSet<const ValueDecl *, 8> UniqueDecls;
   for (const ValueDecl *D : Declarations) {
     const ValueDecl *VD = D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
-    if (Cache.count(VD))
-      continue;
-    ++TotalNum;
-    Cache.insert(VD);
+    UniqueDecls.insert(VD);
   }
-  return TotalNum;
+  return UniqueDecls.size();
 }
 
 OMPMapClause *OMPMapClause::Create(
diff --git clang/lib/AST/TemplateName.cpp clang/lib/AST/TemplateName.cpp
index d4e8a8971a97..044a1a92469a 100644
--- clang/lib/AST/TemplateName.cpp
+++ clang/lib/AST/TemplateName.cpp
@@ -34,6 +34,30 @@
 
 using namespace clang;
 
+DeducedTemplateStorage::DeducedTemplateStorage(TemplateName Underlying,
+                                               const DefaultArguments &DefArgs)
+    : UncommonTemplateNameStorage(Deduced, /*Index=*/DefArgs.StartPos,
+                                  DefArgs.Args.size()),
+      Underlying(Underlying) {
+  llvm::copy(DefArgs.Args, reinterpret_cast<TemplateArgument *>(this + 1));
+}
+
+void DeducedTemplateStorage::Profile(llvm::FoldingSetNodeID &ID,
+                                     const ASTContext &Context) const {
+  Profile(ID, Context, Underlying, getDefaultArguments());
+}
+
+void DeducedTemplateStorage::Profile(llvm::FoldingSetNodeID &ID,
+                                     const ASTContext &Context,
+                                     TemplateName Underlying,
+                                     const DefaultArguments &DefArgs) {
+  Underlying.Profile(ID);
+  ID.AddInteger(DefArgs.StartPos);
+  ID.AddInteger(DefArgs.Args.size());
+  for (const TemplateArgument &Arg : DefArgs.Args)
+    Arg.Profile(ID, Context);
+}
+
 TemplateArgument
 SubstTemplateTemplateParmPackStorage::getArgumentPack() const {
   return TemplateArgument(llvm::ArrayRef(Arguments, Bits.Data));
@@ -115,6 +139,8 @@ TemplateName::TemplateName(SubstTemplateTemplateParmPackStorage *Storage)
 TemplateName::TemplateName(QualifiedTemplateName *Qual) : Storage(Qual) {}
 TemplateName::TemplateName(DependentTemplateName *Dep) : Storage(Dep) {}
 TemplateName::TemplateName(UsingShadowDecl *Using) : Storage(Using) {}
+TemplateName::TemplateName(DeducedTemplateStorage *Deduced)
+    : Storage(Deduced) {}
 
 bool TemplateName::isNull() const { return Storage.isNull(); }
 
@@ -139,28 +165,63 @@ TemplateName::NameKind TemplateName::getKind() const {
     return AssumedTemplate;
   if (uncommon->getAsSubstTemplateTemplateParm())
     return SubstTemplateTemplateParm;
+  if (uncommon->getAsDeducedTemplateName())
+    return DeducedTemplate;
+
+  assert(uncommon->getAsSubstTemplateTemplateParmPack() != nullptr);
   return SubstTemplateTemplateParmPack;
 }
 
-TemplateDecl *TemplateName::getAsTemplateDecl() const {
-  if (Decl *TemplateOrUsing = Storage.dyn_cast<Decl *>()) {
-    if (UsingShadowDecl *USD = dyn_cast<UsingShadowDecl>(TemplateOrUsing))
-      return cast<TemplateDecl>(USD->getTargetDecl());
-
-    assert(isa<TemplateDecl>(TemplateOrUsing));
-    return cast<TemplateDecl>(TemplateOrUsing);
-  }
+TemplateDecl *TemplateName::getAsTemplateDecl(bool IgnoreDeduced) const {
+  TemplateName Name = *this;
+  while (std::optional<TemplateName> UnderlyingOrNone =
+             Name.desugar(IgnoreDeduced))
+    Name = *UnderlyingOrNone;
 
-  if (QualifiedTemplateName *QTN = getAsQualifiedTemplateName())
-    return QTN->getUnderlyingTemplate().getAsTemplateDecl();
+  if (!IgnoreDeduced)
+    assert(Name.getAsDeducedTemplateName() == nullptr &&
+           "Unexpected canonical DeducedTemplateName; Did you mean to use "
+           "getTemplateDeclAndDefaultArgs instead?");
 
-  if (SubstTemplateTemplateParmStorage *sub = getAsSubstTemplateTemplateParm())
-    return sub->getReplacement().getAsTemplateDecl();
+  return cast_if_present<TemplateDecl>(Name.Storage.dyn_cast<Decl *>());
+}
 
-  if (UsingShadowDecl *USD = getAsUsingShadowDecl())
-    return cast<TemplateDecl>(USD->getTargetDecl());
+std::pair<TemplateDecl *, DefaultArguments>
+TemplateName::getTemplateDeclAndDefaultArgs() const {
+  for (TemplateName Name = *this; /**/; /**/) {
+    if (Name.getKind() == TemplateName::DeducedTemplate) {
+      DeducedTemplateStorage *DTS = Name.getAsDeducedTemplateName();
+      TemplateDecl *TD =
+          DTS->getUnderlying().getAsTemplateDecl(/*IgnoreDeduced=*/true);
+      DefaultArguments DefArgs = DTS->getDefaultArguments();
+      if (TD && DefArgs)
+        assert(DefArgs.StartPos + DefArgs.Args.size() <=
+               TD->getTemplateParameters()->size());
+      return {TD, DTS->getDefaultArguments()};
+    }
+    if (std::optional<TemplateName> UnderlyingOrNone =
+            Name.desugar(/*IgnoreDeduced=*/false)) {
+      Name = *UnderlyingOrNone;
+      continue;
+    }
+    return {cast_if_present<TemplateDecl>(Name.Storage.dyn_cast<Decl *>()), {}};
+  }
+}
 
-  return nullptr;
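+// Strips a single level of sugar from the template name, returning
+// std::nullopt once no further desugaring is possible; deduced template
+// names are only looked through when \p IgnoreDeduced is set.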
+std::optional<TemplateName> TemplateName::desugar(bool IgnoreDeduced) const {
+  if (Decl *D = Storage.dyn_cast<Decl *>()) {
+    if (auto *USD = dyn_cast<UsingShadowDecl>(D))
+      return TemplateName(USD->getTargetDecl());
+    return std::nullopt;
+  }
+  if (QualifiedTemplateName *QTN = getAsQualifiedTemplateName())
+    return QTN->getUnderlyingTemplate();
+  if (SubstTemplateTemplateParmStorage *S = getAsSubstTemplateTemplateParm())
+    return S->getReplacement();
+  if (IgnoreDeduced)
+    if (DeducedTemplateStorage *S = getAsDeducedTemplateName())
+      return S->getUnderlying();
+  return std::nullopt;
 }
 
 OverloadedTemplateStorage *TemplateName::getAsOverloadedTemplate() const {
@@ -214,26 +275,20 @@ UsingShadowDecl *TemplateName::getAsUsingShadowDecl() const {
   return nullptr;
 }
 
+DeducedTemplateStorage *TemplateName::getAsDeducedTemplateName() const {
+  if (UncommonTemplateNameStorage *Uncommon =
+          Storage.dyn_cast<UncommonTemplateNameStorage *>())
+    return Uncommon->getAsDeducedTemplateName();
+
+  return nullptr;
+}
+
 TemplateNameDependence TemplateName::getDependence() const {
-  auto D = TemplateNameDependence::None;
   switch (getKind()) {
-  case TemplateName::NameKind::QualifiedTemplate:
-    if (NestedNameSpecifier *NNS = getAsQualifiedTemplateName()->getQualifier())
-      D |= toTemplateNameDependence(NNS->getDependence());
-    break;
-  case TemplateName::NameKind::DependentTemplate:
-    D |= toTemplateNameDependence(
-        getAsDependentTemplateName()->getQualifier()->getDependence());
-    break;
-  case TemplateName::NameKind::SubstTemplateTemplateParmPack:
-    D |= TemplateNameDependence::UnexpandedPack;
-    break;
-  case TemplateName::NameKind::OverloadedTemplate:
-    llvm_unreachable("overloaded templates shouldn't survive to here.");
-  default:
-    break;
-  }
-  if (TemplateDecl *Template = getAsTemplateDecl()) {
+  case NameKind::Template:
+  case NameKind::UsingTemplate: {
+    TemplateDecl *Template = getAsTemplateDecl();
+    auto D = TemplateNameDependence::None;
     if (auto *TTP = dyn_cast<TemplateTemplateParmDecl>(Template)) {
       D |= TemplateNameDependence::DependentInstantiation;
       if (TTP->isParameterPack())
@@ -246,10 +301,41 @@ TemplateNameDependence TemplateName::getDependence() const {
     if (Template->getDeclContext() &&
         Template->getDeclContext()->isDependentContext())
       D |= TemplateNameDependence::DependentInstantiation;
-  } else {
-    D |= TemplateNameDependence::DependentInstantiation;
+    return D;
+  }
+  case NameKind::QualifiedTemplate: {
+    QualifiedTemplateName *S = getAsQualifiedTemplateName();
+    TemplateNameDependence D = S->getUnderlyingTemplate().getDependence();
+    if (NestedNameSpecifier *NNS = S->getQualifier())
+      D |= toTemplateNameDependence(NNS->getDependence());
+    return D;
+  }
+  case NameKind::DependentTemplate: {
+    DependentTemplateName *S = getAsDependentTemplateName();
+    auto D = TemplateNameDependence::DependentInstantiation;
+    D |= toTemplateNameDependence(S->getQualifier()->getDependence());
+    return D;
+  }
+  case NameKind::SubstTemplateTemplateParm: {
+    auto *S = getAsSubstTemplateTemplateParm();
+    return S->getReplacement().getDependence();
+  }
+  case NameKind::SubstTemplateTemplateParmPack:
+    return TemplateNameDependence::UnexpandedPack |
+           TemplateNameDependence::DependentInstantiation;
+  case NameKind::DeducedTemplate: {
+    DeducedTemplateStorage *DTS = getAsDeducedTemplateName();
+    TemplateNameDependence D = DTS->getUnderlying().getDependence();
+    for (const TemplateArgument &Arg : DTS->getDefaultArguments().Args)
+      D |= toTemplateNameDependence(Arg.getDependence());
+    return D;
+  }
+  case NameKind::AssumedTemplate:
+    return TemplateNameDependence::DependentInstantiation;
+  case NameKind::OverloadedTemplate:
+    llvm_unreachable("overloaded templates shouldn't survive to here.");
   }
-  return D;
+  llvm_unreachable("Unknown TemplateName kind");
 }
 
 bool TemplateName::isDependent() const {
@@ -331,6 +417,11 @@ void TemplateName::print(raw_ostream &OS, const PrintingPolicy &Policy,
     OS << *SubstPack->getParameterPack();
   else if (AssumedTemplateStorage *Assumed = getAsAssumedTemplateName()) {
     Assumed->getDeclName().print(OS, Policy);
+  } else if (DeducedTemplateStorage *Deduced = getAsDeducedTemplateName()) {
+    Deduced->getUnderlying().print(OS, Policy);
+    DefaultArguments DefArgs = Deduced->getDefaultArguments();
+    OS << ":" << DefArgs.StartPos;
+    printTemplateArgumentList(OS, DefArgs.Args, Policy);
   } else {
     assert(getKind() == TemplateName::OverloadedTemplate);
     OverloadedTemplateStorage *OTS = getAsOverloadedTemplate();
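An illustrative aside, not part of the patch: the pair returned by the new getTemplateDeclAndDefaultArgs() is how a caller can see through a DeducedTemplateName without tripping the assertion in getAsTemplateDecl(). A minimal sketch, assuming only the members shown above (StartPos, Args, and the boolean conversion of DefaultArguments) plus the usual TemplateName.h and raw_ostream.h includes:

void inspectTemplateName(clang::TemplateName Name) {
  // Walks all sugar, including DeducedTemplateName, and also hands back any
  // defaulted arguments the deduced name carries.
  auto [TD, Defaults] = Name.getTemplateDeclAndDefaultArgs();
  if (!TD)
    return;
  if (Defaults)
    llvm::errs() << TD->getName() << " carries " << Defaults.Args.size()
                 << " default argument(s) starting at parameter index "
                 << Defaults.StartPos << "\n";
}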
diff --git clang/lib/AST/TextNodeDumper.cpp clang/lib/AST/TextNodeDumper.cpp
index c6b1b44206b2..3c51c7464718 100644
--- clang/lib/AST/TextNodeDumper.cpp
+++ clang/lib/AST/TextNodeDumper.cpp
@@ -1198,6 +1198,18 @@ void TextNodeDumper::dumpBareTemplateName(TemplateName TN) {
     dumpTemplateName(STS->getReplacement(), "replacement");
     return;
   }
+  case TemplateName::DeducedTemplate: {
+    OS << " deduced";
+    const DeducedTemplateStorage *DTS = TN.getAsDeducedTemplateName();
+    dumpTemplateName(DTS->getUnderlying(), "underlying");
+    AddChild("defaults", [=] {
+      auto [StartPos, Args] = DTS->getDefaultArguments();
+      OS << " start " << StartPos;
+      for (const TemplateArgument &Arg : Args)
+        AddChild([=] { Visit(Arg, SourceRange()); });
+    });
+    return;
+  }
   // FIXME: Implement these.
   case TemplateName::OverloadedTemplate:
     OS << " overloaded";
diff --git clang/lib/AST/Type.cpp clang/lib/AST/Type.cpp
index e89ce2e4b384..a55e6c8bf026 100644
--- clang/lib/AST/Type.cpp
+++ clang/lib/AST/Type.cpp
@@ -3992,12 +3992,12 @@ void DependentDecltypeType::Profile(llvm::FoldingSetNodeID &ID,
 
 PackIndexingType::PackIndexingType(const ASTContext &Context,
                                    QualType Canonical, QualType Pattern,
-                                   Expr *IndexExpr,
+                                   Expr *IndexExpr, bool ExpandsToEmptyPack,
                                    ArrayRef<QualType> Expansions)
     : Type(PackIndexing, Canonical,
            computeDependence(Pattern, IndexExpr, Expansions)),
       Context(Context), Pattern(Pattern), IndexExpr(IndexExpr),
-      Size(Expansions.size()) {
+      Size(Expansions.size()), ExpandsToEmptyPack(ExpandsToEmptyPack) {
 
   std::uninitialized_copy(Expansions.begin(), Expansions.end(),
                           getTrailingObjects<QualType>());
@@ -4042,9 +4042,10 @@ PackIndexingType::computeDependence(QualType Pattern, Expr *IndexExpr,
 
 void PackIndexingType::Profile(llvm::FoldingSetNodeID &ID,
                                const ASTContext &Context, QualType Pattern,
-                               Expr *E) {
+                               Expr *E, bool ExpandsToEmptyPack) {
   Pattern.Profile(ID);
   E->Profile(ID, Context, true);
+  ID.AddBoolean(ExpandsToEmptyPack);
 }
 
 UnaryTransformType::UnaryTransformType(QualType BaseType,
@@ -4302,7 +4303,8 @@ TemplateSpecializationType::TemplateSpecializationType(
           T.getKind() == TemplateName::SubstTemplateTemplateParm ||
           T.getKind() == TemplateName::SubstTemplateTemplateParmPack ||
           T.getKind() == TemplateName::UsingTemplate ||
-          T.getKind() == TemplateName::QualifiedTemplate) &&
+          T.getKind() == TemplateName::QualifiedTemplate ||
+          T.getKind() == TemplateName::DeducedTemplate) &&
          "Unexpected template name for TemplateSpecializationType");
 
   auto *TemplateArgs = reinterpret_cast<TemplateArgument *>(this + 1);
diff --git clang/lib/AST/TypePrinter.cpp clang/lib/AST/TypePrinter.cpp
index b1d9516c96eb..be627a6242eb 100644
--- clang/lib/AST/TypePrinter.cpp
+++ clang/lib/AST/TypePrinter.cpp
@@ -1635,7 +1635,8 @@ void TypePrinter::printTemplateId(const TemplateSpecializationType *T,
                                   raw_ostream &OS, bool FullyQualify) {
   IncludeStrongLifetimeRAII Strong(Policy);
 
-  TemplateDecl *TD = T->getTemplateName().getAsTemplateDecl();
+  TemplateDecl *TD =
+      T->getTemplateName().getAsTemplateDecl(/*IgnoreDeduced=*/true);
   // FIXME: Null TD never exercised in test suite.
   if (FullyQualify && TD) {
     if (!Policy.SuppressScope)
@@ -1942,6 +1943,10 @@ void TypePrinter::printAttributedAfter(const AttributedType *T,
   case attr::BTFTypeTag:
     llvm_unreachable("BTFTypeTag attribute handled separately");
 
+  case attr::HLSLResourceClass:
+  case attr::HLSLROV:
+    llvm_unreachable("HLSL resource type attributes handled separately");
+
   case attr::OpenCLPrivateAddressSpace:
   case attr::OpenCLGlobalAddressSpace:
   case attr::OpenCLGlobalDeviceAddressSpace:
@@ -2062,18 +2067,17 @@ void TypePrinter::printBTFTagAttributedAfter(const BTFTagAttributedType *T,
 void TypePrinter::printHLSLAttributedResourceBefore(
     const HLSLAttributedResourceType *T, raw_ostream &OS) {
   printBefore(T->getWrappedType(), OS);
+}
 
+void TypePrinter::printHLSLAttributedResourceAfter(
+    const HLSLAttributedResourceType *T, raw_ostream &OS) {
+  printAfter(T->getWrappedType(), OS);
   const HLSLAttributedResourceType::Attributes &Attrs = T->getAttrs();
   OS << " [[hlsl::resource_class("
      << HLSLResourceClassAttr::ConvertResourceClassToStr(Attrs.ResourceClass)
      << ")]]";
   if (Attrs.IsROV)
-    OS << " [[hlsl::is_rov()]]";
-}
-
-void TypePrinter::printHLSLAttributedResourceAfter(
-    const HLSLAttributedResourceType *T, raw_ostream &OS) {
-  printAfter(T->getWrappedType(), OS);
+    OS << " [[hlsl::is_rov]]";
 }
 
 void TypePrinter::printObjCInterfaceBefore(const ObjCInterfaceType *T,
diff --git clang/lib/Analysis/ThreadSafety.cpp clang/lib/Analysis/ThreadSafety.cpp
index e25b843c9bf8..5577f45aa521 100644
--- clang/lib/Analysis/ThreadSafety.cpp
+++ clang/lib/Analysis/ThreadSafety.cpp
@@ -922,6 +922,9 @@ public:
   handleRemovalFromIntersection(const FactSet &FSet, FactManager &FactMan,
                                 SourceLocation JoinLoc, LockErrorKind LEK,
                                 ThreadSafetyHandler &Handler) const override {
+    if (LEK == LEK_LockedAtEndOfFunction || LEK == LEK_NotLockedAtEndOfFunction)
+      return;
+
     for (const auto &UnderlyingMutex : UnderlyingMutexes) {
       const auto *Entry = FSet.findLock(FactMan, UnderlyingMutex.Cap);
       if ((UnderlyingMutex.Kind == UCK_Acquired && Entry) ||
@@ -1177,8 +1180,7 @@ void BeforeSet::checkBeforeAfter(const ValueDecl* StartVd,
       }
       // Transitively search other before sets, and warn on cycles.
       if (traverse(Vdb)) {
-        if (!CycMap.contains(Vd)) {
-          CycMap.insert(std::make_pair(Vd, true));
+        if (CycMap.try_emplace(Vd, true).second) {
           StringRef L1 = Vd->getName();
           Analyzer.Handler.handleBeforeAfterCycle(L1, Vd->getLocation());
         }
@@ -2224,7 +2226,7 @@ void ThreadSafetyAnalyzer::intersectAndWarn(FactSet &EntrySet,
       if (join(FactMan[*EntryIt], ExitFact,
                EntryLEK != LEK_LockedSomeLoopIterations))
         *EntryIt = Fact;
-    } else if (!ExitFact.managed()) {
+    } else if (!ExitFact.managed() || EntryLEK == LEK_LockedAtEndOfFunction) {
       ExitFact.handleRemovalFromIntersection(ExitSet, FactMan, JoinLoc,
                                              EntryLEK, Handler);
     }
@@ -2236,7 +2238,8 @@ void ThreadSafetyAnalyzer::intersectAndWarn(FactSet &EntrySet,
     const FactEntry *ExitFact = ExitSet.findLock(FactMan, *EntryFact);
 
     if (!ExitFact) {
-      if (!EntryFact->managed() || ExitLEK == LEK_LockedSomeLoopIterations)
+      if (!EntryFact->managed() || ExitLEK == LEK_LockedSomeLoopIterations ||
+          ExitLEK == LEK_NotLockedAtEndOfFunction)
         EntryFact->handleRemovalFromIntersection(EntrySetOrig, FactMan, JoinLoc,
                                                  ExitLEK, Handler);
       if (ExitLEK == LEK_LockedSomePredecessors)
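An illustrative aside, not part of the patch: the shape of code that the LEK_LockedAtEndOfFunction / LEK_NotLockedAtEndOfFunction join points above describe, written with the usual -Wthread-safety attributes (the Mutex type below is a hypothetical annotated wrapper, not a real library class):

struct __attribute__((capability("mutex"))) Mutex {
  void Lock() __attribute__((acquire_capability()));
  void Unlock() __attribute__((release_capability()));
};

Mutex mu;
int data __attribute__((guarded_by(mu)));

void leaks_lock() {
  mu.Lock();
  data = 1;
}  // 'mu' is still held here: reported at the end-of-function join point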
diff --git clang/lib/Analysis/UnsafeBufferUsage.cpp clang/lib/Analysis/UnsafeBufferUsage.cpp
index da7446913f7c..21d4368151eb 100644
--- clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -10,12 +10,13 @@
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Decl.h"
 #include "clang/AST/Expr.h"
+#include "clang/AST/FormatString.h"
 #include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/AST/Stmt.h"
 #include "clang/AST/StmtVisitor.h"
+#include "clang/AST/Type.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
 #include "clang/ASTMatchers/ASTMatchers.h"
-#include "clang/Basic/CharInfo.h"
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Lex/Lexer.h"
 #include "clang/Lex/Preprocessor.h"
@@ -247,6 +248,11 @@ AST_MATCHER_P(Stmt, ignoreUnsafeBufferInContainer,
   return Handler->ignoreUnsafeBufferInContainer(Node.getBeginLoc());
 }
 
+AST_MATCHER_P(Stmt, ignoreUnsafeLibcCall, const UnsafeBufferUsageHandler *,
+              Handler) {
+  return Handler->ignoreUnsafeBufferInLibcCall(Node.getBeginLoc());
+}
+
 AST_MATCHER_P(CastExpr, castSubExpr, internal::Matcher<Expr>, innerMatcher) {
   return innerMatcher.matches(*Node.getSubExpr(), Finder, Builder);
 }
@@ -443,6 +449,446 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) {
   return false;
 }
 
+AST_MATCHER_P(CallExpr, hasNumArgs, unsigned, Num) {
+  return Node.getNumArgs() == Num;
+}
+
+namespace libc_func_matchers {
+// Under `libc_func_matchers`, define a set of matchers that match unsafe
+// functions in libc and unsafe calls to them.
+
+//  A tiny parser to strip off common prefix and suffix of libc function names
+//  in real code.
+//
+//  Given a function name, `matchName` returns `CoreName` according to the
+//  following grammar:
+//
+//  LibcName     := CoreName | CoreName + "_s"
+//  MatchingName := "__builtin_" + LibcName              |
+//                  "__builtin___" + LibcName + "_chk"   |
+//                  "__asan_" + LibcName
+//
+struct LibcFunNamePrefixSuffixParser {
+  StringRef matchName(StringRef FunName, bool isBuiltin) {
+    // Try to match __builtin_:
+    if (isBuiltin && FunName.starts_with("__builtin_"))
+      // Then either it is __builtin_LibcName or __builtin___LibcName_chk or
+      // no match:
+      return matchLibcNameOrBuiltinChk(
+          FunName.drop_front(10 /* truncate "__builtin_" */));
+    // Try to match __asan_:
+    if (FunName.starts_with("__asan_"))
+      return matchLibcName(FunName.drop_front(7 /* truncate "__asan_" */));
+    return matchLibcName(FunName);
+  }
+
+  // Parameter `Name` is the substring after stripping off the prefix
+  // "__builtin_".
+  StringRef matchLibcNameOrBuiltinChk(StringRef Name) {
+    if (Name.starts_with("__") && Name.ends_with("_chk"))
+      return matchLibcName(
+          Name.drop_front(2).drop_back(4) /* truncate "__" and "_chk" */);
+    return matchLibcName(Name);
+  }
+
+  StringRef matchLibcName(StringRef Name) {
+    if (Name.ends_with("_s"))
+      return Name.drop_back(2 /* truncate "_s" */);
+    return Name;
+  }
+};
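// (Editorial aside, not part of the patch.)  Concretely, matchName() reduces
// real-world spellings to the libc core name, e.g.:
//   matchName("__builtin___memcpy_chk", /*isBuiltin=*/true)  == "memcpy"
//   matchName("__builtin_strcpy",        /*isBuiltin=*/true) == "strcpy"
//   matchName("__asan_memset",           /*isBuiltin=*/false) == "memset"
//   matchName("strcpy_s",                /*isBuiltin=*/false) == "strcpy"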
+
+// A pointer-type expression is known to be null-terminated if it is a string
+// literal, a predefined expression, or E.c_str() for an `std::string` E.
+static bool isNullTermPointer(const Expr *Ptr) {
+  if (isa<StringLiteral>(Ptr->IgnoreParenImpCasts()))
+    return true;
+  if (isa<PredefinedExpr>(Ptr->IgnoreParenImpCasts()))
+    return true;
+  if (auto *MCE = dyn_cast<CXXMemberCallExpr>(Ptr->IgnoreParenImpCasts())) {
+    const CXXMethodDecl *MD = MCE->getMethodDecl();
+    const CXXRecordDecl *RD = MCE->getRecordDecl()->getCanonicalDecl();
+
+    if (MD && RD && RD->isInStdNamespace())
+      if (MD->getName() == "c_str" && RD->getName() == "basic_string")
+        return true;
+  }
+  return false;
+}
+
+// Return true iff at least one of the following cases holds:
+//  1. The format string is a literal and there is an unsafe pointer argument
+//     corresponding to an `s` specifier;
+//  2. The format string is not a literal and there is at least one unsafe
+//     pointer argument (including the format argument).
+//
+// `UnsafeArg` is the output argument that will be set only if this function
+// returns true.
+static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg,
+                                  const unsigned FmtArgIdx, ASTContext &Ctx,
+                                  bool isKprintf = false) {
+  class StringFormatStringHandler
+      : public analyze_format_string::FormatStringHandler {
+    const CallExpr *Call;
+    unsigned FmtArgIdx;
+    const Expr *&UnsafeArg;
+
+  public:
+    StringFormatStringHandler(const CallExpr *Call, unsigned FmtArgIdx,
+                              const Expr *&UnsafeArg)
+        : Call(Call), FmtArgIdx(FmtArgIdx), UnsafeArg(UnsafeArg) {}
+
+    bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
+                               const char *startSpecifier,
+                               unsigned specifierLen,
+                               const TargetInfo &Target) override {
+      if (FS.getConversionSpecifier().getKind() ==
+          analyze_printf::PrintfConversionSpecifier::sArg) {
+        unsigned ArgIdx = FS.getPositionalArgIndex() + FmtArgIdx;
+
+        if (0 < ArgIdx && ArgIdx < Call->getNumArgs())
+          if (!isNullTermPointer(Call->getArg(ArgIdx))) {
+            UnsafeArg = Call->getArg(ArgIdx); // output
+            // returning false stops parsing immediately
+            return false;
+          }
+      }
+      return true; // continue parsing
+    }
+  };
+
+  const Expr *Fmt = Call->getArg(FmtArgIdx);
+
+  if (auto *SL = dyn_cast<StringLiteral>(Fmt->IgnoreParenImpCasts())) {
+    StringRef FmtStr = SL->getString();
+    StringFormatStringHandler Handler(Call, FmtArgIdx, UnsafeArg);
+
+    return analyze_format_string::ParsePrintfString(
+        Handler, FmtStr.begin(), FmtStr.end(), Ctx.getLangOpts(),
+        Ctx.getTargetInfo(), isKprintf);
+  }
+  // If the format is not a string literal, we cannot analyze the format
+  // string. In this case, the call is considered unsafe if at least one
+  // argument (including the format argument) is an unsafe pointer.
+  return llvm::any_of(
+      llvm::make_range(Call->arg_begin() + FmtArgIdx, Call->arg_end()),
+      [&UnsafeArg](const Expr *Arg) -> bool {
+        if (Arg->getType()->isPointerType() && !isNullTermPointer(Arg)) {
+          UnsafeArg = Arg;
+          return true;
+        }
+        return false;
+      });
+}
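// (Editorial aside, not part of the patch.)  Example calls under the rules
// above, assuming `P` is a raw char pointer and `S` is a std::string:
//   printf("%s", S.c_str());  // '%s' argument is null-terminated -> not flagged
//   printf("%s", P);          // '%s' argument not known terminated -> UnsafeArg = P
//   printf(Fmt, P);           // non-literal format -> any unsafe pointer arg flags it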
+
+// Matches a FunctionDecl node such that
+//  1. Its name, after stripping off the predefined prefix and suffix, is
+//     `CoreName`; and
+//  2. `CoreName` or `CoreName[str/wcs]` is one of the `PredefinedNames`, which
+//     is a set of libc function names.
+//
+//  Note: For the predefined prefix and suffix, see
+//  `LibcFunNamePrefixSuffixParser`.  The notation `CoreName[str/wcs]` means
+//  the name obtained by replacing "wcs" with "str" in `CoreName`.
+AST_MATCHER(FunctionDecl, isPredefinedUnsafeLibcFunc) {
+  static std::unique_ptr<std::set<StringRef>> PredefinedNames = nullptr;
+  if (!PredefinedNames)
+    PredefinedNames =
+        std::make_unique<std::set<StringRef>, std::set<StringRef>>({
+            // numeric conversion:
+            "atof",
+            "atoi",
+            "atol",
+            "atoll",
+            "strtol",
+            "strtoll",
+            "strtoul",
+            "strtoull",
+            "strtof",
+            "strtod",
+            "strtold",
+            "strtoimax",
+            "strtoumax",
+            // "strfromf",  "strfromd", "strfroml", // C23?
+            // string manipulation:
+            "strcpy",
+            "strncpy",
+            "strlcpy",
+            "strcat",
+            "strncat",
+            "strlcat",
+            "strxfrm",
+            "strdup",
+            "strndup",
+            // string examination:
+            "strlen",
+            "strnlen",
+            "strcmp",
+            "strncmp",
+            "stricmp",
+            "strcasecmp",
+            "strcoll",
+            "strchr",
+            "strrchr",
+            "strspn",
+            "strcspn",
+            "strpbrk",
+            "strstr",
+            "strtok",
+            // "mem-" functions
+            "memchr",
+            "wmemchr",
+            "memcmp",
+            "wmemcmp",
+            "memcpy",
+            "memccpy",
+            "mempcpy",
+            "wmemcpy",
+            "memmove",
+            "wmemmove",
+            "memset",
+            "wmemset",
+            // IO:
+            "fread",
+            "fwrite",
+            "fgets",
+            "fgetws",
+            "gets",
+            "fputs",
+            "fputws",
+            "puts",
+            // others
+            "strerror_s",
+            "strerror_r",
+            "bcopy",
+            "bzero",
+            "bsearch",
+            "qsort",
+        });
+
+  auto *II = Node.getIdentifier();
+
+  if (!II)
+    return false;
+
+  StringRef Name = LibcFunNamePrefixSuffixParser().matchName(
+      II->getName(), Node.getBuiltinID());
+
+  // Match predefined names:
+  if (PredefinedNames->find(Name) != PredefinedNames->end())
+    return true;
+
+  std::string NameWCS = Name.str();
+  size_t WcsPos = NameWCS.find("wcs");
+
+  while (WcsPos != std::string::npos) {
+    NameWCS[WcsPos++] = 's';
+    NameWCS[WcsPos++] = 't';
+    NameWCS[WcsPos++] = 'r';
+    WcsPos = NameWCS.find("wcs", WcsPos);
+  }
+  if (PredefinedNames->find(NameWCS) != PredefinedNames->end())
+    return true;
+  // All `scanf` functions are unsafe (including `sscanf`, `vsscanf`, etc.);
+  // they all end with "scanf":
+  return Name.ends_with("scanf");
+}
+
+// Matches declarations of the `v*printf` functions, which take a `va_list`.
+// We cannot check safety for these functions, so calls to them should be
+// changed to their non-va_list counterparts.
+AST_MATCHER(FunctionDecl, isUnsafeVaListPrintfFunc) {
+  auto *II = Node.getIdentifier();
+
+  if (!II)
+    return false;
+
+  StringRef Name = LibcFunNamePrefixSuffixParser().matchName(
+      II->getName(), Node.getBuiltinID());
+
+  if (!Name.ends_with("printf"))
+    return false; // not a printf-family function
+  return Name.starts_with("v");
+}
+
+// Matches declarations of the `sprintf` functions, which are always unsafe
+// and should be changed to `snprintf`.
+AST_MATCHER(FunctionDecl, isUnsafeSprintfFunc) {
+  auto *II = Node.getIdentifier();
+
+  if (!II)
+    return false;
+
+  StringRef Name = LibcFunNamePrefixSuffixParser().matchName(
+      II->getName(), Node.getBuiltinID());
+
+  if (!Name.ends_with("printf") ||
+      // Let `isUnsafeVaListPrintfFunc` check for cases with va-list:
+      Name.starts_with("v"))
+    return false;
+
+  StringRef Prefix = Name.drop_back(6);
+
+  if (Prefix.ends_with("w"))
+    Prefix = Prefix.drop_back(1);
+  return Prefix == "s";
+}
+
+// Match function declarations of `printf`, `fprintf`, `snprintf` and their wide
+// character versions.  Calls to these functions can be safe if their arguments
+// are carefully made safe.
+AST_MATCHER(FunctionDecl, isNormalPrintfFunc) {
+  auto *II = Node.getIdentifier();
+
+  if (!II)
+    return false;
+
+  StringRef Name = LibcFunNamePrefixSuffixParser().matchName(
+      II->getName(), Node.getBuiltinID());
+
+  if (!Name.ends_with("printf") || Name.starts_with("v"))
+    return false;
+
+  StringRef Prefix = Name.drop_back(6);
+
+  if (Prefix.ends_with("w"))
+    Prefix = Prefix.drop_back(1);
+
+  return Prefix.empty() || Prefix == "k" || Prefix == "f" || Prefix == "sn";
+}
+
+// This matcher requires the callee to be already known to match
+// `isNormalPrintfFunc`.  If the format is a string literal, this matcher
+// matches when at least one string argument is unsafe.  Otherwise, it matches
+// when at least one pointer-type argument is unsafe.
+AST_MATCHER_P(CallExpr, hasUnsafePrintfStringArg,
+              clang::ast_matchers::internal::Matcher<Expr>,
+              UnsafeStringArgMatcher) {
+  // Determine what printf it is by examining formal parameters:
+  const FunctionDecl *FD = Node.getDirectCallee();
+
+  assert(FD && "It should have been checked that FD is non-null.");
+
+  unsigned NumParms = FD->getNumParams();
+
+  if (NumParms < 1)
+    return false; // possibly some user-defined printf function
+
+  ASTContext &Ctx = Finder->getASTContext();
+  QualType FirstParmTy = FD->getParamDecl(0)->getType();
+
+  if (!FirstParmTy->isPointerType())
+    return false; // possibly some user-defined printf function
+
+  QualType FirstPteTy = cast<PointerType>(FirstParmTy)->getPointeeType();
+
+  if (!Ctx.getFILEType()
+           .isNull() && //`FILE *` must be in the context if it is fprintf
+      FirstPteTy.getCanonicalType() == Ctx.getFILEType().getCanonicalType()) {
+    // It is a fprintf:
+    const Expr *UnsafeArg;
+
+    if (hasUnsafeFormatOrSArg(&Node, UnsafeArg, 1, Ctx, false))
+      return UnsafeStringArgMatcher.matches(*UnsafeArg, Finder, Builder);
+    return false;
+  }
+
+  if (FirstPteTy.isConstQualified()) {
+    // If the first parameter is a `const char *`, it is a printf/kprintf:
+    bool isKprintf = false;
+    const Expr *UnsafeArg;
+
+    if (auto *II = FD->getIdentifier())
+      isKprintf = II->getName() == "kprintf";
+    if (hasUnsafeFormatOrSArg(&Node, UnsafeArg, 0, Ctx, isKprintf))
+      return UnsafeStringArgMatcher.matches(*UnsafeArg, Finder, Builder);
+    return false;
+  }
+
+  if (NumParms > 2) {
+    QualType SecondParmTy = FD->getParamDecl(1)->getType();
+
+    if (!FirstPteTy.isConstQualified() && SecondParmTy->isIntegerType()) {
+      // If the first parameter type is non-const qualified `char *` and the
+      // second is an integer, it is a snprintf:
+      const Expr *UnsafeArg;
+
+      if (hasUnsafeFormatOrSArg(&Node, UnsafeArg, 2, Ctx, false))
+        return UnsafeStringArgMatcher.matches(*UnsafeArg, Finder, Builder);
+      return false;
+    }
+  }
+  // We don't really recognize this "normal" printf; the only thing we can do
+  // is to require all pointer arguments to be null-terminated:
+  for (auto Arg : Node.arguments())
+    if (Arg->getType()->isPointerType() && !isNullTermPointer(Arg))
+      if (UnsafeStringArgMatcher.matches(*Arg, Finder, Builder))
+        return true;
+  return false;
+}
+
+// This matcher requires the callee to be already known to match
+// `isNormalPrintfFunc`.  It then matches if the first two arguments of the
+// call are a pointer and an integer and they are not in a safe pattern.
+//
+// The first two arguments, `ptr` and `size`, are safe if they have the
+// following forms:
+//    ptr  := DRE.data();
+//    size := DRE.size()/DRE.size_bytes()
+// where DRE refers to a hardened container or view.
+AST_MATCHER(CallExpr, hasUnsafeSnprintfBuffer) {
+  const FunctionDecl *FD = Node.getDirectCallee();
+
+  assert(FD && "It should have been checked that FD is non-null.");
+
+  if (FD->getNumParams() < 3)
+    return false; // Not an snprintf
+
+  QualType FirstParmTy = FD->getParamDecl(0)->getType();
+
+  if (!FirstParmTy->isPointerType())
+    return false; // Not an snprintf
+
+  QualType FirstPteTy = cast<PointerType>(FirstParmTy)->getPointeeType();
+  const Expr *Buf = Node.getArg(0), *Size = Node.getArg(1);
+
+  if (FirstPteTy.isConstQualified() || !Buf->getType()->isPointerType() ||
+      !Size->getType()->isIntegerType())
+    return false; // not an snprintf call
+
+  static StringRef SizedObjs[] = {"span", "array", "vector",
+                                  "basic_string_view", "basic_string"};
+  Buf = Buf->IgnoreParenImpCasts();
+  Size = Size->IgnoreParenImpCasts();
+  if (auto *MCEPtr = dyn_cast<CXXMemberCallExpr>(Buf))
+    if (auto *MCESize = dyn_cast<CXXMemberCallExpr>(Size)) {
+      auto *DREOfPtr = dyn_cast<DeclRefExpr>(
+          MCEPtr->getImplicitObjectArgument()->IgnoreParenImpCasts());
+      auto *DREOfSize = dyn_cast<DeclRefExpr>(
+          MCESize->getImplicitObjectArgument()->IgnoreParenImpCasts());
+
+      if (!DREOfPtr || !DREOfSize)
+        return true; // not in safe pattern
+      if (DREOfPtr->getDecl() != DREOfSize->getDecl())
+        return true; // not in safe pattern
+      if (MCEPtr->getMethodDecl()->getName() != "data")
+        return true; // not in safe pattern
+
+      if (MCESize->getMethodDecl()->getName() == "size_bytes" ||
+          // Note here the pointer must be a pointer-to-char type unless there
+          // is explicit casting.  If there is explicit casting, this branch
+          // is unreachable. Thus, at this branch "size" and "size_bytes" are
+          // equivalent as the pointer is a char pointer:
+          MCESize->getMethodDecl()->getName() == "size")
+        for (StringRef SizedObj : SizedObjs)
+          if (MCEPtr->getRecordDecl()->isInStdNamespace() &&
+              MCEPtr->getRecordDecl()->getCanonicalDecl()->getName() ==
+                  SizedObj)
+            return false; // It is in fact safe
+    }
+  return true; // ptr and size are not in safe pattern
+}
+} // namespace libc_func_matchers
 } // namespace clang::ast_matchers
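// (Editorial aside, not part of the patch.)  The buffer/size pattern that
// hasUnsafeSnprintfBuffer() treats as safe: both values come from the same
// hardened object via .data() and .size()/.size_bytes().  A sketch, assuming
// <cstdio> and <span>:
void snprintf_patterns(std::span<char> Buf, char *Raw, size_t N) {
  std::snprintf(Buf.data(), Buf.size(), "ok"); // same std::span -> not flagged
  std::snprintf(Raw, N, "no");                 // raw pointer + size -> flagged
}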
 
 namespace {
@@ -760,6 +1206,10 @@ public:
                     .bind(SpanTwoParamConstructorTag));
   }
 
+  static Matcher matcher(const UnsafeBufferUsageHandler *Handler) {
+    return stmt(unless(ignoreUnsafeBufferInContainer(Handler)), matcher());
+  }
+
   void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler,
                              bool IsRelatedToDecl,
                              ASTContext &Ctx) const override {
@@ -1030,6 +1480,98 @@ public:
   DeclUseList getClaimedVarUseSites() const override { return {}; }
 };
 
+class UnsafeLibcFunctionCallGadget : public WarningGadget {
+  const CallExpr *const Call;
+  const Expr *UnsafeArg = nullptr;
+  constexpr static const char *const Tag = "UnsafeLibcFunctionCall";
+  // Extra tags for additional information:
+  constexpr static const char *const UnsafeSprintfTag =
+      "UnsafeLibcFunctionCall_sprintf";
+  constexpr static const char *const UnsafeSizedByTag =
+      "UnsafeLibcFunctionCall_sized_by";
+  constexpr static const char *const UnsafeStringTag =
+      "UnsafeLibcFunctionCall_string";
+  constexpr static const char *const UnsafeVaListTag =
+      "UnsafeLibcFunctionCall_va_list";
+
+  enum UnsafeKind {
+    OTHERS = 0,  // no specific information, the callee function is unsafe
+    SPRINTF = 1, // never call `-sprintf`s, call `-snprintf`s instead.
+    SIZED_BY =
+        2, // the first two arguments of an `snprintf` call have a
+           // "__sized_by" relation but do not conform to the safe patterns
+    STRING = 3,  // an argument is a pointer-to-char-as-string but does not
+                 // guarantee null-termination
+    VA_LIST = 4, // one of the `-printf` functions that take a va_list, which
+                 // cannot be checked at compile time and is considered unsafe
+  } WarnedFunKind = OTHERS;
+
+public:
+  UnsafeLibcFunctionCallGadget(const MatchFinder::MatchResult &Result)
+      : WarningGadget(Kind::UnsafeLibcFunctionCall),
+        Call(Result.Nodes.getNodeAs<CallExpr>(Tag)) {
+    if (Result.Nodes.getNodeAs<Decl>(UnsafeSprintfTag))
+      WarnedFunKind = SPRINTF;
+    else if (auto *E = Result.Nodes.getNodeAs<Expr>(UnsafeStringTag)) {
+      WarnedFunKind = STRING;
+      UnsafeArg = E;
+    } else if (Result.Nodes.getNodeAs<CallExpr>(UnsafeSizedByTag)) {
+      WarnedFunKind = SIZED_BY;
+      UnsafeArg = Call->getArg(0);
+    } else if (Result.Nodes.getNodeAs<Decl>(UnsafeVaListTag))
+      WarnedFunKind = VA_LIST;
+  }
+
+  static Matcher matcher(const UnsafeBufferUsageHandler *Handler) {
+    return stmt(unless(ignoreUnsafeLibcCall(Handler)),
+      anyOf(
+        callExpr(
+            callee(functionDecl(anyOf(
+                // Match a predefined unsafe libc
+                // function:
+                functionDecl(libc_func_matchers::isPredefinedUnsafeLibcFunc()),
+                // Match a call to one of the `v*printf` functions
+                // taking va-list, which cannot be checked at
+                // compile-time:
+                functionDecl(libc_func_matchers::isUnsafeVaListPrintfFunc())
+                    .bind(UnsafeVaListTag),
+                // Match a call to a `sprintf` function, which is never
+                // safe:
+                functionDecl(libc_func_matchers::isUnsafeSprintfFunc())
+                    .bind(UnsafeSprintfTag)))),
+            //  (unless the call has a sole string literal argument):
+            unless(
+                allOf(hasArgument(0, expr(stringLiteral())), hasNumArgs(1)))),
+
+        // The following two cases require checking against actual
+        // arguments of the call:
+
+        // Match a call to an `snprintf` function whose first two
+        // arguments (that describe a buffer) are not in a safe
+        // pattern:
+        callExpr(callee(functionDecl(libc_func_matchers::isNormalPrintfFunc())),
+                 libc_func_matchers::hasUnsafeSnprintfBuffer())
+            .bind(UnsafeSizedByTag),
+        // Match a call to a `printf` function, which can be safe if
+        // all arguments are null-terminated:
+        callExpr(callee(functionDecl(libc_func_matchers::isNormalPrintfFunc())),
+                 libc_func_matchers::hasUnsafePrintfStringArg(
+                     expr().bind(UnsafeStringTag)))));
+  }
+
+  const Stmt *getBaseStmt() const { return Call; }
+
+  SourceLocation getSourceLoc() const override { return Call->getBeginLoc(); }
+
+  void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler,
+                             bool IsRelatedToDecl,
+                             ASTContext &Ctx) const override {
+    Handler.handleUnsafeLibcCall(Call, WarnedFunKind, Ctx, UnsafeArg);
+  }
+
+  DeclUseList getClaimedVarUseSites() const override { return {}; }
+};
+
 // Represents expressions of the form `DRE[*]` in the Unspecified Lvalue
 // Context (see `isInUnspecifiedLvalueContext`).
 // Note here `[]` is the built-in subscript operator.
@@ -1452,10 +1994,9 @@ findGadgets(const Decl *D, const UnsafeBufferUsageHandler &Handler,
 #define WARNING_GADGET(x)                                                      \
           allOf(x ## Gadget::matcher().bind(#x),                               \
                 notInSafeBufferOptOut(&Handler)),
-#define WARNING_CONTAINER_GADGET(x)                                            \
-          allOf(x ## Gadget::matcher().bind(#x),                               \
-                notInSafeBufferOptOut(&Handler),                               \
-                unless(ignoreUnsafeBufferInContainer(&Handler))),
+#define WARNING_OPTIONAL_GADGET(x)                                            \
+          allOf(x ## Gadget::matcher(&Handler).bind(#x),                      \
+                notInSafeBufferOptOut(&Handler)),
 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
             // Avoid a hanging comma.
             unless(stmt())
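An illustrative aside, not part of the patch: roughly which calls land in each UnsafeKind bucket of UnsafeLibcFunctionCallGadget, given the matchers above (all identifiers below are placeholders):

  strcpy(Dst, Src);            // OTHERS:   predefined unsafe libc function
  sprintf(Buf, "%d", I);       // SPRINTF:  never safe, use snprintf instead
  snprintf(Raw, N, "%d", I);   // SIZED_BY: buffer/size not from one hardened object
  printf("%s", NotTerminated); // STRING:   '%s' argument not known null-terminated
  vprintf(Fmt, Ap);            // VA_LIST:  cannot be checked at compile time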
diff --git clang/lib/Basic/FileManager.cpp clang/lib/Basic/FileManager.cpp
index 4509cee1ca0f..6097b85a0306 100644
--- clang/lib/Basic/FileManager.cpp
+++ clang/lib/Basic/FileManager.cpp
@@ -692,5 +692,16 @@ void FileManager::PrintStats() const {
   llvm::errs() << NumFileLookups << " file lookups, "
                << NumFileCacheMisses << " file cache misses.\n";
 
+  getVirtualFileSystem().visit([](llvm::vfs::FileSystem &VFS) {
+    if (auto *T = dyn_cast_or_null<llvm::vfs::TracingFileSystem>(&VFS))
+      llvm::errs() << "\n*** Virtual File System Stats:\n"
+                   << T->NumStatusCalls << " status() calls\n"
+                   << T->NumOpenFileForReadCalls << " openFileForRead() calls\n"
+                   << T->NumDirBeginCalls << " dir_begin() calls\n"
+                   << T->NumGetRealPathCalls << " getRealPath() calls\n"
+                   << T->NumExistsCalls << " exists() calls\n"
+                   << T->NumIsLocalCalls << " isLocal() calls\n";
+  });
+
   //llvm::errs() << PagesMapped << BytesOfPagesMapped << FSLookups;
 }
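An illustrative aside, not part of the patch: how those counters get populated. This assumes llvm::vfs::TracingFileSystem is the usual proxy-style wrapper whose constructor takes the file system it instruments; only the counter names are confirmed by the code above.

  llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> Real =
      llvm::vfs::getRealFileSystem();
  auto Traced =
      llvm::makeIntrusiveRefCnt<llvm::vfs::TracingFileSystem>(Real);
  (void)Traced->status("example.txt"); // increments NumStatusCalls
  llvm::errs() << Traced->NumStatusCalls << " status() calls\n";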
diff --git clang/lib/Basic/Targets/RISCV.cpp clang/lib/Basic/Targets/RISCV.cpp
index b89109e7725d..6f9d050fc71a 100644
--- clang/lib/Basic/Targets/RISCV.cpp
+++ clang/lib/Basic/Targets/RISCV.cpp
@@ -388,7 +388,7 @@ static void handleFullArchString(StringRef FullArchStr,
       FullArchStr, /* EnableExperimentalExtension */ true);
   if (llvm::errorToBool(RII.takeError())) {
     // Forward the invalid FullArchStr.
-    Features.push_back("+" + FullArchStr.str());
+    Features.push_back(FullArchStr.str());
   } else {
     // Append a full list of features, including any negative extensions so that
     // we override the CPU's features.
@@ -478,3 +478,7 @@ bool RISCVTargetInfo::validateCpuSupports(StringRef Feature) const {
   // __riscv_feature_bits structure.
   return -1 != llvm::RISCVISAInfo::getRISCVFeaturesBitsInfo(Feature).second;
 }
+
+bool RISCVTargetInfo::isValidFeatureName(StringRef Name) const {
+  return llvm::RISCVISAInfo::isSupportedExtensionFeature(Name);
+}
diff --git clang/lib/Basic/Targets/RISCV.h clang/lib/Basic/Targets/RISCV.h
index 626274b8fc43..b808ccc8e9cf 100644
--- clang/lib/Basic/Targets/RISCV.h
+++ clang/lib/Basic/Targets/RISCV.h
@@ -130,6 +130,7 @@ public:
   bool supportsCpuSupports() const override { return getTriple().isOSLinux(); }
   bool supportsCpuInit() const override { return getTriple().isOSLinux(); }
   bool validateCpuSupports(StringRef Feature) const override;
+  bool isValidFeatureName(StringRef Name) const override;
 };
 class LLVM_LIBRARY_VISIBILITY RISCV32TargetInfo : public RISCVTargetInfo {
 public:
diff --git clang/lib/Basic/Targets/SystemZ.h clang/lib/Basic/Targets/SystemZ.h
index 7390f25d6efb..f05ea473017b 100644
--- clang/lib/Basic/Targets/SystemZ.h
+++ clang/lib/Basic/Targets/SystemZ.h
@@ -48,7 +48,6 @@ static const unsigned ZOSAddressMap[] = {
 class LLVM_LIBRARY_VISIBILITY SystemZTargetInfo : public TargetInfo {
 
   static const char *const GCCRegNames[];
-  std::string CPU;
   int ISARevision;
   bool HasTransactionalExecution;
   bool HasVector;
@@ -58,7 +57,7 @@ class LLVM_LIBRARY_VISIBILITY SystemZTargetInfo : public TargetInfo {
 
 public:
   SystemZTargetInfo(const llvm::Triple &Triple, const TargetOptions &)
-      : TargetInfo(Triple), CPU("z10"), ISARevision(8),
+      : TargetInfo(Triple), ISARevision(getISARevision("z10")),
         HasTransactionalExecution(false), HasVector(false), SoftFloat(false),
         UnalignedSymbols(false) {
     IntMaxType = SignedLong;
@@ -168,8 +167,7 @@ public:
   }
 
   bool setCPU(const std::string &Name) override {
-    CPU = Name;
-    ISARevision = getISARevision(CPU);
+    ISARevision = getISARevision(Name);
     return ISARevision != -1;
   }
 
diff --git clang/lib/CodeGen/CGBlocks.cpp clang/lib/CodeGen/CGBlocks.cpp
index 066139b1c78c..684fda744073 100644
--- clang/lib/CodeGen/CGBlocks.cpp
+++ clang/lib/CodeGen/CGBlocks.cpp
@@ -1163,7 +1163,8 @@ llvm::Type *CodeGenModule::getGenericBlockLiteralType() {
 }
 
 RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E,
-                                          ReturnValueSlot ReturnValue) {
+                                          ReturnValueSlot ReturnValue,
+                                          llvm::CallBase **CallOrInvoke) {
   const auto *BPT = E->getCallee()->getType()->castAs<BlockPointerType>();
   llvm::Value *BlockPtr = EmitScalarExpr(E->getCallee());
   llvm::Type *GenBlockTy = CGM.getGenericBlockLiteralType();
@@ -1220,7 +1221,7 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E,
   CGCallee Callee(CGCalleeInfo(), Func);
 
   // And call the block.
-  return EmitCall(FnInfo, Callee, ReturnValue, Args);
+  return EmitCall(FnInfo, Callee, ReturnValue, Args, CallOrInvoke);
 }
 
 Address CodeGenFunction::GetAddrOfBlockDecl(const VarDecl *variable) {
diff --git clang/lib/CodeGen/CGBuilder.h clang/lib/CodeGen/CGBuilder.h
index 08730a6a6672..b8036cf6e6a3 100644
--- clang/lib/CodeGen/CGBuilder.h
+++ clang/lib/CodeGen/CGBuilder.h
@@ -14,6 +14,7 @@
 #include "CodeGenTypeCache.h"
 #include "llvm/Analysis/Utils/Local.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/GEPNoWrapFlags.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Type.h"
 
@@ -334,9 +335,10 @@ public:
 
   Address CreateGEP(Address Addr, ArrayRef<llvm::Value *> IdxList,
                     llvm::Type *ElementType, CharUnits Align,
-                    const Twine &Name = "") {
+                    const Twine &Name = "",
+                    llvm::GEPNoWrapFlags NW = llvm::GEPNoWrapFlags::none()) {
     llvm::Value *Ptr = emitRawPointerFromAddress(Addr);
-    return RawAddress(CreateGEP(Addr.getElementType(), Ptr, IdxList, Name),
+    return RawAddress(CreateGEP(Addr.getElementType(), Ptr, IdxList, Name, NW),
                       ElementType, Align);
   }
 
diff --git clang/lib/CodeGen/CGBuiltin.cpp clang/lib/CodeGen/CGBuiltin.cpp
index 786c2c224b34..9950c06a0b9a 100644
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -2538,6 +2538,9 @@ static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF,
 RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
                                         const CallExpr *E,
                                         ReturnValueSlot ReturnValue) {
+  assert(!getContext().BuiltinInfo.isImmediate(BuiltinID) &&
+         "Should not codegen for consteval builtins");
+
   const FunctionDecl *FD = GD.getDecl()->getAsFunction();
   // See if we can constant fold this builtin.  If so, don't emit it at all.
   // TODO: Extend this handling to all builtin calls that we can constant-fold.
@@ -6241,8 +6244,20 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   }
 
   // EmitHLSLBuiltinExpr will check getLangOpts().HLSL
-  if (Value *V = EmitHLSLBuiltinExpr(BuiltinID, E))
-    return RValue::get(V);
+  if (Value *V = EmitHLSLBuiltinExpr(BuiltinID, E, ReturnValue)) {
+    switch (EvalKind) {
+    case TEK_Scalar:
+      if (V->getType()->isVoidTy())
+        return RValue::get(nullptr);
+      return RValue::get(V);
+    case TEK_Aggregate:
+      return RValue::getAggregate(ReturnValue.getAddress(),
+                                  ReturnValue.isVolatile());
+    case TEK_Complex:
+      llvm_unreachable("No current hlsl builtin returns complex");
+    }
+    llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
+  }
 
   if (getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice)
     return EmitHipStdParUnsupportedBuiltin(this, FD);
@@ -13481,6 +13496,112 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
     Int = Intrinsic::aarch64_neon_suqadd;
     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
   }
+
+  case NEON::BI__builtin_neon_vluti2_laneq_bf16:
+  case NEON::BI__builtin_neon_vluti2_laneq_f16:
+  case NEON::BI__builtin_neon_vluti2_laneq_p16:
+  case NEON::BI__builtin_neon_vluti2_laneq_p8:
+  case NEON::BI__builtin_neon_vluti2_laneq_s16:
+  case NEON::BI__builtin_neon_vluti2_laneq_s8:
+  case NEON::BI__builtin_neon_vluti2_laneq_u16:
+  case NEON::BI__builtin_neon_vluti2_laneq_u8: {
+    Int = Intrinsic::aarch64_neon_vluti2_laneq;
+    llvm::Type *Tys[2];
+    Tys[0] = Ty;
+    Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
+                                             /*isQuad*/ false));
+    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
+  }
+  case NEON::BI__builtin_neon_vluti2q_laneq_bf16:
+  case NEON::BI__builtin_neon_vluti2q_laneq_f16:
+  case NEON::BI__builtin_neon_vluti2q_laneq_p16:
+  case NEON::BI__builtin_neon_vluti2q_laneq_p8:
+  case NEON::BI__builtin_neon_vluti2q_laneq_s16:
+  case NEON::BI__builtin_neon_vluti2q_laneq_s8:
+  case NEON::BI__builtin_neon_vluti2q_laneq_u16:
+  case NEON::BI__builtin_neon_vluti2q_laneq_u8: {
+    Int = Intrinsic::aarch64_neon_vluti2_laneq;
+    llvm::Type *Tys[2];
+    Tys[0] = Ty;
+    Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
+                                             /*isQuad*/ true));
+    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
+  }
+  case NEON::BI__builtin_neon_vluti2_lane_bf16:
+  case NEON::BI__builtin_neon_vluti2_lane_f16:
+  case NEON::BI__builtin_neon_vluti2_lane_p16:
+  case NEON::BI__builtin_neon_vluti2_lane_p8:
+  case NEON::BI__builtin_neon_vluti2_lane_s16:
+  case NEON::BI__builtin_neon_vluti2_lane_s8:
+  case NEON::BI__builtin_neon_vluti2_lane_u16:
+  case NEON::BI__builtin_neon_vluti2_lane_u8: {
+    Int = Intrinsic::aarch64_neon_vluti2_lane;
+    llvm::Type *Tys[2];
+    Tys[0] = Ty;
+    Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
+                                             /*isQuad*/ false));
+    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
+  }
+  case NEON::BI__builtin_neon_vluti2q_lane_bf16:
+  case NEON::BI__builtin_neon_vluti2q_lane_f16:
+  case NEON::BI__builtin_neon_vluti2q_lane_p16:
+  case NEON::BI__builtin_neon_vluti2q_lane_p8:
+  case NEON::BI__builtin_neon_vluti2q_lane_s16:
+  case NEON::BI__builtin_neon_vluti2q_lane_s8:
+  case NEON::BI__builtin_neon_vluti2q_lane_u16:
+  case NEON::BI__builtin_neon_vluti2q_lane_u8: {
+    Int = Intrinsic::aarch64_neon_vluti2_lane;
+    llvm::Type *Tys[2];
+    Tys[0] = Ty;
+    Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
+                                             /*isQuad*/ true));
+    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
+  }
+  case NEON::BI__builtin_neon_vluti4q_lane_p8:
+  case NEON::BI__builtin_neon_vluti4q_lane_s8:
+  case NEON::BI__builtin_neon_vluti4q_lane_u8: {
+    Int = Intrinsic::aarch64_neon_vluti4q_lane;
+    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane");
+  }
+  case NEON::BI__builtin_neon_vluti4q_laneq_p8:
+  case NEON::BI__builtin_neon_vluti4q_laneq_s8:
+  case NEON::BI__builtin_neon_vluti4q_laneq_u8: {
+    Int = Intrinsic::aarch64_neon_vluti4q_laneq;
+    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq");
+  }
+  case NEON::BI__builtin_neon_vluti4q_lane_bf16_x2:
+  case NEON::BI__builtin_neon_vluti4q_lane_f16_x2:
+  case NEON::BI__builtin_neon_vluti4q_lane_p16_x2:
+  case NEON::BI__builtin_neon_vluti4q_lane_s16_x2:
+  case NEON::BI__builtin_neon_vluti4q_lane_u16_x2: {
+    Int = Intrinsic::aarch64_neon_vluti4q_lane_x2;
+    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane_x2");
+  }
+  case NEON::BI__builtin_neon_vluti4q_laneq_bf16_x2:
+  case NEON::BI__builtin_neon_vluti4q_laneq_f16_x2:
+  case NEON::BI__builtin_neon_vluti4q_laneq_p16_x2:
+  case NEON::BI__builtin_neon_vluti4q_laneq_s16_x2:
+  case NEON::BI__builtin_neon_vluti4q_laneq_u16_x2: {
+    Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
+    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq_x2");
+  }
+
+  case NEON::BI__builtin_neon_vamin_f16:
+  case NEON::BI__builtin_neon_vaminq_f16:
+  case NEON::BI__builtin_neon_vamin_f32:
+  case NEON::BI__builtin_neon_vaminq_f32:
+  case NEON::BI__builtin_neon_vaminq_f64: {
+    Int = Intrinsic::aarch64_neon_famin;
+    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famin");
+  }
+  case NEON::BI__builtin_neon_vamax_f16:
+  case NEON::BI__builtin_neon_vamaxq_f16:
+  case NEON::BI__builtin_neon_vamax_f32:
+  case NEON::BI__builtin_neon_vamaxq_f32:
+  case NEON::BI__builtin_neon_vamaxq_f64: {
+    Int = Intrinsic::aarch64_neon_famax;
+    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famax");
+  }
   }
 }
 
@@ -18531,7 +18652,8 @@ Intrinsic::ID getDotProductIntrinsic(CGHLSLRuntime &RT, QualType QT) {
 }
 
 Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
-                                            const CallExpr *E) {
+                                            const CallExpr *E,
+                                            ReturnValueSlot ReturnValue) {
   if (!getLangOpts().HLSL)
     return nullptr;
 
@@ -18718,11 +18840,53 @@ case Builtin::BI__builtin_hlsl_elementwise_isinf: {
         CGM.getHLSLRuntime().getSaturateIntrinsic(), ArrayRef<Value *>{Op0},
         nullptr, "hlsl.saturate");
   }
+  case Builtin::BI__builtin_hlsl_select: {
+    Value *OpCond = EmitScalarExpr(E->getArg(0));
+    RValue RValTrue = EmitAnyExpr(E->getArg(1));
+    Value *OpTrue =
+        RValTrue.isScalar()
+            ? RValTrue.getScalarVal()
+            : RValTrue.getAggregatePointer(E->getArg(1)->getType(), *this);
+    RValue RValFalse = EmitAnyExpr(E->getArg(2));
+    Value *OpFalse =
+        RValFalse.isScalar()
+            ? RValFalse.getScalarVal()
+            : RValFalse.getAggregatePointer(E->getArg(2)->getType(), *this);
+
+    Value *SelectVal =
+        Builder.CreateSelect(OpCond, OpTrue, OpFalse, "hlsl.select");
+    if (!RValTrue.isScalar())
+      Builder.CreateStore(SelectVal, ReturnValue.getAddress(),
+                          ReturnValue.isVolatile());
+
+    return SelectVal;
+  }
   case Builtin::BI__builtin_hlsl_wave_get_lane_index: {
     return EmitRuntimeCall(CGM.CreateRuntimeFunction(
         llvm::FunctionType::get(IntTy, {}, false), "__hlsl_wave_get_lane_index",
         {}, false, true));
   }
+  case Builtin::BI__builtin_hlsl_wave_is_first_lane: {
+    Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveIsFirstLaneIntrinsic();
+    return EmitRuntimeCall(Intrinsic::getDeclaration(&CGM.getModule(), ID));
+  }
+  case Builtin::BI__builtin_hlsl_elementwise_sign: {
+    Value *Op0 = EmitScalarExpr(E->getArg(0));
+    llvm::Type *Xty = Op0->getType();
+    llvm::Type *retType = llvm::Type::getInt32Ty(this->getLLVMContext());
+    if (Xty->isVectorTy()) {
+      auto *XVecTy = E->getArg(0)->getType()->getAs<VectorType>();
+      retType = llvm::VectorType::get(
+          retType, ElementCount::getFixed(XVecTy->getNumElements()));
+    }
+    assert((E->getArg(0)->getType()->hasFloatingRepresentation() ||
+            E->getArg(0)->getType()->hasSignedIntegerRepresentation()) &&
+           "sign operand must have a float or int representation");
+
+    return Builder.CreateIntrinsic(
+        retType, CGM.getHLSLRuntime().getSignIntrinsic(),
+        ArrayRef<Value *>{Op0}, nullptr, "hlsl.sign");
+  }
   }
   return nullptr;
 }
@@ -19512,6 +19676,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
         F, {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)),
             EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3))});
   }
+  case AMDGPU::BI__builtin_amdgcn_s_prefetch_data:
+    return emitBuiltinWithOneOverloadedType<2>(
+        *this, E, Intrinsic::amdgcn_s_prefetch_data);
   default:
     return nullptr;
   }
diff --git clang/lib/CodeGen/CGCUDANV.cpp clang/lib/CodeGen/CGCUDANV.cpp
index 59c592771793..ae14d74f2d91 100644
--- clang/lib/CodeGen/CGCUDANV.cpp
+++ clang/lib/CodeGen/CGCUDANV.cpp
@@ -840,8 +840,10 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
       FatBinStr = new llvm::GlobalVariable(
           CGM.getModule(), CGM.Int8Ty,
           /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, nullptr,
-          "__hip_fatbin_" + CGM.getContext().getCUIDHash(), nullptr,
-          llvm::GlobalVariable::NotThreadLocal);
+          "__hip_fatbin" + (CGM.getLangOpts().CUID.empty()
+                                ? ""
+                                : "_" + CGM.getContext().getCUIDHash()),
+          nullptr, llvm::GlobalVariable::NotThreadLocal);
       cast<llvm::GlobalVariable>(FatBinStr)->setSection(FatbinConstantName);
     }
 
@@ -894,8 +896,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
   // thread safety of the loaded program. Therefore we can assume sequential
   // execution of constructor functions here.
   if (IsHIP) {
-    auto Linkage = CudaGpuBinary ? llvm::GlobalValue::InternalLinkage
-                                 : llvm::GlobalValue::ExternalLinkage;
+    auto Linkage = RelocatableDeviceCode ? llvm::GlobalValue::ExternalLinkage
+                                         : llvm::GlobalValue::InternalLinkage;
     llvm::BasicBlock *IfBlock =
         llvm::BasicBlock::Create(Context, "if", ModuleCtorFunc);
     llvm::BasicBlock *ExitBlock =
@@ -905,10 +907,11 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
     GpuBinaryHandle = new llvm::GlobalVariable(
         TheModule, PtrTy, /*isConstant=*/false, Linkage,
         /*Initializer=*/
-        CudaGpuBinary ? llvm::ConstantPointerNull::get(PtrTy) : nullptr,
-        CudaGpuBinary
-            ? "__hip_gpubin_handle"
-            : "__hip_gpubin_handle_" + CGM.getContext().getCUIDHash());
+        !RelocatableDeviceCode ? llvm::ConstantPointerNull::get(PtrTy)
+                               : nullptr,
+        "__hip_gpubin_handle" + (CGM.getLangOpts().CUID.empty()
+                                     ? ""
+                                     : "_" + CGM.getContext().getCUIDHash()));
     GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getAsAlign());
     // Prevent the weak symbol in different shared libraries being merged.
     if (Linkage != llvm::GlobalValue::InternalLinkage)
diff --git clang/lib/CodeGen/CGCUDARuntime.cpp clang/lib/CodeGen/CGCUDARuntime.cpp
index c14a9d3f2bbb..1e1da1e2411a 100644
--- clang/lib/CodeGen/CGCUDARuntime.cpp
+++ clang/lib/CodeGen/CGCUDARuntime.cpp
@@ -25,7 +25,8 @@ CGCUDARuntime::~CGCUDARuntime() {}
 
 RValue CGCUDARuntime::EmitCUDAKernelCallExpr(CodeGenFunction &CGF,
                                              const CUDAKernelCallExpr *E,
-                                             ReturnValueSlot ReturnValue) {
+                                             ReturnValueSlot ReturnValue,
+                                             llvm::CallBase **CallOrInvoke) {
   llvm::BasicBlock *ConfigOKBlock = CGF.createBasicBlock("kcall.configok");
   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("kcall.end");
 
@@ -35,7 +36,7 @@ RValue CGCUDARuntime::EmitCUDAKernelCallExpr(CodeGenFunction &CGF,
 
   eval.begin(CGF);
   CGF.EmitBlock(ConfigOKBlock);
-  CGF.EmitSimpleCallExpr(E, ReturnValue);
+  CGF.EmitSimpleCallExpr(E, ReturnValue, CallOrInvoke);
   CGF.EmitBranch(ContBlock);
 
   CGF.EmitBlock(ContBlock);
diff --git clang/lib/CodeGen/CGCUDARuntime.h clang/lib/CodeGen/CGCUDARuntime.h
index 8030d632cc3d..86f776004ee7 100644
--- clang/lib/CodeGen/CGCUDARuntime.h
+++ clang/lib/CodeGen/CGCUDARuntime.h
@@ -21,6 +21,7 @@
 #include "llvm/IR/GlobalValue.h"
 
 namespace llvm {
+class CallBase;
 class Function;
 class GlobalVariable;
 }
@@ -82,9 +83,10 @@ public:
   CGCUDARuntime(CodeGenModule &CGM) : CGM(CGM) {}
   virtual ~CGCUDARuntime();
 
-  virtual RValue EmitCUDAKernelCallExpr(CodeGenFunction &CGF,
-                                        const CUDAKernelCallExpr *E,
-                                        ReturnValueSlot ReturnValue);
+  virtual RValue
+  EmitCUDAKernelCallExpr(CodeGenFunction &CGF, const CUDAKernelCallExpr *E,
+                         ReturnValueSlot ReturnValue,
+                         llvm::CallBase **CallOrInvoke = nullptr);
 
   /// Emits a kernel launch stub.
   virtual void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) = 0;
diff --git clang/lib/CodeGen/CGCXXABI.h clang/lib/CodeGen/CGCXXABI.h
index 7dcc53911199..687ff7fb8444 100644
--- clang/lib/CodeGen/CGCXXABI.h
+++ clang/lib/CodeGen/CGCXXABI.h
@@ -485,11 +485,11 @@ public:
       llvm::PointerUnion<const CXXDeleteExpr *, const CXXMemberCallExpr *>;
 
   /// Emit the ABI-specific virtual destructor call.
-  virtual llvm::Value *EmitVirtualDestructorCall(CodeGenFunction &CGF,
-                                                 const CXXDestructorDecl *Dtor,
-                                                 CXXDtorType DtorType,
-                                                 Address This,
-                                                 DeleteOrMemberCallExpr E) = 0;
+  virtual llvm::Value *
+  EmitVirtualDestructorCall(CodeGenFunction &CGF, const CXXDestructorDecl *Dtor,
+                            CXXDtorType DtorType, Address This,
+                            DeleteOrMemberCallExpr E,
+                            llvm::CallBase **CallOrInvoke) = 0;
 
   virtual void adjustCallArgsForDestructorThunk(CodeGenFunction &CGF,
                                                 GlobalDecl GD,
diff --git clang/lib/CodeGen/CGCall.h clang/lib/CodeGen/CGCall.h
index 6fa65e191618..92e0cc43919c 100644
--- clang/lib/CodeGen/CGCall.h
+++ clang/lib/CodeGen/CGCall.h
@@ -450,12 +450,12 @@ inline FnInfoOpts operator&(FnInfoOpts A, FnInfoOpts B) {
                                  llvm::to_underlying(B));
 }
 
-inline FnInfoOpts operator|=(FnInfoOpts A, FnInfoOpts B) {
+inline FnInfoOpts &operator|=(FnInfoOpts &A, FnInfoOpts B) {
   A = A | B;
   return A;
 }
 
-inline FnInfoOpts operator&=(FnInfoOpts A, FnInfoOpts B) {
+inline FnInfoOpts &operator&=(FnInfoOpts &A, FnInfoOpts B) {
   A = A & B;
   return A;
 }
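
A note on the operator changes above: the old by-value signatures compiled, but
`A |= B` mutated a copy of the left operand, so the caller's flags were never
updated. A minimal sketch of the corrected pattern, using a hypothetical `Flags`
enum rather than the real `FnInfoOpts`:

    enum class Flags : unsigned { None = 0, A = 1, B = 2 };

    inline Flags operator|(Flags X, Flags Y) {
      return static_cast<Flags>(static_cast<unsigned>(X) |
                                static_cast<unsigned>(Y));
    }

    // Taking the left operand by reference (and returning a reference) makes
    // `F |= Flags::B` behave like the built-in compound assignment.
    inline Flags &operator|=(Flags &X, Flags Y) { return X = X | Y; }
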
diff --git clang/lib/CodeGen/CGClass.cpp clang/lib/CodeGen/CGClass.cpp
index e5ba50de3462..352955749a63 100644
--- clang/lib/CodeGen/CGClass.cpp
+++ clang/lib/CodeGen/CGClass.cpp
@@ -2192,15 +2192,11 @@ static bool canEmitDelegateCallArgs(CodeGenFunction &CGF,
   return true;
 }
 
-void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D,
-                                             CXXCtorType Type,
-                                             bool ForVirtualBase,
-                                             bool Delegating,
-                                             Address This,
-                                             CallArgList &Args,
-                                             AggValueSlot::Overlap_t Overlap,
-                                             SourceLocation Loc,
-                                             bool NewPointerIsChecked) {
+void CodeGenFunction::EmitCXXConstructorCall(
+    const CXXConstructorDecl *D, CXXCtorType Type, bool ForVirtualBase,
+    bool Delegating, Address This, CallArgList &Args,
+    AggValueSlot::Overlap_t Overlap, SourceLocation Loc,
+    bool NewPointerIsChecked, llvm::CallBase **CallOrInvoke) {
   const CXXRecordDecl *ClassDecl = D->getParent();
 
   if (!NewPointerIsChecked)
@@ -2248,7 +2244,7 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D,
   const CGFunctionInfo &Info = CGM.getTypes().arrangeCXXConstructorCall(
       Args, D, Type, ExtraArgs.Prefix, ExtraArgs.Suffix, PassPrototypeArgs);
   CGCallee Callee = CGCallee::forDirect(CalleePtr, GlobalDecl(D, Type));
-  EmitCall(Info, Callee, ReturnValueSlot(), Args, nullptr, false, Loc);
+  EmitCall(Info, Callee, ReturnValueSlot(), Args, CallOrInvoke, false, Loc);
 
   // Generate vtable assumptions if we're constructing a complete object
   // with a vtable.  We don't do this for base subobjects for two reasons:
diff --git clang/lib/CodeGen/CGDeclCXX.cpp clang/lib/CodeGen/CGDeclCXX.cpp
index 2f56355cff90..c44f38ef02a3 100644
--- clang/lib/CodeGen/CGDeclCXX.cpp
+++ clang/lib/CodeGen/CGDeclCXX.cpp
@@ -586,31 +586,50 @@ CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D,
                                           PrioritizedCXXGlobalInits.size());
     PrioritizedCXXGlobalInits.push_back(std::make_pair(Key, Fn));
   } else if (isTemplateInstantiation(D->getTemplateSpecializationKind()) ||
-             getContext().GetGVALinkageForVariable(D) == GVA_DiscardableODR ||
+             !isUniqueGVALinkage(getContext().GetGVALinkageForVariable(D)) ||
              D->hasAttr<SelectAnyAttr>()) {
+    // For vague linkage globals, put the initializer into its own global_ctors
+    // entry with the global as a comdat key. This ensures at most one
+    // initializer per DSO runs during DSO dynamic initialization.
+    //
+    // For ELF platforms, this is an important code size and startup time
+    // optimization. For dynamic, non-hidden symbols, the weak guard variable
+    // remains to ensure that other DSOs do not re-initialize the global.
+    //
+    // For PE-COFF platforms, there is no guard variable, and COMDAT
+    // associativity is the only way to ensure vague linkage globals are
+    // initialized exactly once.
+    //
+    // MachO is the only remaining platform with no comdats that doesn't
+    // benefit from this optimization. The rest are mainly modeled on ELF
+    // behavior.
+    //
+    // C++ requires that inline global variables are initialized in source
+    // order, but this requirement does not exist for templated entities.
+    // llvm.global_ctors does not guarantee initialization order, so in
+    // general, Clang does not fully conform to the ordering requirement.
+    // However, in practice, LLVM emits global_ctors in the provided order, and
+    // users typically don't rely on ordering between inline globals in
+    // different headers which are then transitively included in varying order.
+    // Clang's current behavior is a practical tradeoff, since dropping the
+    // comdat would lead to unacceptable impact on code size and startup time.
+    //
+    // FIXME: Find a solution to guarantee source-order initialization of
+    // inline variables.
+    //
     // C++ [basic.start.init]p2:
     //   Definitions of explicitly specialized class template static data
     //   members have ordered initialization. Other class template static data
     //   members (i.e., implicitly or explicitly instantiated specializations)
     //   have unordered initialization.
     //
-    // As a consequence, we can put them into their own llvm.global_ctors entry.
-    //
-    // If the global is externally visible, put the initializer into a COMDAT
-    // group with the global being initialized.  On most platforms, this is a
-    // minor startup time optimization.  In the MS C++ ABI, there are no guard
-    // variables, so this COMDAT key is required for correctness.
-    //
-    // SelectAny globals will be comdat-folded. Put the initializer into a
-    // COMDAT group associated with the global, so the initializers get folded
-    // too.
-    I = DelayedCXXInitPosition.find(D);
     // CXXGlobalInits.size() is the lex order number for the next deferred
     // VarDecl. Use it when the current VarDecl is non-deferred. Although this
     // lex order number is shared between current VarDecl and some following
     // VarDecls, their order of insertion into `llvm.global_ctors` is the same
     // as the lexing order and the following stable sort would preserve such
     // order.
+    I = DelayedCXXInitPosition.find(D);
     unsigned LexOrder =
         I == DelayedCXXInitPosition.end() ? CXXGlobalInits.size() : I->second;
     AddGlobalCtor(Fn, 65535, LexOrder, COMDATKey);
@@ -621,13 +640,13 @@ CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D,
       addUsedGlobal(COMDATKey);
     }
 
-    // If we used a COMDAT key for the global ctor, the init function can be
-    // discarded if the global ctor entry is discarded.
-    // FIXME: Do we need to restrict this to ELF and Wasm?
+    // If comdats are in use and supported, place the initializer function into
+    // the comdat group of the global. In the MS ABI, initializers are mangled
+    // and have their own comdat, so we don't include them in the group for
+    // consistency with MSVC.
     llvm::Comdat *C = Addr->getComdat();
-    if (COMDATKey && C &&
-        (getTarget().getTriple().isOSBinFormatELF() ||
-         getTarget().getTriple().isOSBinFormatWasm())) {
+    if (COMDATKey && C && getTriple().supportsCOMDAT() &&
+        !getTarget().getCXXABI().isMicrosoft()) {
       Fn->setComdat(C);
     }
   } else {
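
For context, a sketch of the kind of C++ source this branch applies to
(hypothetical declarations, not taken from the patch): implicitly instantiated
static data members and C++17 inline variables both have vague linkage, so
their dynamic initializers can be routed through `llvm.global_ctors` entries
keyed on the variable's comdat.

    int computeInitialCount(); // some non-constant initializer
    int nextSerial();

    template <typename T> struct Registry {
      static int Count; // implicit instantiation => unordered initialization
    };
    template <typename T> int Registry<T>::Count = computeInitialCount();

    inline int GlobalSerial = nextSerial(); // C++17 inline variable

    // Referencing both makes this TU emit their definitions and initializers.
    int touch() { return Registry<int>::Count + GlobalSerial; }

On comdat-supporting targets, each of these initializers gets its own ctor
entry associated with the variable's comdat, so at most one copy runs per DSO
even when many TUs emit the same variable.
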
diff --git clang/lib/CodeGen/CGExpr.cpp clang/lib/CodeGen/CGExpr.cpp
index 99cd61b9e789..35b5daaf6d4b 100644
--- clang/lib/CodeGen/CGExpr.cpp
+++ clang/lib/CodeGen/CGExpr.cpp
@@ -33,6 +33,7 @@
 #include "clang/Basic/SourceManager.h"
 #include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Intrinsics.h"
@@ -5544,16 +5545,30 @@ RValue CodeGenFunction::EmitRValueForField(LValue LV,
 //===--------------------------------------------------------------------===//
 
 RValue CodeGenFunction::EmitCallExpr(const CallExpr *E,
-                                     ReturnValueSlot ReturnValue) {
+                                     ReturnValueSlot ReturnValue,
+                                     llvm::CallBase **CallOrInvoke) {
+  llvm::CallBase *CallOrInvokeStorage = nullptr;
+  if (!CallOrInvoke) {
+    CallOrInvoke = &CallOrInvokeStorage;
+  }
+
+  auto AddCoroElideSafeOnExit = llvm::make_scope_exit([&] {
+    if (E->isCoroElideSafe()) {
+      auto *I = *CallOrInvoke;
+      if (I)
+        I->addFnAttr(llvm::Attribute::CoroElideSafe);
+    }
+  });
+
   // Builtins never have block type.
   if (E->getCallee()->getType()->isBlockPointerType())
-    return EmitBlockCallExpr(E, ReturnValue);
+    return EmitBlockCallExpr(E, ReturnValue, CallOrInvoke);
 
   if (const auto *CE = dyn_cast<CXXMemberCallExpr>(E))
-    return EmitCXXMemberCallExpr(CE, ReturnValue);
+    return EmitCXXMemberCallExpr(CE, ReturnValue, CallOrInvoke);
 
   if (const auto *CE = dyn_cast<CUDAKernelCallExpr>(E))
-    return EmitCUDAKernelCallExpr(CE, ReturnValue);
+    return EmitCUDAKernelCallExpr(CE, ReturnValue, CallOrInvoke);
 
   // A CXXOperatorCallExpr is created even for explicit object methods, but
   // these should be treated like static function call.
@@ -5561,7 +5576,7 @@ RValue CodeGenFunction::EmitCallExpr(const CallExpr *E,
     if (const auto *MD =
             dyn_cast_if_present<CXXMethodDecl>(CE->getCalleeDecl());
         MD && MD->isImplicitObjectMemberFunction())
-      return EmitCXXOperatorMemberCallExpr(CE, MD, ReturnValue);
+      return EmitCXXOperatorMemberCallExpr(CE, MD, ReturnValue, CallOrInvoke);
 
   CGCallee callee = EmitCallee(E->getCallee());
 
@@ -5574,14 +5589,17 @@ RValue CodeGenFunction::EmitCallExpr(const CallExpr *E,
     return EmitCXXPseudoDestructorExpr(callee.getPseudoDestructorExpr());
   }
 
-  return EmitCall(E->getCallee()->getType(), callee, E, ReturnValue);
+  return EmitCall(E->getCallee()->getType(), callee, E, ReturnValue,
+                  /*Chain=*/nullptr, CallOrInvoke);
 }
 
 /// Emit a CallExpr without considering whether it might be a subclass.
 RValue CodeGenFunction::EmitSimpleCallExpr(const CallExpr *E,
-                                           ReturnValueSlot ReturnValue) {
+                                           ReturnValueSlot ReturnValue,
+                                           llvm::CallBase **CallOrInvoke) {
   CGCallee Callee = EmitCallee(E->getCallee());
-  return EmitCall(E->getCallee()->getType(), Callee, E, ReturnValue);
+  return EmitCall(E->getCallee()->getType(), Callee, E, ReturnValue,
+                  /*Chain=*/nullptr, CallOrInvoke);
 }
 
 // Detect the unusual situation where an inline version is shadowed by a
@@ -5785,8 +5803,9 @@ LValue CodeGenFunction::EmitBinaryOperatorLValue(const BinaryOperator *E) {
   llvm_unreachable("bad evaluation kind");
 }
 
-LValue CodeGenFunction::EmitCallExprLValue(const CallExpr *E) {
-  RValue RV = EmitCallExpr(E);
+LValue CodeGenFunction::EmitCallExprLValue(const CallExpr *E,
+                                           llvm::CallBase **CallOrInvoke) {
+  RValue RV = EmitCallExpr(E, ReturnValueSlot(), CallOrInvoke);
 
   if (!RV.isScalar())
     return MakeAddrLValue(RV.getAggregateAddress(), E->getType(),
@@ -5909,9 +5928,11 @@ LValue CodeGenFunction::EmitStmtExprLValue(const StmtExpr *E) {
                         AlignmentSource::Decl);
 }
 
-RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee,
-                                 const CallExpr *E, ReturnValueSlot ReturnValue,
-                                 llvm::Value *Chain) {
+RValue CodeGenFunction::EmitCall(QualType CalleeType,
+                                 const CGCallee &OrigCallee, const CallExpr *E,
+                                 ReturnValueSlot ReturnValue,
+                                 llvm::Value *Chain,
+                                 llvm::CallBase **CallOrInvoke) {
   // Get the actual function type. The callee type will always be a pointer to
   // function type or a block pointer type.
   assert(CalleeType->isFunctionPointerType() &&
@@ -6131,8 +6152,8 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee
         Address(Handle, Handle->getType(), CGM.getPointerAlign()));
     Callee.setFunctionPointer(Stub);
   }
-  llvm::CallBase *CallOrInvoke = nullptr;
-  RValue Call = EmitCall(FnInfo, Callee, ReturnValue, Args, &CallOrInvoke,
+  llvm::CallBase *LocalCallOrInvoke = nullptr;
+  RValue Call = EmitCall(FnInfo, Callee, ReturnValue, Args, &LocalCallOrInvoke,
                          E == MustTailCall, E->getExprLoc());
 
   // Generate function declaration DISuprogram in order to be used
@@ -6141,11 +6162,13 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee
     if (auto *CalleeDecl = dyn_cast_or_null<FunctionDecl>(TargetDecl)) {
       FunctionArgList Args;
       QualType ResTy = BuildFunctionArgList(CalleeDecl, Args);
-      DI->EmitFuncDeclForCallSite(CallOrInvoke,
+      DI->EmitFuncDeclForCallSite(LocalCallOrInvoke,
                                   DI->getFunctionType(CalleeDecl, ResTy, Args),
                                   CalleeDecl);
     }
   }
+  if (CallOrInvoke)
+    *CallOrInvoke = LocalCallOrInvoke;
 
   return Call;
 }
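
The net effect of the plumbing above is that callers of `EmitCallExpr` (and of
the member/CUDA/block variants it forwards to) can now observe the emitted
`llvm::CallBase` and annotate it afterwards, which is exactly what the
`CoroElideSafe` scope-exit does. A caller-side sketch of the pattern, with a
hypothetical helper and `NoInline` used purely as an example attribute:

    // Sketch only; assumes it lives inside clang/lib/CodeGen, where the
    // internal header "CodeGenFunction.h" is available.
    static void emitCallAndTag(clang::CodeGen::CodeGenFunction &CGF,
                               const clang::CallExpr *E) {
      llvm::CallBase *CallOrInvoke = nullptr;
      CGF.EmitCallExpr(E, clang::CodeGen::ReturnValueSlot(), &CallOrInvoke);
      if (CallOrInvoke) // left null when no call/invoke instruction was emitted
        CallOrInvoke->addFnAttr(llvm::Attribute::NoInline);
    }
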
diff --git clang/lib/CodeGen/CGExprCXX.cpp clang/lib/CodeGen/CGExprCXX.cpp
index 8eb6ab7381ac..1214bb054fb8 100644
--- clang/lib/CodeGen/CGExprCXX.cpp
+++ clang/lib/CodeGen/CGExprCXX.cpp
@@ -84,23 +84,24 @@ commonEmitCXXMemberOrOperatorCall(CodeGenFunction &CGF, GlobalDecl GD,
 
 RValue CodeGenFunction::EmitCXXMemberOrOperatorCall(
     const CXXMethodDecl *MD, const CGCallee &Callee,
-    ReturnValueSlot ReturnValue,
-    llvm::Value *This, llvm::Value *ImplicitParam, QualType ImplicitParamTy,
-    const CallExpr *CE, CallArgList *RtlArgs) {
+    ReturnValueSlot ReturnValue, llvm::Value *This, llvm::Value *ImplicitParam,
+    QualType ImplicitParamTy, const CallExpr *CE, CallArgList *RtlArgs,
+    llvm::CallBase **CallOrInvoke) {
   const FunctionProtoType *FPT = MD->getType()->castAs<FunctionProtoType>();
   CallArgList Args;
   MemberCallInfo CallInfo = commonEmitCXXMemberOrOperatorCall(
       *this, MD, This, ImplicitParam, ImplicitParamTy, CE, Args, RtlArgs);
   auto &FnInfo = CGM.getTypes().arrangeCXXMethodCall(
       Args, FPT, CallInfo.ReqArgs, CallInfo.PrefixSize);
-  return EmitCall(FnInfo, Callee, ReturnValue, Args, nullptr,
+  return EmitCall(FnInfo, Callee, ReturnValue, Args, CallOrInvoke,
                   CE && CE == MustTailCall,
                   CE ? CE->getExprLoc() : SourceLocation());
 }
 
 RValue CodeGenFunction::EmitCXXDestructorCall(
     GlobalDecl Dtor, const CGCallee &Callee, llvm::Value *This, QualType ThisTy,
-    llvm::Value *ImplicitParam, QualType ImplicitParamTy, const CallExpr *CE) {
+    llvm::Value *ImplicitParam, QualType ImplicitParamTy, const CallExpr *CE,
+    llvm::CallBase **CallOrInvoke) {
   const CXXMethodDecl *DtorDecl = cast<CXXMethodDecl>(Dtor.getDecl());
 
   assert(!ThisTy.isNull());
@@ -120,7 +121,8 @@ RValue CodeGenFunction::EmitCXXDestructorCall(
   commonEmitCXXMemberOrOperatorCall(*this, Dtor, This, ImplicitParam,
                                     ImplicitParamTy, CE, Args, nullptr);
   return EmitCall(CGM.getTypes().arrangeCXXStructorDeclaration(Dtor), Callee,
-                  ReturnValueSlot(), Args, nullptr, CE && CE == MustTailCall,
+                  ReturnValueSlot(), Args, CallOrInvoke,
+                  CE && CE == MustTailCall,
                   CE ? CE->getExprLoc() : SourceLocation{});
 }
 
@@ -186,11 +188,12 @@ static CXXRecordDecl *getCXXRecord(const Expr *E) {
 // Note: This function also emit constructor calls to support a MSVC
 // extensions allowing explicit constructor function call.
 RValue CodeGenFunction::EmitCXXMemberCallExpr(const CXXMemberCallExpr *CE,
-                                              ReturnValueSlot ReturnValue) {
+                                              ReturnValueSlot ReturnValue,
+                                              llvm::CallBase **CallOrInvoke) {
   const Expr *callee = CE->getCallee()->IgnoreParens();
 
   if (isa<BinaryOperator>(callee))
-    return EmitCXXMemberPointerCallExpr(CE, ReturnValue);
+    return EmitCXXMemberPointerCallExpr(CE, ReturnValue, CallOrInvoke);
 
   const MemberExpr *ME = cast<MemberExpr>(callee);
   const CXXMethodDecl *MD = cast<CXXMethodDecl>(ME->getMemberDecl());
@@ -200,7 +203,7 @@ RValue CodeGenFunction::EmitCXXMemberCallExpr(const CXXMemberCallExpr *CE,
     CGCallee callee =
         CGCallee::forDirect(CGM.GetAddrOfFunction(MD), GlobalDecl(MD));
     return EmitCall(getContext().getPointerType(MD->getType()), callee, CE,
-                    ReturnValue);
+                    ReturnValue, /*Chain=*/nullptr, CallOrInvoke);
   }
 
   bool HasQualifier = ME->hasQualifier();
@@ -208,14 +211,15 @@ RValue CodeGenFunction::EmitCXXMemberCallExpr(const CXXMemberCallExpr *CE,
   bool IsArrow = ME->isArrow();
   const Expr *Base = ME->getBase();
 
-  return EmitCXXMemberOrOperatorMemberCallExpr(
-      CE, MD, ReturnValue, HasQualifier, Qualifier, IsArrow, Base);
+  return EmitCXXMemberOrOperatorMemberCallExpr(CE, MD, ReturnValue,
+                                               HasQualifier, Qualifier, IsArrow,
+                                               Base, CallOrInvoke);
 }
 
 RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr(
     const CallExpr *CE, const CXXMethodDecl *MD, ReturnValueSlot ReturnValue,
     bool HasQualifier, NestedNameSpecifier *Qualifier, bool IsArrow,
-    const Expr *Base) {
+    const Expr *Base, llvm::CallBase **CallOrInvoke) {
   assert(isa<CXXMemberCallExpr>(CE) || isa<CXXOperatorCallExpr>(CE));
 
   // Compute the object pointer.
@@ -300,7 +304,7 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr(
     EmitCXXConstructorCall(Ctor, Ctor_Complete, /*ForVirtualBase=*/false,
                            /*Delegating=*/false, This.getAddress(), Args,
                            AggValueSlot::DoesNotOverlap, CE->getExprLoc(),
-                           /*NewPointerIsChecked=*/false);
+                           /*NewPointerIsChecked=*/false, CallOrInvoke);
     return RValue::get(nullptr);
   }
 
@@ -374,9 +378,9 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr(
            "Destructor shouldn't have explicit parameters");
     assert(ReturnValue.isNull() && "Destructor shouldn't have return value");
     if (UseVirtualCall) {
-      CGM.getCXXABI().EmitVirtualDestructorCall(*this, Dtor, Dtor_Complete,
-                                                This.getAddress(),
-                                                cast<CXXMemberCallExpr>(CE));
+      CGM.getCXXABI().EmitVirtualDestructorCall(
+          *this, Dtor, Dtor_Complete, This.getAddress(),
+          cast<CXXMemberCallExpr>(CE), CallOrInvoke);
     } else {
       GlobalDecl GD(Dtor, Dtor_Complete);
       CGCallee Callee;
@@ -393,7 +397,7 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr(
           IsArrow ? Base->getType()->getPointeeType() : Base->getType();
       EmitCXXDestructorCall(GD, Callee, This.getPointer(*this), ThisTy,
                             /*ImplicitParam=*/nullptr,
-                            /*ImplicitParamTy=*/QualType(), CE);
+                            /*ImplicitParamTy=*/QualType(), CE, CallOrInvoke);
     }
     return RValue::get(nullptr);
   }
@@ -435,12 +439,13 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr(
 
   return EmitCXXMemberOrOperatorCall(
       CalleeDecl, Callee, ReturnValue, This.getPointer(*this),
-      /*ImplicitParam=*/nullptr, QualType(), CE, RtlArgs);
+      /*ImplicitParam=*/nullptr, QualType(), CE, RtlArgs, CallOrInvoke);
 }
 
 RValue
 CodeGenFunction::EmitCXXMemberPointerCallExpr(const CXXMemberCallExpr *E,
-                                              ReturnValueSlot ReturnValue) {
+                                              ReturnValueSlot ReturnValue,
+                                              llvm::CallBase **CallOrInvoke) {
   const BinaryOperator *BO =
       cast<BinaryOperator>(E->getCallee()->IgnoreParens());
   const Expr *BaseExpr = BO->getLHS();
@@ -484,24 +489,25 @@ CodeGenFunction::EmitCXXMemberPointerCallExpr(const CXXMemberCallExpr *E,
   EmitCallArgs(Args, FPT, E->arguments());
   return EmitCall(CGM.getTypes().arrangeCXXMethodCall(Args, FPT, required,
                                                       /*PrefixSize=*/0),
-                  Callee, ReturnValue, Args, nullptr, E == MustTailCall,
+                  Callee, ReturnValue, Args, CallOrInvoke, E == MustTailCall,
                   E->getExprLoc());
 }
 
-RValue
-CodeGenFunction::EmitCXXOperatorMemberCallExpr(const CXXOperatorCallExpr *E,
-                                               const CXXMethodDecl *MD,
-                                               ReturnValueSlot ReturnValue) {
+RValue CodeGenFunction::EmitCXXOperatorMemberCallExpr(
+    const CXXOperatorCallExpr *E, const CXXMethodDecl *MD,
+    ReturnValueSlot ReturnValue, llvm::CallBase **CallOrInvoke) {
   assert(MD->isImplicitObjectMemberFunction() &&
          "Trying to emit a member call expr on a static method!");
   return EmitCXXMemberOrOperatorMemberCallExpr(
       E, MD, ReturnValue, /*HasQualifier=*/false, /*Qualifier=*/nullptr,
-      /*IsArrow=*/false, E->getArg(0));
+      /*IsArrow=*/false, E->getArg(0), CallOrInvoke);
 }
 
 RValue CodeGenFunction::EmitCUDAKernelCallExpr(const CUDAKernelCallExpr *E,
-                                               ReturnValueSlot ReturnValue) {
-  return CGM.getCUDARuntime().EmitCUDAKernelCallExpr(*this, E, ReturnValue);
+                                               ReturnValueSlot ReturnValue,
+                                               llvm::CallBase **CallOrInvoke) {
+  return CGM.getCUDARuntime().EmitCUDAKernelCallExpr(*this, E, ReturnValue,
+                                                     CallOrInvoke);
 }
 
 static void EmitNullBaseClassInitialization(CodeGenFunction &CGF,
diff --git clang/lib/CodeGen/CGExprScalar.cpp clang/lib/CodeGen/CGExprScalar.cpp
index 7aa2d3d89c29..82caf65ac68d 100644
--- clang/lib/CodeGen/CGExprScalar.cpp
+++ clang/lib/CodeGen/CGExprScalar.cpp
@@ -36,6 +36,7 @@
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/FixedPointBuilder.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/GEPNoWrapFlags.h"
 #include "llvm/IR/GetElementPtrTypeIterator.h"
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/Intrinsics.h"
@@ -1454,6 +1455,10 @@ Value *ScalarExprEmitter::EmitScalarCast(Value *Src, QualType SrcType,
     return Builder.CreateFPToUI(Src, DstTy, "conv");
   }
 
+  if (DstElementTy->is16bitFPTy() && SrcElementTy->is16bitFPTy()) {
+    Value *FloatVal = Builder.CreateFPExt(Src, Builder.getFloatTy(), "fpext");
+    return Builder.CreateFPTrunc(FloatVal, DstTy, "fptrunc");
+  }
   if (DstElementTy->getTypeID() < SrcElementTy->getTypeID())
     return Builder.CreateFPTrunc(Src, DstTy, "conv");
   return Builder.CreateFPExt(Src, DstTy, "conv");
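
The new 16-bit branch above exists because `fpext` requires a strictly wider
destination and `fptrunc` a strictly narrower one, and half and bfloat are the
same width, so neither instruction can convert between them directly; the value
is widened to `float` and narrowed back instead. A standalone sketch of the
same sequence with `llvm::IRBuilder` (hypothetical helper, not part of the
patch):

    llvm::Value *emitHalfToBFloat(llvm::IRBuilderBase &B, llvm::Value *Half) {
      // half -> float -> bfloat; both steps are legal widen/narrow pairs.
      llvm::Value *AsFloat = B.CreateFPExt(Half, B.getFloatTy(), "fpext");
      return B.CreateFPTrunc(AsFloat, B.getBFloatTy(), "fptrunc");
    }
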
@@ -2704,14 +2709,19 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
     return CGF.CGM.createOpenCLIntToSamplerConversion(E, CGF);
 
   case CK_HLSLVectorTruncation: {
-    assert(DestTy->isVectorType() && "Expected dest type to be vector type");
+    assert((DestTy->isVectorType() || DestTy->isBuiltinType()) &&
+           "Destination type must be a vector or builtin type.");
     Value *Vec = Visit(const_cast<Expr *>(E));
-    SmallVector<int, 16> Mask;
-    unsigned NumElts = DestTy->castAs<VectorType>()->getNumElements();
-    for (unsigned I = 0; I != NumElts; ++I)
-      Mask.push_back(I);
+    if (auto *VecTy = DestTy->getAs<VectorType>()) {
+      SmallVector<int> Mask;
+      unsigned NumElts = VecTy->getNumElements();
+      for (unsigned I = 0; I != NumElts; ++I)
+        Mask.push_back(I);
 
-    return Builder.CreateShuffleVector(Vec, Mask, "trunc");
+      return Builder.CreateShuffleVector(Vec, Mask, "trunc");
+    }
+    llvm::Value *Zero = llvm::Constant::getNullValue(CGF.SizeTy);
+    return Builder.CreateExtractElement(Vec, Zero, "cast.vtrunc");
   }
 
   } // end of switch
@@ -5759,7 +5769,12 @@ CodeGenFunction::EmitCheckedInBoundsGEP(llvm::Type *ElemTy, Value *Ptr,
                                         bool SignedIndices, bool IsSubtraction,
                                         SourceLocation Loc, const Twine &Name) {
   llvm::Type *PtrTy = Ptr->getType();
-  Value *GEPVal = Builder.CreateInBoundsGEP(ElemTy, Ptr, IdxList, Name);
+
+  llvm::GEPNoWrapFlags NWFlags = llvm::GEPNoWrapFlags::inBounds();
+  if (!SignedIndices && !IsSubtraction)
+    NWFlags |= llvm::GEPNoWrapFlags::noUnsignedWrap();
+
+  Value *GEPVal = Builder.CreateGEP(ElemTy, Ptr, IdxList, Name, NWFlags);
 
   // If the pointer overflow sanitizer isn't enabled, do nothing.
   if (!SanOpts.has(SanitizerKind::PointerOverflow))
@@ -5874,8 +5889,13 @@ Address CodeGenFunction::EmitCheckedInBoundsGEP(
     Address Addr, ArrayRef<Value *> IdxList, llvm::Type *elementType,
     bool SignedIndices, bool IsSubtraction, SourceLocation Loc, CharUnits Align,
     const Twine &Name) {
-  if (!SanOpts.has(SanitizerKind::PointerOverflow))
-    return Builder.CreateInBoundsGEP(Addr, IdxList, elementType, Align, Name);
+  if (!SanOpts.has(SanitizerKind::PointerOverflow)) {
+    llvm::GEPNoWrapFlags NWFlags = llvm::GEPNoWrapFlags::inBounds();
+    if (!SignedIndices && !IsSubtraction)
+      NWFlags |= llvm::GEPNoWrapFlags::noUnsignedWrap();
+
+    return Builder.CreateGEP(Addr, IdxList, elementType, Align, Name, NWFlags);
+  }
 
   return RawAddress(
       EmitCheckedInBoundsGEP(Addr.getElementType(), Addr.emitRawPointer(*this),
diff --git clang/lib/CodeGen/CGHLSLRuntime.cpp clang/lib/CodeGen/CGHLSLRuntime.cpp
index 4bd7b6ba58de..b6e6555e63fc 100644
--- clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -295,13 +295,14 @@ void CGHLSLRuntime::annotateHLSLResource(const VarDecl *D, GlobalVariable *GV) {
   // inside the record decl
   for (auto *FD : RD->fields()) {
     const auto *HLSLResAttr = FD->getAttr<HLSLResourceAttr>();
-    const auto *HLSLResClassAttr = FD->getAttr<HLSLResourceClassAttr>();
-    if (!HLSLResAttr || !HLSLResClassAttr)
+    const HLSLAttributedResourceType *AttrResType =
+        dyn_cast<HLSLAttributedResourceType>(FD->getType().getTypePtr());
+    if (!HLSLResAttr || !AttrResType)
       continue;
 
-    llvm::hlsl::ResourceClass RC = HLSLResClassAttr->getResourceClass();
+    llvm::hlsl::ResourceClass RC = AttrResType->getAttrs().ResourceClass;
+    bool IsROV = AttrResType->getAttrs().IsROV;
     llvm::hlsl::ResourceKind RK = HLSLResAttr->getResourceKind();
-    bool IsROV = FD->hasAttr<HLSLROVAttr>();
     llvm::hlsl::ElementType ET = calculateElementType(CGM.getContext(), Ty);
 
     BufferResBinding Binding(D->getAttr<HLSLResourceBindingAttr>());
diff --git clang/lib/CodeGen/CGHLSLRuntime.h clang/lib/CodeGen/CGHLSLRuntime.h
index 55a4b97c160c..6e226808fcba 100644
--- clang/lib/CodeGen/CGHLSLRuntime.h
+++ clang/lib/CodeGen/CGHLSLRuntime.h
@@ -80,10 +80,12 @@ public:
   GENERATE_HLSL_INTRINSIC_FUNCTION(Normalize, normalize)
   GENERATE_HLSL_INTRINSIC_FUNCTION(Rsqrt, rsqrt)
   GENERATE_HLSL_INTRINSIC_FUNCTION(Saturate, saturate)
+  GENERATE_HLSL_INTRINSIC_FUNCTION(Sign, sign)
   GENERATE_HLSL_INTRINSIC_FUNCTION(ThreadId, thread_id)
   GENERATE_HLSL_INTRINSIC_FUNCTION(FDot, fdot)
   GENERATE_HLSL_INTRINSIC_FUNCTION(SDot, sdot)
   GENERATE_HLSL_INTRINSIC_FUNCTION(UDot, udot)
+  GENERATE_HLSL_INTRINSIC_FUNCTION(WaveIsFirstLane, wave_is_first_lane)
 
   //===----------------------------------------------------------------------===//
   // End of reserved area for HLSL intrinsic getters.
diff --git clang/lib/CodeGen/CGObjCGNU.cpp clang/lib/CodeGen/CGObjCGNU.cpp
index adc7cdbfded8..6280e9465ecb 100644
--- clang/lib/CodeGen/CGObjCGNU.cpp
+++ clang/lib/CodeGen/CGObjCGNU.cpp
@@ -1699,11 +1699,18 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
   llvm::Value *EmitIvarOffset(CodeGenFunction &CGF,
                               const ObjCInterfaceDecl *Interface,
                               const ObjCIvarDecl *Ivar) override {
-    const std::string Name = GetIVarOffsetVariableName(Ivar->getContainingInterface(), Ivar);
+    const ObjCInterfaceDecl *ContainingInterface =
+        Ivar->getContainingInterface();
+    const std::string Name =
+        GetIVarOffsetVariableName(ContainingInterface, Ivar);
     llvm::GlobalVariable *IvarOffsetPointer = TheModule.getNamedGlobal(Name);
-    if (!IvarOffsetPointer)
+    if (!IvarOffsetPointer) {
       IvarOffsetPointer = new llvm::GlobalVariable(TheModule, IntTy, false,
               llvm::GlobalValue::ExternalLinkage, nullptr, Name);
+      if (Ivar->getAccessControl() != ObjCIvarDecl::Private &&
+          Ivar->getAccessControl() != ObjCIvarDecl::Package)
+        CGM.setGVProperties(IvarOffsetPointer, ContainingInterface);
+    }
     CharUnits Align = CGM.getIntAlign();
     llvm::Value *Offset =
         CGF.Builder.CreateAlignedLoad(IntTy, IvarOffsetPointer, Align);
diff --git clang/lib/CodeGen/CGOpenMPRuntime.cpp clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 23b977be8160..9cf597a65be0 100644
--- clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -1327,25 +1327,24 @@ llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
 
 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                              bool AtCurrentPoint) {
-  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
-  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
+  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
+  assert(!Elem.ServiceInsertPt && "Insert point is set already.");
 
   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
   if (AtCurrentPoint) {
-    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
-        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
+    Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt",
+                                                 CGF.Builder.GetInsertBlock());
   } else {
-    Elem.second.ServiceInsertPt =
-        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
-    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
+    Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
+    Elem.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
   }
 }
 
 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
-  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
-  if (Elem.second.ServiceInsertPt) {
-    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
-    Elem.second.ServiceInsertPt = nullptr;
+  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
+  if (Elem.ServiceInsertPt) {
+    llvm::Instruction *Ptr = Elem.ServiceInsertPt;
+    Elem.ServiceInsertPt = nullptr;
     Ptr->eraseFromParent();
   }
 }
@@ -1441,18 +1440,18 @@ llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
   // kmpc_global_thread_num(ident_t *loc).
   // Generate thread id value and cache this value for use across the
   // function.
-  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
-  if (!Elem.second.ServiceInsertPt)
+  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
+  if (!Elem.ServiceInsertPt)
     setLocThreadIdInsertPt(CGF);
   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
-  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
+  CGF.Builder.SetInsertPoint(Elem.ServiceInsertPt);
   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
   llvm::CallInst *Call = CGF.Builder.CreateCall(
       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                             OMPRTL___kmpc_global_thread_num),
       emitUpdateLocation(CGF, Loc));
   Call->setCallingConv(CGF.getRuntimeCC());
-  Elem.second.ThreadID = Call;
+  Elem.ThreadID = Call;
   return Call;
 }
 
@@ -11675,9 +11674,7 @@ CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                         const VarDecl *VD) {
   ASTContext &C = CGM.getContext();
-  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
-  if (I == LastprivateConditionalToTypes.end())
-    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
+  auto I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
   QualType NewType;
   const FieldDecl *VDField;
   const FieldDecl *FiredField;
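
The map changes in this file all follow the same recipe: `FindAndConstruct(Key)`
returned the whole bucket, forcing callers to go through `.second`, whereas
`operator[]` default-constructs the mapped value on first access and returns a
reference to it directly (and `try_emplace(Key).first` is the equivalent when an
iterator is needed). A small self-contained sketch with a hypothetical map, not
the real `OpenMPLocThreadIDMap`:

    #include "llvm/ADT/DenseMap.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/Instruction.h"

    struct PerFunctionState {
      llvm::Instruction *ServiceInsertPt = nullptr;
    };

    llvm::Instruction *makeServiceInsertPt(); // hypothetical helper

    void ensureInsertPt(llvm::DenseMap<llvm::Function *, PerFunctionState> &Map,
                        llvm::Function *Fn) {
      PerFunctionState &S = Map[Fn]; // default-constructs the entry on first use
      if (!S.ServiceInsertPt)
        S.ServiceInsertPt = makeServiceInsertPt();
    }
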
diff --git clang/lib/CodeGen/CMakeLists.txt clang/lib/CodeGen/CMakeLists.txt
index deb7b27266d7..aa0c871c5352 100644
--- clang/lib/CodeGen/CMakeLists.txt
+++ clang/lib/CodeGen/CMakeLists.txt
@@ -122,6 +122,7 @@ add_clang_library(clangCodeGen
   Targets/AVR.cpp
   Targets/BPF.cpp
   Targets/CSKY.cpp
+  Targets/DirectX.cpp
   Targets/Hexagon.cpp
   Targets/Lanai.cpp
   Targets/LoongArch.cpp
diff --git clang/lib/CodeGen/CodeGenFunction.h clang/lib/CodeGen/CodeGenFunction.h
index 368fc112187f..4eca770ca35d 100644
--- clang/lib/CodeGen/CodeGenFunction.h
+++ clang/lib/CodeGen/CodeGenFunction.h
@@ -1143,17 +1143,11 @@ public:
     /// Copy all the entries in the source map over the corresponding
     /// entries in the destination, which must exist.
     static void copyInto(const DeclMapTy &Src, DeclMapTy &Dest) {
-      for (auto &Pair : Src) {
-        if (!Pair.second.isValid()) {
-          Dest.erase(Pair.first);
-          continue;
-        }
-
-        auto I = Dest.find(Pair.first);
-        if (I != Dest.end())
-          I->second = Pair.second;
+      for (auto &[Decl, Addr] : Src) {
+        if (!Addr.isValid())
+          Dest.erase(Decl);
         else
-          Dest.insert(Pair);
+          Dest.insert_or_assign(Decl, Addr);
       }
     }
   };
@@ -3155,7 +3149,8 @@ public:
                               bool ForVirtualBase, bool Delegating,
                               Address This, CallArgList &Args,
                               AggValueSlot::Overlap_t Overlap,
-                              SourceLocation Loc, bool NewPointerIsChecked);
+                              SourceLocation Loc, bool NewPointerIsChecked,
+                              llvm::CallBase **CallOrInvoke = nullptr);
 
   /// Emit assumption load for all bases. Requires to be called only on
   /// most-derived class and not under construction of the object.
@@ -4275,7 +4270,8 @@ public:
   LValue EmitBinaryOperatorLValue(const BinaryOperator *E);
   LValue EmitCompoundAssignmentLValue(const CompoundAssignOperator *E);
   // Note: only available for agg return types
-  LValue EmitCallExprLValue(const CallExpr *E);
+  LValue EmitCallExprLValue(const CallExpr *E,
+                            llvm::CallBase **CallOrInvoke = nullptr);
   // Note: only available for agg return types
   LValue EmitVAArgExprLValue(const VAArgExpr *E);
   LValue EmitDeclRefLValue(const DeclRefExpr *E);
@@ -4388,21 +4384,27 @@ public:
   /// LLVM arguments and the types they were derived from.
   RValue EmitCall(const CGFunctionInfo &CallInfo, const CGCallee &Callee,
                   ReturnValueSlot ReturnValue, const CallArgList &Args,
-                  llvm::CallBase **callOrInvoke, bool IsMustTail,
+                  llvm::CallBase **CallOrInvoke, bool IsMustTail,
                   SourceLocation Loc,
                   bool IsVirtualFunctionPointerThunk = false);
   RValue EmitCall(const CGFunctionInfo &CallInfo, const CGCallee &Callee,
                   ReturnValueSlot ReturnValue, const CallArgList &Args,
-                  llvm::CallBase **callOrInvoke = nullptr,
+                  llvm::CallBase **CallOrInvoke = nullptr,
                   bool IsMustTail = false) {
-    return EmitCall(CallInfo, Callee, ReturnValue, Args, callOrInvoke,
+    return EmitCall(CallInfo, Callee, ReturnValue, Args, CallOrInvoke,
                     IsMustTail, SourceLocation());
   }
   RValue EmitCall(QualType FnType, const CGCallee &Callee, const CallExpr *E,
-                  ReturnValueSlot ReturnValue, llvm::Value *Chain = nullptr);
+                  ReturnValueSlot ReturnValue, llvm::Value *Chain = nullptr,
+                  llvm::CallBase **CallOrInvoke = nullptr);
+
+  // If a Call or Invoke instruction was emitted for this CallExpr, this method
+  // writes it to `CallOrInvoke` when `CallOrInvoke` is non-null.
   RValue EmitCallExpr(const CallExpr *E,
-                      ReturnValueSlot ReturnValue = ReturnValueSlot());
-  RValue EmitSimpleCallExpr(const CallExpr *E, ReturnValueSlot ReturnValue);
+                      ReturnValueSlot ReturnValue = ReturnValueSlot(),
+                      llvm::CallBase **CallOrInvoke = nullptr);
+  RValue EmitSimpleCallExpr(const CallExpr *E, ReturnValueSlot ReturnValue,
+                            llvm::CallBase **CallOrInvoke = nullptr);
   CGCallee EmitCallee(const Expr *E);
 
   void checkTargetFeatures(const CallExpr *E, const FunctionDecl *TargetDecl);
@@ -4506,25 +4508,23 @@ public:
   void callCStructCopyAssignmentOperator(LValue Dst, LValue Src);
   void callCStructMoveAssignmentOperator(LValue Dst, LValue Src);
 
-  RValue
-  EmitCXXMemberOrOperatorCall(const CXXMethodDecl *Method,
-                              const CGCallee &Callee,
-                              ReturnValueSlot ReturnValue, llvm::Value *This,
-                              llvm::Value *ImplicitParam,
-                              QualType ImplicitParamTy, const CallExpr *E,
-                              CallArgList *RtlArgs);
+  RValue EmitCXXMemberOrOperatorCall(
+      const CXXMethodDecl *Method, const CGCallee &Callee,
+      ReturnValueSlot ReturnValue, llvm::Value *This,
+      llvm::Value *ImplicitParam, QualType ImplicitParamTy, const CallExpr *E,
+      CallArgList *RtlArgs, llvm::CallBase **CallOrInvoke);
   RValue EmitCXXDestructorCall(GlobalDecl Dtor, const CGCallee &Callee,
                                llvm::Value *This, QualType ThisTy,
                                llvm::Value *ImplicitParam,
-                               QualType ImplicitParamTy, const CallExpr *E);
+                               QualType ImplicitParamTy, const CallExpr *E,
+                               llvm::CallBase **CallOrInvoke = nullptr);
   RValue EmitCXXMemberCallExpr(const CXXMemberCallExpr *E,
-                               ReturnValueSlot ReturnValue);
-  RValue EmitCXXMemberOrOperatorMemberCallExpr(const CallExpr *CE,
-                                               const CXXMethodDecl *MD,
-                                               ReturnValueSlot ReturnValue,
-                                               bool HasQualifier,
-                                               NestedNameSpecifier *Qualifier,
-                                               bool IsArrow, const Expr *Base);
+                               ReturnValueSlot ReturnValue,
+                               llvm::CallBase **CallOrInvoke = nullptr);
+  RValue EmitCXXMemberOrOperatorMemberCallExpr(
+      const CallExpr *CE, const CXXMethodDecl *MD, ReturnValueSlot ReturnValue,
+      bool HasQualifier, NestedNameSpecifier *Qualifier, bool IsArrow,
+      const Expr *Base, llvm::CallBase **CallOrInvoke);
   // Compute the object pointer.
   Address EmitCXXMemberDataPointerAddress(const Expr *E, Address base,
                                           llvm::Value *memberPtr,
@@ -4532,15 +4532,18 @@ public:
                                           LValueBaseInfo *BaseInfo = nullptr,
                                           TBAAAccessInfo *TBAAInfo = nullptr);
   RValue EmitCXXMemberPointerCallExpr(const CXXMemberCallExpr *E,
-                                      ReturnValueSlot ReturnValue);
+                                      ReturnValueSlot ReturnValue,
+                                      llvm::CallBase **CallOrInvoke);
 
   RValue EmitCXXOperatorMemberCallExpr(const CXXOperatorCallExpr *E,
                                        const CXXMethodDecl *MD,
-                                       ReturnValueSlot ReturnValue);
+                                       ReturnValueSlot ReturnValue,
+                                       llvm::CallBase **CallOrInvoke);
   RValue EmitCXXPseudoDestructorExpr(const CXXPseudoDestructorExpr *E);
 
   RValue EmitCUDAKernelCallExpr(const CUDAKernelCallExpr *E,
-                                ReturnValueSlot ReturnValue);
+                                ReturnValueSlot ReturnValue,
+                                llvm::CallBase **CallOrInvoke);
 
   RValue EmitNVPTXDevicePrintfCallExpr(const CallExpr *E);
   RValue EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E);
@@ -4562,7 +4565,8 @@ public:
       const analyze_os_log::OSLogBufferLayout &Layout,
       CharUnits BufferAlignment);
 
-  RValue EmitBlockCallExpr(const CallExpr *E, ReturnValueSlot ReturnValue);
+  RValue EmitBlockCallExpr(const CallExpr *E, ReturnValueSlot ReturnValue,
+                           llvm::CallBase **CallOrInvoke);
 
   /// EmitTargetBuiltinExpr - Emit the given builtin call. Returns 0 if the call
   /// is unhandled by the current target.
@@ -4700,7 +4704,8 @@ public:
   llvm::Value *EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E);
   llvm::Value *EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
   llvm::Value *EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
-  llvm::Value *EmitHLSLBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
+  llvm::Value *EmitHLSLBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
+                                   ReturnValueSlot ReturnValue);
   llvm::Value *EmitScalarOrConstFoldImmArg(unsigned ICEArguments, unsigned Idx,
                                            const CallExpr *E);
   llvm::Value *EmitSystemZBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
diff --git clang/lib/CodeGen/CodeGenModule.cpp clang/lib/CodeGen/CodeGenModule.cpp
index df4c13c9ad97..50fa48656009 100644
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -298,6 +298,8 @@ createTargetCodeGenInfo(CodeGenModule &CGM) {
   case llvm::Triple::spirv32:
   case llvm::Triple::spirv64:
     return createSPIRVTargetCodeGenInfo(CGM);
+  case llvm::Triple::dxil:
+    return createDirectXTargetCodeGenInfo(CGM);
   case llvm::Triple::ve:
     return createVETargetCodeGenInfo(CGM);
   case llvm::Triple::csky: {
diff --git clang/lib/CodeGen/CodeGenTypes.cpp clang/lib/CodeGen/CodeGenTypes.cpp
index 11a577bbdd07..5eebd8ad2a06 100644
--- clang/lib/CodeGen/CodeGenTypes.cpp
+++ clang/lib/CodeGen/CodeGenTypes.cpp
@@ -500,63 +500,19 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
     case BuiltinType::OCLReserveID:
       ResultType = CGM.getOpenCLRuntime().convertOpenCLSpecificType(Ty);
       break;
-    case BuiltinType::SveInt8:
-    case BuiltinType::SveUint8:
-    case BuiltinType::SveInt8x2:
-    case BuiltinType::SveUint8x2:
-    case BuiltinType::SveInt8x3:
-    case BuiltinType::SveUint8x3:
-    case BuiltinType::SveInt8x4:
-    case BuiltinType::SveUint8x4:
-    case BuiltinType::SveInt16:
-    case BuiltinType::SveUint16:
-    case BuiltinType::SveInt16x2:
-    case BuiltinType::SveUint16x2:
-    case BuiltinType::SveInt16x3:
-    case BuiltinType::SveUint16x3:
-    case BuiltinType::SveInt16x4:
-    case BuiltinType::SveUint16x4:
-    case BuiltinType::SveInt32:
-    case BuiltinType::SveUint32:
-    case BuiltinType::SveInt32x2:
-    case BuiltinType::SveUint32x2:
-    case BuiltinType::SveInt32x3:
-    case BuiltinType::SveUint32x3:
-    case BuiltinType::SveInt32x4:
-    case BuiltinType::SveUint32x4:
-    case BuiltinType::SveInt64:
-    case BuiltinType::SveUint64:
-    case BuiltinType::SveInt64x2:
-    case BuiltinType::SveUint64x2:
-    case BuiltinType::SveInt64x3:
-    case BuiltinType::SveUint64x3:
-    case BuiltinType::SveInt64x4:
-    case BuiltinType::SveUint64x4:
-    case BuiltinType::SveBool:
-    case BuiltinType::SveBoolx2:
-    case BuiltinType::SveBoolx4:
-    case BuiltinType::SveFloat16:
-    case BuiltinType::SveFloat16x2:
-    case BuiltinType::SveFloat16x3:
-    case BuiltinType::SveFloat16x4:
-    case BuiltinType::SveFloat32:
-    case BuiltinType::SveFloat32x2:
-    case BuiltinType::SveFloat32x3:
-    case BuiltinType::SveFloat32x4:
-    case BuiltinType::SveFloat64:
-    case BuiltinType::SveFloat64x2:
-    case BuiltinType::SveFloat64x3:
-    case BuiltinType::SveFloat64x4:
-    case BuiltinType::SveBFloat16:
-    case BuiltinType::SveBFloat16x2:
-    case BuiltinType::SveBFloat16x3:
-    case BuiltinType::SveBFloat16x4: {
-      ASTContext::BuiltinVectorTypeInfo Info =
-          Context.getBuiltinVectorTypeInfo(cast<BuiltinType>(Ty));
-      return llvm::ScalableVectorType::get(ConvertType(Info.ElementType),
-                                           Info.EC.getKnownMinValue() *
-                                               Info.NumVectors);
-    }
+#define SVE_VECTOR_TYPE(Name, MangledName, Id, SingletonId)                    \
+  case BuiltinType::Id:
+#define SVE_PREDICATE_TYPE(Name, MangledName, Id, SingletonId)                 \
+  case BuiltinType::Id:
+#define SVE_OPAQUE_TYPE(Name, MangledName, Id, SingletonId)
+#include "clang/Basic/AArch64SVEACLETypes.def"
+      {
+        ASTContext::BuiltinVectorTypeInfo Info =
+            Context.getBuiltinVectorTypeInfo(cast<BuiltinType>(Ty));
+        return llvm::ScalableVectorType::get(ConvertType(Info.ElementType),
+                                             Info.EC.getKnownMinValue() *
+                                                 Info.NumVectors);
+      }
     case BuiltinType::SveCount:
       return llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
 #define PPC_VECTOR_TYPE(Name, Id, Size) \
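
The `#include` of the `.def` file above is the usual x-macro pattern: the `.def`
header is a flat list of macro invocations, and each includer defines the macro
to expand every entry into whatever it needs; here, a run of `case` labels
sharing one body. A single-file sketch of the pattern with a hypothetical type
list (the real entries, with their extra parameters, live in
clang/Basic/AArch64SVEACLETypes.def):

    #define MY_VECTOR_TYPES(X) X(Float32x4) X(Int32x4)

    enum class TypeKind { Float32x4, Int32x4, Pointer };

    bool isVectorLike(TypeKind K) {
      switch (K) {
    #define HANDLE_VECTOR(Id) case TypeKind::Id:
        MY_VECTOR_TYPES(HANDLE_VECTOR)
    #undef HANDLE_VECTOR
        return true;
      default:
        return false;
      }
    }
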
diff --git clang/lib/CodeGen/CoverageMappingGen.cpp clang/lib/CodeGen/CoverageMappingGen.cpp
index 67a9caf8b4ec..07015834bc84 100644
--- clang/lib/CodeGen/CoverageMappingGen.cpp
+++ clang/lib/CodeGen/CoverageMappingGen.cpp
@@ -2595,12 +2595,7 @@ void CoverageMappingModuleGen::emit() {
 }
 
 unsigned CoverageMappingModuleGen::getFileID(FileEntryRef File) {
-  auto It = FileEntries.find(File);
-  if (It != FileEntries.end())
-    return It->second;
-  unsigned FileID = FileEntries.size() + 1;
-  FileEntries.insert(std::make_pair(File, FileID));
-  return FileID;
+  return FileEntries.try_emplace(File, FileEntries.size() + 1).first->second;
 }
 
 void CoverageMappingGen::emitCounterMapping(const Decl *D,
diff --git clang/lib/CodeGen/ItaniumCXXABI.cpp clang/lib/CodeGen/ItaniumCXXABI.cpp
index 0cde8a192eda..dcc35d568983 100644
--- clang/lib/CodeGen/ItaniumCXXABI.cpp
+++ clang/lib/CodeGen/ItaniumCXXABI.cpp
@@ -315,10 +315,11 @@ public:
                                      Address This, llvm::Type *Ty,
                                      SourceLocation Loc) override;
 
-  llvm::Value *EmitVirtualDestructorCall(CodeGenFunction &CGF,
-                                         const CXXDestructorDecl *Dtor,
-                                         CXXDtorType DtorType, Address This,
-                                         DeleteOrMemberCallExpr E) override;
+  llvm::Value *
+  EmitVirtualDestructorCall(CodeGenFunction &CGF, const CXXDestructorDecl *Dtor,
+                            CXXDtorType DtorType, Address This,
+                            DeleteOrMemberCallExpr E,
+                            llvm::CallBase **CallOrInvoke) override;
 
   void emitVirtualInheritanceTables(const CXXRecordDecl *RD) override;
 
@@ -442,7 +443,10 @@ public:
          continue;
 
        const CXXMethodDecl *Method = VtableComponent.getFunctionDecl();
-       if (!Method->getCanonicalDecl()->isInlined())
+       const FunctionDecl *FD = Method->getDefinition();
+       const bool IsInlined =
+           Method->getCanonicalDecl()->isInlined() || (FD && FD->isInlined());
+       if (!IsInlined)
          continue;
 
        StringRef Name = CGM.getMangledName(VtableComponent.getGlobalDecl());
@@ -1396,7 +1400,8 @@ void ItaniumCXXABI::emitVirtualObjectDelete(CodeGenFunction &CGF,
   // FIXME: Provide a source location here even though there's no
   // CXXMemberCallExpr for dtor call.
   CXXDtorType DtorType = UseGlobalDelete ? Dtor_Complete : Dtor_Deleting;
-  EmitVirtualDestructorCall(CGF, Dtor, DtorType, Ptr, DE);
+  EmitVirtualDestructorCall(CGF, Dtor, DtorType, Ptr, DE,
+                            /*CallOrInvoke=*/nullptr);
 
   if (UseGlobalDelete)
     CGF.PopCleanupBlock();
@@ -2233,7 +2238,7 @@ CGCallee ItaniumCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF,
 
 llvm::Value *ItaniumCXXABI::EmitVirtualDestructorCall(
     CodeGenFunction &CGF, const CXXDestructorDecl *Dtor, CXXDtorType DtorType,
-    Address This, DeleteOrMemberCallExpr E) {
+    Address This, DeleteOrMemberCallExpr E, llvm::CallBase **CallOrInvoke) {
   auto *CE = E.dyn_cast<const CXXMemberCallExpr *>();
   auto *D = E.dyn_cast<const CXXDeleteExpr *>();
   assert((CE != nullptr) ^ (D != nullptr));
@@ -2254,7 +2259,7 @@ llvm::Value *ItaniumCXXABI::EmitVirtualDestructorCall(
   }
 
   CGF.EmitCXXDestructorCall(GD, Callee, This.emitRawPointer(CGF), ThisTy,
-                            nullptr, QualType(), nullptr);
+                            nullptr, QualType(), nullptr, CallOrInvoke);
   return nullptr;
 }
 
@@ -2279,8 +2284,18 @@ bool ItaniumCXXABI::canSpeculativelyEmitVTableAsBaseClass(
   if (CGM.getCodeGenOpts().ForceEmitVTables)
     return true;
 
-  // If we don't have any not emitted inline virtual function then we are safe
-  // to emit an available_externally copy of vtable.
+  // A speculative vtable can only be generated if all virtual inline functions
+  // defined by this class are emitted. For each virtual inline function not
+  // used in the current TU, the vtable in the final program contains a function
+  // that is equivalent to the unused one, but that function does not have to be
+  // declared under the same symbol (e.g., a virtual destructor that can be
+  // substituted with its base class's destructor). Since inline functions are
+  // emitted lazily, and this emission does not account for speculative emission
+  // of a vtable, we might generate a speculative vtable with references to
+  // inline functions that are not emitted under that name. This can lead to
+  // problems when devirtualizing a call to such a function, which can result in
+  // linking errors. Hence, if there are any unused virtual inline functions, we
+  // cannot emit the speculative vtable.
   // FIXME we can still emit a copy of the vtable if we
   // can emit definition of the inline functions.
   if (hasAnyUnusedVirtualInlineFunction(RD))
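
A source-level illustration of the situation described in the comment above
(hypothetical types, not from the patch): in a TU that only calls `b->f()`,
`B`'s inline destructor is never referenced, so it may never be emitted here
under its own symbol; an available_externally vtable that names it could then
break a devirtualized call at link time, which is why the speculative vtable is
skipped.

    struct B {
      virtual ~B() {}   // unused inline virtual in this TU
      virtual void f(); // defined in some other TU
    };

    void use(B *b) { b->f(); } // nothing here forces ~B() to be emitted
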
diff --git clang/lib/CodeGen/MicrosoftCXXABI.cpp clang/lib/CodeGen/MicrosoftCXXABI.cpp
index 76d0191a7e63..79dcdc04b099 100644
--- clang/lib/CodeGen/MicrosoftCXXABI.cpp
+++ clang/lib/CodeGen/MicrosoftCXXABI.cpp
@@ -334,10 +334,11 @@ public:
                                      Address This, llvm::Type *Ty,
                                      SourceLocation Loc) override;
 
-  llvm::Value *EmitVirtualDestructorCall(CodeGenFunction &CGF,
-                                         const CXXDestructorDecl *Dtor,
-                                         CXXDtorType DtorType, Address This,
-                                         DeleteOrMemberCallExpr E) override;
+  llvm::Value *
+  EmitVirtualDestructorCall(CodeGenFunction &CGF, const CXXDestructorDecl *Dtor,
+                            CXXDtorType DtorType, Address This,
+                            DeleteOrMemberCallExpr E,
+                            llvm::CallBase **CallOrInvoke) override;
 
   void adjustCallArgsForDestructorThunk(CodeGenFunction &CGF, GlobalDecl GD,
                                         CallArgList &CallArgs) override {
@@ -901,7 +902,8 @@ void MicrosoftCXXABI::emitVirtualObjectDelete(CodeGenFunction &CGF,
   // CXXMemberCallExpr for dtor call.
   bool UseGlobalDelete = DE->isGlobalDelete();
   CXXDtorType DtorType = UseGlobalDelete ? Dtor_Complete : Dtor_Deleting;
-  llvm::Value *MDThis = EmitVirtualDestructorCall(CGF, Dtor, DtorType, Ptr, DE);
+  llvm::Value *MDThis = EmitVirtualDestructorCall(CGF, Dtor, DtorType, Ptr, DE,
+                                                  /*CallOrInvoke=*/nullptr);
   if (UseGlobalDelete)
     CGF.EmitDeleteCall(DE->getOperatorDelete(), MDThis, ElementType);
 }
@@ -1685,7 +1687,7 @@ void MicrosoftCXXABI::EmitDestructorCall(CodeGenFunction &CGF,
   CGF.EmitCXXDestructorCall(GD, Callee, CGF.getAsNaturalPointerTo(This, ThisTy),
                             ThisTy,
                             /*ImplicitParam=*/Implicit,
-                            /*ImplicitParamTy=*/QualType(), nullptr);
+                            /*ImplicitParamTy=*/QualType(), /*E=*/nullptr);
   if (BaseDtorEndBB) {
     // Complete object handler should continue to be the remaining
     CGF.Builder.CreateBr(BaseDtorEndBB);
@@ -2001,7 +2003,7 @@ CGCallee MicrosoftCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF,
 
 llvm::Value *MicrosoftCXXABI::EmitVirtualDestructorCall(
     CodeGenFunction &CGF, const CXXDestructorDecl *Dtor, CXXDtorType DtorType,
-    Address This, DeleteOrMemberCallExpr E) {
+    Address This, DeleteOrMemberCallExpr E, llvm::CallBase **CallOrInvoke) {
   auto *CE = E.dyn_cast<const CXXMemberCallExpr *>();
   auto *D = E.dyn_cast<const CXXDeleteExpr *>();
   assert((CE != nullptr) ^ (D != nullptr));
@@ -2031,7 +2033,7 @@ llvm::Value *MicrosoftCXXABI::EmitVirtualDestructorCall(
   This = adjustThisArgumentForVirtualFunctionCall(CGF, GD, This, true);
   RValue RV =
       CGF.EmitCXXDestructorCall(GD, Callee, This.emitRawPointer(CGF), ThisTy,
-                                ImplicitParam, Context.IntTy, CE);
+                                ImplicitParam, Context.IntTy, CE, CallOrInvoke);
   return RV.getScalarVal();
 }
 
diff --git clang/lib/CodeGen/TargetInfo.h clang/lib/CodeGen/TargetInfo.h
index 0244ca006d49..3e503538b2b1 100644
--- clang/lib/CodeGen/TargetInfo.h
+++ clang/lib/CodeGen/TargetInfo.h
@@ -555,6 +555,9 @@ createTCETargetCodeGenInfo(CodeGenModule &CGM);
 std::unique_ptr<TargetCodeGenInfo>
 createVETargetCodeGenInfo(CodeGenModule &CGM);
 
+std::unique_ptr<TargetCodeGenInfo>
+createDirectXTargetCodeGenInfo(CodeGenModule &CGM);
+
 enum class WebAssemblyABIKind {
   MVP = 0,
   ExperimentalMV = 1,
diff --git clang/lib/CodeGen/Targets/AArch64.cpp clang/lib/CodeGen/Targets/AArch64.cpp
index 97381f673c28..ec617eec6719 100644
--- clang/lib/CodeGen/Targets/AArch64.cpp
+++ clang/lib/CodeGen/Targets/AArch64.cpp
@@ -304,7 +304,7 @@ AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadic,
         return getNaturalAlignIndirect(Ty, false);
 
     return (isPromotableIntegerTypeForABI(Ty) && isDarwinPCS()
-                ? ABIArgInfo::getExtend(Ty)
+                ? ABIArgInfo::getExtend(Ty, CGT.ConvertType(Ty))
                 : ABIArgInfo::getDirect());
   }
 
@@ -500,7 +500,7 @@ bool AArch64SwiftABIInfo::isLegalVectorType(CharUnits VectorSize,
 bool AArch64ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
   // For the soft-float ABI variant, no types are considered to be homogeneous
   // aggregates.
-  if (Kind == AArch64ABIKind::AAPCSSoft)
+  if (isSoftFloat())
     return false;
 
   // Homogeneous aggregates for AAPCS64 must have base types of a floating
@@ -555,8 +555,8 @@ RValue AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, QualType Ty,
     BaseTy = ArrTy->getElementType();
     NumRegs = ArrTy->getNumElements();
   }
-  bool IsFPR = Kind != AArch64ABIKind::AAPCSSoft &&
-               (BaseTy->isFloatingPointTy() || BaseTy->isVectorTy());
+  bool IsFPR =
+      !isSoftFloat() && (BaseTy->isFloatingPointTy() || BaseTy->isVectorTy());
 
   // The AArch64 va_list type and handling is specified in the Procedure Call
   // Standard, section B.4:
diff --git clang/lib/CodeGen/Targets/ARM.cpp clang/lib/CodeGen/Targets/ARM.cpp
index d032b88d7683..f7d7471d386b 100644
--- clang/lib/CodeGen/Targets/ARM.cpp
+++ clang/lib/CodeGen/Targets/ARM.cpp
@@ -354,8 +354,9 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic,
       if (EIT->getNumBits() > 64)
         return getNaturalAlignIndirect(Ty, /*ByVal=*/true);
 
-    return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
-                                              : ABIArgInfo::getDirect());
+    return (isPromotableIntegerTypeForABI(Ty)
+                ? ABIArgInfo::getExtend(Ty, CGT.ConvertType(Ty))
+                : ABIArgInfo::getDirect());
   }
 
   if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
diff --git clang/lib/CodeGen/Targets/DirectX.cpp clang/lib/CodeGen/Targets/DirectX.cpp
new file mode 100644
index 000000000000..13da2c630629
--- /dev/null
+++ clang/lib/CodeGen/Targets/DirectX.cpp
@@ -0,0 +1,52 @@
+//===- DirectX.cpp---------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "ABIInfoImpl.h"
+#include "TargetInfo.h"
+#include "llvm/IR/DerivedTypes.h"
+
+using namespace clang;
+using namespace clang::CodeGen;
+
+//===----------------------------------------------------------------------===//
+// Target codegen info implementation for DirectX.
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+class DirectXTargetCodeGenInfo : public TargetCodeGenInfo {
+public:
+  DirectXTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
+      : TargetCodeGenInfo(std::make_unique<DefaultABIInfo>(CGT)) {}
+
+  llvm::Type *getHLSLType(CodeGenModule &CGM, const Type *T) const override;
+};
+
+llvm::Type *DirectXTargetCodeGenInfo::getHLSLType(CodeGenModule &CGM,
+                                                  const Type *Ty) const {
+  auto *BuiltinTy = dyn_cast<BuiltinType>(Ty);
+  if (!BuiltinTy || BuiltinTy->getKind() != BuiltinType::HLSLResource)
+    return nullptr;
+
+  llvm::LLVMContext &Ctx = CGM.getLLVMContext();
+  // FIXME: translate __hlsl_resource_t to target("dx.TypedBuffer", <4 x float>,
+  // 1, 0, 0) only for now (RWBuffer<float4>); more work is needed to determine
+  // the target ext type and its parameters based on the handle type
+  // attributes (not yet implemented)
+  llvm::FixedVectorType *ElemType =
+      llvm::FixedVectorType::get(llvm::Type::getFloatTy(Ctx), 4);
+  unsigned Flags[] = {/*IsWriteable*/ 1, /*IsROV*/ 0, /*IsSigned*/ 0};
+  return llvm::TargetExtType::get(Ctx, "dx.TypedBuffer", {ElemType}, Flags);
+}
+
+} // namespace
+
+std::unique_ptr<TargetCodeGenInfo>
+CodeGen::createDirectXTargetCodeGenInfo(CodeGenModule &CGM) {
+  return std::make_unique<DirectXTargetCodeGenInfo>(CGM.getTypes());
+}
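
For reference, a minimal standalone sketch (not part of the patch) of the target extension type the new hook hard-codes for RWBuffer<float4>. It only assumes the LLVM C++ headers already included above and should print roughly target("dx.TypedBuffer", <4 x float>, 1, 0, 0):

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  llvm::LLVMContext Ctx;
  // <4 x float> element type, matching RWBuffer<float4>.
  llvm::FixedVectorType *Elem =
      llvm::FixedVectorType::get(llvm::Type::getFloatTy(Ctx), 4);
  // Integer parameters in the same order as the hook uses them:
  // IsWriteable, IsROV, IsSigned.
  unsigned Flags[] = {1, 0, 0};
  llvm::Type *BufTy =
      llvm::TargetExtType::get(Ctx, "dx.TypedBuffer", {Elem}, Flags);
  BufTy->print(llvm::outs());
  llvm::outs() << "\n";
}
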
diff --git clang/lib/CodeGen/Targets/RISCV.cpp clang/lib/CodeGen/Targets/RISCV.cpp
index 826a1ec2c9d3..57b09f1a3d76 100644
--- clang/lib/CodeGen/Targets/RISCV.cpp
+++ clang/lib/CodeGen/Targets/RISCV.cpp
@@ -51,7 +51,7 @@ public:
   RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
                    AggValueSlot Slot) const override;
 
-  ABIArgInfo extendType(QualType Ty) const;
+  ABIArgInfo extendType(QualType Ty, llvm::Type *CoerceTy = nullptr) const;
 
   bool detectFPCCEligibleStruct(QualType Ty, llvm::Type *&Field1Ty,
                                 CharUnits &Field1Off, llvm::Type *&Field2Ty,
@@ -439,12 +439,12 @@ ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed,
 
     // All integral types are promoted to XLen width
     if (Size < XLen && Ty->isIntegralOrEnumerationType()) {
-      return extendType(Ty);
+      return extendType(Ty, CGT.ConvertType(Ty));
     }
 
     if (const auto *EIT = Ty->getAs<BitIntType>()) {
       if (EIT->getNumBits() < XLen)
-        return extendType(Ty);
+        return extendType(Ty, CGT.ConvertType(Ty));
       if (EIT->getNumBits() > 128 ||
           (!getContext().getTargetInfo().hasInt128Type() &&
            EIT->getNumBits() > 64))
@@ -526,12 +526,12 @@ RValue RISCVABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                           /*AllowHigherAlign=*/true, Slot);
 }
 
-ABIArgInfo RISCVABIInfo::extendType(QualType Ty) const {
+ABIArgInfo RISCVABIInfo::extendType(QualType Ty, llvm::Type *CoerceTy) const {
   int TySize = getContext().getTypeSize(Ty);
   // RV64 ABI requires unsigned 32 bit integers to be sign extended.
   if (XLen == 64 && Ty->isUnsignedIntegerOrEnumerationType() && TySize == 32)
-    return ABIArgInfo::getSignExtend(Ty);
-  return ABIArgInfo::getExtend(Ty);
+    return ABIArgInfo::getSignExtend(Ty, CoerceTy);
+  return ABIArgInfo::getExtend(Ty, CoerceTy);
 }
 
 namespace {
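
As a side note, a small self-contained illustration (hypothetical helper name, not a clang API) of the RV64 rule that extendType() keeps while now also carrying the coerced type: a 32-bit unsigned integer argument is sign-extended to 64 bits, so its upper half is filled with copies of bit 31 rather than zeros:

#include <cstdint>
#include <cstdio>

// Hypothetical model of how an RV64 callee sees a uint32_t argument that the
// caller sign-extended to XLEN, per the rule in RISCVABIInfo::extendType().
static uint64_t asPassedOnRV64(uint32_t V) {
  return static_cast<uint64_t>(static_cast<int64_t>(static_cast<int32_t>(V)));
}

int main() {
  std::printf("%016llx\n",
              static_cast<unsigned long long>(asPassedOnRV64(0x80000000u)));
  // Prints ffffffff80000000: bit 31 is replicated into the upper 32 bits.
}
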
diff --git clang/lib/CodeGen/Targets/X86.cpp clang/lib/CodeGen/Targets/X86.cpp
index f71872e77fe8..7e051e475f9d 100644
--- clang/lib/CodeGen/Targets/X86.cpp
+++ clang/lib/CodeGen/Targets/X86.cpp
@@ -881,8 +881,8 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, CCState &State,
 
   if (isPromotableIntegerTypeForABI(Ty)) {
     if (InReg)
-      return ABIArgInfo::getExtendInReg(Ty);
-    return ABIArgInfo::getExtend(Ty);
+      return ABIArgInfo::getExtendInReg(Ty, CGT.ConvertType(Ty));
+    return ABIArgInfo::getExtend(Ty, CGT.ConvertType(Ty));
   }
 
   if (const auto *EIT = Ty->getAs<BitIntType>()) {
@@ -2756,7 +2756,7 @@ X86_64ABIInfo::classifyArgumentType(QualType Ty, unsigned freeIntRegs,
 
       if (Ty->isIntegralOrEnumerationType() &&
           isPromotableIntegerTypeForABI(Ty))
-        return ABIArgInfo::getExtend(Ty);
+        return ABIArgInfo::getExtend(Ty, CGT.ConvertType(Ty));
     }
 
     break;
diff --git clang/lib/Driver/Driver.cpp clang/lib/Driver/Driver.cpp
index 5b3783e20eab..efe398dd531d 100644
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -3026,12 +3026,6 @@ class OffloadingActionBuilder final {
         // Set the flag to true, so that the builder acts on the current input.
         IsActive = true;
 
-        if (CompileHostOnly)
-          return ABRT_Success;
-
-        // Replicate inputs for each GPU architecture.
-        auto Ty = IA->getType() == types::TY_HIP ? types::TY_HIP_DEVICE
-                                                 : types::TY_CUDA_DEVICE;
         std::string CUID = FixedCUID.str();
         if (CUID.empty()) {
           if (UseCUID == CUID_Random)
@@ -3055,6 +3049,12 @@ class OffloadingActionBuilder final {
         }
         IA->setId(CUID);
 
+        if (CompileHostOnly)
+          return ABRT_Success;
+
+        // Replicate inputs for each GPU architecture.
+        auto Ty = IA->getType() == types::TY_HIP ? types::TY_HIP_DEVICE
+                                                 : types::TY_CUDA_DEVICE;
         for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) {
           CudaDeviceActions.push_back(
               C.MakeAction<InputAction>(IA->getInputArg(), Ty, IA->getId()));
diff --git clang/lib/Driver/SanitizerArgs.cpp clang/lib/Driver/SanitizerArgs.cpp
index 18bb35a56316..89f1215afd0c 100644
--- clang/lib/Driver/SanitizerArgs.cpp
+++ clang/lib/Driver/SanitizerArgs.cpp
@@ -929,10 +929,16 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
         DiagnoseErrors);
   }
 
-  SharedRuntime =
-      Args.hasFlag(options::OPT_shared_libsan, options::OPT_static_libsan,
-                   TC.getTriple().isAndroid() || TC.getTriple().isOSFuchsia() ||
-                       TC.getTriple().isOSDarwin());
+  SharedRuntime = Args.hasFlag(
+      options::OPT_shared_libsan, options::OPT_static_libsan,
+      TC.getTriple().isAndroid() || TC.getTriple().isOSFuchsia() ||
+          TC.getTriple().isOSDarwin() || TC.getTriple().isOSWindows());
+  if (!SharedRuntime && TC.getTriple().isOSWindows()) {
+    Arg *A =
+        Args.getLastArg(options::OPT_shared_libsan, options::OPT_static_libsan);
+    D.Diag(clang::diag::err_drv_unsupported_opt_for_target)
+        << A->getSpelling() << TC.getTriple().str();
+  }
 
   ImplicitCfiRuntime = TC.getTriple().isAndroid();
 
diff --git clang/lib/Driver/ToolChain.cpp clang/lib/Driver/ToolChain.cpp
index 64f23d43e87e..16f9b629fc53 100644
--- clang/lib/Driver/ToolChain.cpp
+++ clang/lib/Driver/ToolChain.cpp
@@ -126,7 +126,7 @@ ToolChain::executeToolChainProgram(StringRef Executable) const {
                                      "CLANG_TOOLCHAIN_PROGRAM_TIMEOUT expected "
                                      "an integer, got '" +
                                          *Str + "'");
-    SecondsToWait = std::min(SecondsToWait, 0); // infinite
+    SecondsToWait = std::max(SecondsToWait, 0); // infinite
   }
   if (llvm::sys::ExecuteAndWait(Executable, {}, {}, Redirects, SecondsToWait,
                                 /*MemoryLimit=*/0, &ErrorMessage))
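
A quick sketch of why the std::min -> std::max swap above matters, assuming (as the comment indicates) that 0 is ExecuteAndWait's "no timeout" sentinel: a negative CLANG_TOOLCHAIN_PROGRAM_TIMEOUT should collapse to 0, and a positive timeout should survive, which only std::max does:

#include <algorithm>
#include <cassert>

int main() {
  int Negative = -5, Timeout = 30;
  assert(std::max(Negative, 0) == 0);      // negative -> 0 (wait forever)
  assert(std::max(Timeout, 0) == Timeout); // positive timeout preserved
  // The old std::min clamped every positive timeout down to 0 as well:
  assert(std::min(Timeout, 0) == 0);
}
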
diff --git clang/lib/Driver/ToolChains/AMDGPU.cpp clang/lib/Driver/ToolChains/AMDGPU.cpp
index a788aba57546..74f70573c5fe 100644
--- clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -1054,3 +1054,39 @@ ROCMToolChain::getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs,
       DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt,
       FastRelaxedMath, CorrectSqrt, ABIVer, isOpenMP);
 }
+
+bool AMDGPUToolChain::shouldSkipSanitizeOption(
+    const ToolChain &TC, const llvm::opt::ArgList &DriverArgs,
+    StringRef TargetID, const llvm::opt::Arg *A) const {
+  // For actions without targetID, do nothing.
+  if (TargetID.empty())
+    return false;
+  Option O = A->getOption();
+  if (!O.matches(options::OPT_fsanitize_EQ))
+    return false;
+
+  if (!DriverArgs.hasFlag(options::OPT_fgpu_sanitize,
+                          options::OPT_fno_gpu_sanitize, true))
+    return true;
+
+  auto &Diags = TC.getDriver().getDiags();
+
+  // For simplicity, we only allow -fsanitize=address
+  SanitizerMask K = parseSanitizerValue(A->getValue(), /*AllowGroups=*/false);
+  if (K != SanitizerKind::Address)
+    return true;
+
+  llvm::StringMap<bool> FeatureMap;
+  auto OptionalGpuArch = parseTargetID(TC.getTriple(), TargetID, &FeatureMap);
+
+  assert(OptionalGpuArch && "Invalid Target ID");
+  (void)OptionalGpuArch;
+  auto Loc = FeatureMap.find("xnack");
+  if (Loc == FeatureMap.end() || !Loc->second) {
+    Diags.Report(
+        clang::diag::warn_drv_unsupported_option_for_offload_arch_req_feature)
+        << A->getAsString(DriverArgs) << TargetID << "xnack+";
+    return true;
+  }
+  return false;
+}
\ No newline at end of file
diff --git clang/lib/Driver/ToolChains/AMDGPU.h clang/lib/Driver/ToolChains/AMDGPU.h
index 7e70dae8ce15..a9b4552a1f91 100644
--- clang/lib/Driver/ToolChains/AMDGPU.h
+++ clang/lib/Driver/ToolChains/AMDGPU.h
@@ -97,6 +97,12 @@ public:
   /// Needed for translating LTO options.
   const char *getDefaultLinker() const override { return "ld.lld"; }
 
+  /// Returns true if the sanitizer option \p A should be skipped for the
+  /// offload action with the given \p TargetID.
+  bool shouldSkipSanitizeOption(const ToolChain &TC,
+                                const llvm::opt::ArgList &DriverArgs,
+                                StringRef TargetID,
+                                const llvm::opt::Arg *A) const;
+
   /// Uses amdgpu-arch tool to get arch of the system GPU. Will return error
   /// if unable to find one.
   virtual Expected<SmallVector<std::string>>
diff --git clang/lib/Driver/ToolChains/Arch/SystemZ.cpp clang/lib/Driver/ToolChains/Arch/SystemZ.cpp
index 2213f431eb81..574092c3e215 100644
--- clang/lib/Driver/ToolChains/Arch/SystemZ.cpp
+++ clang/lib/Driver/ToolChains/Arch/SystemZ.cpp
@@ -34,7 +34,8 @@ systemz::FloatABI systemz::getSystemZFloatABI(const Driver &D,
   return ABI;
 }
 
-std::string systemz::getSystemZTargetCPU(const ArgList &Args) {
+std::string systemz::getSystemZTargetCPU(const ArgList &Args,
+                                         const llvm::Triple &T) {
   if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_march_EQ)) {
     llvm::StringRef CPUName = A->getValue();
 
@@ -48,6 +49,8 @@ std::string systemz::getSystemZTargetCPU(const ArgList &Args) {
 
     return std::string(CPUName);
   }
+  if (T.isOSzOS())
+    return "zEC12";
   return CLANG_SYSTEMZ_DEFAULT_ARCH;
 }
 
diff --git clang/lib/Driver/ToolChains/Arch/SystemZ.h clang/lib/Driver/ToolChains/Arch/SystemZ.h
index 1e42b68a8f3c..f2d30d24ba63 100644
--- clang/lib/Driver/ToolChains/Arch/SystemZ.h
+++ clang/lib/Driver/ToolChains/Arch/SystemZ.h
@@ -27,7 +27,8 @@ enum class FloatABI {
 
 FloatABI getSystemZFloatABI(const Driver &D, const llvm::opt::ArgList &Args);
 
-std::string getSystemZTargetCPU(const llvm::opt::ArgList &Args);
+std::string getSystemZTargetCPU(const llvm::opt::ArgList &Args,
+                                const llvm::Triple &T);
 
 void getSystemZTargetFeatures(const Driver &D, const llvm::opt::ArgList &Args,
                               std::vector<llvm::StringRef> &Features);
diff --git clang/lib/Driver/ToolChains/Clang.cpp clang/lib/Driver/ToolChains/Clang.cpp
index baac1215215b..3fe4ce5d893b 100644
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -4853,7 +4853,8 @@ renderDebugOptions(const ToolChain &TC, const Driver &D, const llvm::Triple &T,
 
   // This controls whether or not we perform JustMyCode instrumentation.
   if (Args.hasFlag(options::OPT_fjmc, options::OPT_fno_jmc, false)) {
-    if (TC.getTriple().isOSBinFormatELF() || D.IsCLMode()) {
+    if (TC.getTriple().isOSBinFormatELF() ||
+        TC.getTriple().isWindowsMSVCEnvironment()) {
       if (DebugInfoKind >= llvm::codegenoptions::DebugInfoConstructor)
         CmdArgs.push_back("-fjmc");
       else if (D.IsCLMode())
@@ -6785,8 +6786,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     CmdArgs.push_back("--offload-new-driver");
   }
 
-  SanitizeArgs.addArgs(TC, Args, CmdArgs, InputType);
-
   const XRayArgs &XRay = TC.getXRayArgs();
   XRay.addArgs(TC, Args, CmdArgs, InputType);
 
@@ -7676,6 +7675,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     }
   }
 
+  // This needs to run after -Xclang argument forwarding to pick up the target
+  // features enabled through -Xclang -target-feature flags.
+  SanitizeArgs.addArgs(TC, Args, CmdArgs, InputType);
+
   // With -save-temps, we want to save the unoptimized bitcode output from the
   // CompileJobAction, use -disable-llvm-passes to get pristine IR generated
   // by the frontend.
diff --git clang/lib/Driver/ToolChains/CommonArgs.cpp clang/lib/Driver/ToolChains/CommonArgs.cpp
index 0601016c3b14..f58b816a9709 100644
--- clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -674,7 +674,7 @@ std::string tools::getCPUName(const Driver &D, const ArgList &Args,
     return getLanaiTargetCPU(Args);
 
   case llvm::Triple::systemz:
-    return systemz::getSystemZTargetCPU(Args);
+    return systemz::getSystemZTargetCPU(Args, T);
 
   case llvm::Triple::r600:
   case llvm::Triple::amdgcn:
@@ -2902,11 +2902,16 @@ void tools::addMCModel(const Driver &D, const llvm::opt::ArgList &Args,
     } else if (Triple.isPPC64() || Triple.isOSAIX()) {
       Ok = CM == "small" || CM == "medium" || CM == "large";
     } else if (Triple.isRISCV()) {
+      // The large code model cannot be combined with PIC.
+      if (CM == "large" && RelocationModel != llvm::Reloc::Static)
+        D.Diag(diag::err_drv_argument_not_allowed_with)
+            << A->getAsString(Args) << "-fpic";
       if (CM == "medlow")
         CM = "small";
       else if (CM == "medany")
         CM = "medium";
-      Ok = CM == "small" || CM == "medium";
+      Ok = CM == "small" || CM == "medium" ||
+           (CM == "large" && Triple.isRISCV64());
     } else if (Triple.getArch() == llvm::Triple::x86_64) {
       Ok = llvm::is_contained({"small", "kernel", "medium", "large", "tiny"},
                               CM);
diff --git clang/lib/Driver/ToolChains/Cuda.cpp clang/lib/Driver/ToolChains/Cuda.cpp
index 3f9885b196ec..ef44ffa5594d 100644
--- clang/lib/Driver/ToolChains/Cuda.cpp
+++ clang/lib/Driver/ToolChains/Cuda.cpp
@@ -17,6 +17,7 @@
 #include "clang/Driver/InputInfo.h"
 #include "clang/Driver/Options.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/Config/llvm-config.h" // for LLVM_HOST_TRIPLE
 #include "llvm/Option/ArgList.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/FormatAdapters.h"
diff --git clang/lib/Driver/ToolChains/Gnu.cpp clang/lib/Driver/ToolChains/Gnu.cpp
index fe9f13d693d4..608fdf2dc7bb 100644
--- clang/lib/Driver/ToolChains/Gnu.cpp
+++ clang/lib/Driver/ToolChains/Gnu.cpp
@@ -966,7 +966,8 @@ void tools::gnutools::Assembler::ConstructJob(Compilation &C,
   case llvm::Triple::systemz: {
     // Always pass an -march option, since our default of z10 is later
     // than the GNU assembler's default.
-    std::string CPUName = systemz::getSystemZTargetCPU(Args);
+    std::string CPUName =
+        systemz::getSystemZTargetCPU(Args, getToolChain().getTriple());
     CmdArgs.push_back(Args.MakeArgString("-march=" + CPUName));
     break;
   }
diff --git clang/lib/Driver/ToolChains/HIPAMD.cpp clang/lib/Driver/ToolChains/HIPAMD.cpp
index cbb8fab69a31..bae05cc0bb73 100644
--- clang/lib/Driver/ToolChains/HIPAMD.cpp
+++ clang/lib/Driver/ToolChains/HIPAMD.cpp
@@ -36,43 +36,6 @@ using namespace llvm::opt;
 #define NULL_FILE "/dev/null"
 #endif
 
-static bool shouldSkipSanitizeOption(const ToolChain &TC,
-                                     const llvm::opt::ArgList &DriverArgs,
-                                     StringRef TargetID,
-                                     const llvm::opt::Arg *A) {
-  // For actions without targetID, do nothing.
-  if (TargetID.empty())
-    return false;
-  Option O = A->getOption();
-  if (!O.matches(options::OPT_fsanitize_EQ))
-    return false;
-
-  if (!DriverArgs.hasFlag(options::OPT_fgpu_sanitize,
-                          options::OPT_fno_gpu_sanitize, true))
-    return true;
-
-  auto &Diags = TC.getDriver().getDiags();
-
-  // For simplicity, we only allow -fsanitize=address
-  SanitizerMask K = parseSanitizerValue(A->getValue(), /*AllowGroups=*/false);
-  if (K != SanitizerKind::Address)
-    return true;
-
-  llvm::StringMap<bool> FeatureMap;
-  auto OptionalGpuArch = parseTargetID(TC.getTriple(), TargetID, &FeatureMap);
-
-  assert(OptionalGpuArch && "Invalid Target ID");
-  (void)OptionalGpuArch;
-  auto Loc = FeatureMap.find("xnack");
-  if (Loc == FeatureMap.end() || !Loc->second) {
-    Diags.Report(
-        clang::diag::warn_drv_unsupported_option_for_offload_arch_req_feature)
-        << A->getAsString(DriverArgs) << TargetID << "xnack+";
-    return true;
-  }
-  return false;
-}
-
 void AMDGCN::Linker::constructLlvmLinkCommand(Compilation &C,
                                          const JobAction &JA,
                                          const InputInfoList &Inputs,
diff --git clang/lib/Driver/ToolChains/MSVC.cpp clang/lib/Driver/ToolChains/MSVC.cpp
index ca266e3e1d1d..7eb38098bed2 100644
--- clang/lib/Driver/ToolChains/MSVC.cpp
+++ clang/lib/Driver/ToolChains/MSVC.cpp
@@ -201,10 +201,10 @@ void visualstudio::Linker::ConstructJob(Compilation &C, const JobAction &JA,
   if (TC.getSanitizerArgs(Args).needsAsanRt()) {
     CmdArgs.push_back(Args.MakeArgString("-debug"));
     CmdArgs.push_back(Args.MakeArgString("-incremental:no"));
-    if (TC.getSanitizerArgs(Args).needsSharedRt() ||
-        Args.hasArg(options::OPT__SLASH_MD, options::OPT__SLASH_MDd)) {
-      for (const auto &Lib : {"asan_dynamic", "asan_dynamic_runtime_thunk"})
-        CmdArgs.push_back(TC.getCompilerRTArgString(Args, Lib));
+    CmdArgs.push_back(TC.getCompilerRTArgString(Args, "asan_dynamic"));
+    auto defines = Args.getAllArgValues(options::OPT_D);
+    if (Args.hasArg(options::OPT__SLASH_MD, options::OPT__SLASH_MDd) ||
+        find(begin(defines), end(defines), "_DLL") != end(defines)) {
       // Make sure the dynamic runtime thunk is not optimized out at link time
       // to ensure proper SEH handling.
       CmdArgs.push_back(Args.MakeArgString(
@@ -213,19 +213,15 @@ void visualstudio::Linker::ConstructJob(Compilation &C, const JobAction &JA,
               : "-include:__asan_seh_interceptor"));
       // Make sure the linker consider all object files from the dynamic runtime
       // thunk.
-      CmdArgs.push_back(Args.MakeArgString(std::string("-wholearchive:") +
+      CmdArgs.push_back(Args.MakeArgString(
+          std::string("-wholearchive:") +
           TC.getCompilerRT(Args, "asan_dynamic_runtime_thunk")));
-    } else if (DLL) {
-      CmdArgs.push_back(TC.getCompilerRTArgString(Args, "asan_dll_thunk"));
     } else {
-      for (const auto &Lib : {"asan", "asan_cxx"}) {
-        CmdArgs.push_back(TC.getCompilerRTArgString(Args, Lib));
-        // Make sure the linker consider all object files from the static lib.
-        // This is necessary because instrumented dlls need access to all the
-        // interface exported by the static lib in the main executable.
-        CmdArgs.push_back(Args.MakeArgString(std::string("-wholearchive:") +
-            TC.getCompilerRT(Args, Lib)));
-      }
+      // Make sure the linker considers all object files from the static
+      // thunk.
+      CmdArgs.push_back(Args.MakeArgString(
+          std::string("-wholearchive:") +
+          TC.getCompilerRT(Args, "asan_static_runtime_thunk")));
     }
   }
 
diff --git clang/lib/Driver/ToolChains/MinGW.cpp clang/lib/Driver/ToolChains/MinGW.cpp
index c81a7ed17029..85f40893e542 100644
--- clang/lib/Driver/ToolChains/MinGW.cpp
+++ clang/lib/Driver/ToolChains/MinGW.cpp
@@ -15,6 +15,7 @@
 #include "clang/Driver/InputInfo.h"
 #include "clang/Driver/Options.h"
 #include "clang/Driver/SanitizerArgs.h"
+#include "llvm/Config/llvm-config.h" // for LLVM_HOST_TRIPLE
 #include "llvm/Option/ArgList.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Path.h"
diff --git clang/lib/Driver/ToolChains/PS4CPU.cpp clang/lib/Driver/ToolChains/PS4CPU.cpp
index 22103eb50803..48d824171303 100644
--- clang/lib/Driver/ToolChains/PS4CPU.cpp
+++ clang/lib/Driver/ToolChains/PS4CPU.cpp
@@ -272,6 +272,8 @@ void tools::PS5cpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
     CmdArgs.push_back(D.getLTOMode() == LTOK_Thin ? "--lto=thin"
                                                   : "--lto=full");
 
+  AddLTOFlag("-emit-jump-table-sizes-section");
+
   if (UseJMC)
     AddLTOFlag("-enable-jmc-instrument");
 
@@ -354,9 +356,7 @@ toolchains::PS4PS5Base::PS4PS5Base(const Driver &D, const llvm::Triple &Triple,
 
   SmallString<512> SDKLibDir(SDKRootDir);
   llvm::sys::path::append(SDKLibDir, "target/lib");
-  if (!Args.hasArg(options::OPT_nostdlib) &&
-      !Args.hasArg(options::OPT_nodefaultlibs) &&
-      !Args.hasArg(options::OPT__sysroot_EQ) && !Args.hasArg(options::OPT_E) &&
+  if (!Args.hasArg(options::OPT__sysroot_EQ) && !Args.hasArg(options::OPT_E) &&
       !Args.hasArg(options::OPT_c) && !Args.hasArg(options::OPT_S) &&
       !Args.hasArg(options::OPT_emit_ast) &&
       !llvm::sys::fs::exists(SDKLibDir)) {
@@ -486,6 +486,12 @@ void toolchains::PS4PS5Base::addClangTargetOptions(
     else
       CC1Args.push_back("-fvisibility-externs-nodllstorageclass=keep");
   }
+
+  // Enable jump table sizes section for PS5.
+  if (getTriple().isPS5()) {
+    CC1Args.push_back("-mllvm");
+    CC1Args.push_back("-emit-jump-table-sizes-section");
+  }
 }
 
 // PS4 toolchain.
diff --git clang/lib/Driver/ToolChains/WebAssembly.cpp clang/lib/Driver/ToolChains/WebAssembly.cpp
index 9aacda5fd570..9aec11e69fde 100644
--- clang/lib/Driver/ToolChains/WebAssembly.cpp
+++ clang/lib/Driver/ToolChains/WebAssembly.cpp
@@ -15,6 +15,7 @@
 #include "clang/Driver/Driver.h"
 #include "clang/Driver/DriverDiagnostic.h"
 #include "clang/Driver/Options.h"
+#include "llvm/Config/llvm-config.h" // for LLVM_VERSION_STRING
 #include "llvm/Option/ArgList.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Path.h"
diff --git clang/lib/ExtractAPI/DeclarationFragments.cpp clang/lib/ExtractAPI/DeclarationFragments.cpp
index d77bb1d424f7..06ce5ed6a647 100644
--- clang/lib/ExtractAPI/DeclarationFragments.cpp
+++ clang/lib/ExtractAPI/DeclarationFragments.cpp
@@ -276,6 +276,19 @@ DeclarationFragments DeclarationFragmentsBuilder::getFragmentsForType(
 
   DeclarationFragments Fragments;
 
+  if (const MacroQualifiedType *MQT = dyn_cast<MacroQualifiedType>(T)) {
+    Fragments.append(
+        getFragmentsForType(MQT->getUnderlyingType(), Context, After));
+    return Fragments;
+  }
+
+  if (const AttributedType *AT = dyn_cast<AttributedType>(T)) {
+    // FIXME: Serialize Attributes correctly
+    Fragments.append(
+        getFragmentsForType(AT->getModifiedType(), Context, After));
+    return Fragments;
+  }
+
   // An ElaboratedType is a sugar for types that are referred to using an
   // elaborated keyword, e.g., `struct S`, `enum E`, or (in C++) via a
   // qualified name, e.g., `N::M::type`, or both.
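
A hypothetical declaration (not from the patch) of the shape the two new branches above are meant to handle: when a nullability attribute is spelled through a macro, clang can wrap the parameter's type in MacroQualifiedType and AttributedType sugar, which the fragment builder now unwraps to the underlying type:

// Hypothetical header run through ExtractAPI.
#define MY_NONNULL _Nonnull

// The pointer type below may be represented as
// MacroQualifiedType(AttributedType(int *)); the new code recurses through
// both layers so the emitted fragments describe the plain `int *`.
void setCount(int *MY_NONNULL counter);
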
diff --git clang/lib/Format/ContinuationIndenter.cpp clang/lib/Format/ContinuationIndenter.cpp
index 5843571718b3..f29f8796ea92 100644
--- clang/lib/Format/ContinuationIndenter.cpp
+++ clang/lib/Format/ContinuationIndenter.cpp
@@ -815,7 +815,7 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
     return Tok.is(tok::l_paren) && Tok.ParameterCount > 0 && Tok.Previous &&
            Tok.Previous->is(tok::identifier);
   };
-  const auto IsInTemplateString = [this](const FormatToken &Tok) {
+  auto IsInTemplateString = [this](const FormatToken &Tok) {
     if (!Style.isJavaScript())
       return false;
     for (const auto *Prev = &Tok; Prev; Prev = Prev->Previous) {
@@ -827,7 +827,10 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
     return false;
   };
   // Identifies simple (no expression) one-argument function calls.
-  const auto IsSimpleFunction = [&](const FormatToken &Tok) {
+  auto StartsSimpleOneArgList = [&](const FormatToken &TokAfterLParen) {
+    assert(TokAfterLParen.isNot(tok::comment) || TokAfterLParen.Next);
+    const auto &Tok =
+        TokAfterLParen.is(tok::comment) ? *TokAfterLParen.Next : TokAfterLParen;
     if (!Tok.FakeLParens.empty() && Tok.FakeLParens.back() > prec::Unknown)
       return false;
     // Nested calls that involve `new` expressions also look like simple
@@ -836,6 +839,11 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
     // - foo(::new Bar())
     if (Tok.is(tok::kw_new) || Tok.startsSequence(tok::coloncolon, tok::kw_new))
       return true;
+    if (Tok.is(TT_UnaryOperator) ||
+        (Style.isJavaScript() &&
+         Tok.isOneOf(tok::ellipsis, Keywords.kw_await))) {
+      return true;
+    }
     const auto *Previous = Tok.Previous;
     if (!Previous || (!Previous->isOneOf(TT_FunctionDeclarationLParen,
                                          TT_LambdaDefinitionLParen) &&
@@ -861,7 +869,7 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
       //  or
       //  caaaaaaaaaaaaaaaaaaaaal(
       //       new SomethingElseeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee());
-      !IsSimpleFunction(Current)) {
+      !StartsSimpleOneArgList(Current)) {
     CurrentState.NoLineBreak = true;
   }
 
diff --git clang/lib/Format/ObjCPropertyAttributeOrderFixer.cpp clang/lib/Format/ObjCPropertyAttributeOrderFixer.cpp
index c91d6251425e..3ebac99b17fb 100644
--- clang/lib/Format/ObjCPropertyAttributeOrderFixer.cpp
+++ clang/lib/Format/ObjCPropertyAttributeOrderFixer.cpp
@@ -86,12 +86,9 @@ void ObjCPropertyAttributeOrderFixer::sortPropertyAttributes(
       Value = Tok->TokenText;
     }
 
-    auto It = SortOrderMap.find(Attribute);
-    if (It == SortOrderMap.end())
-      It = SortOrderMap.insert({Attribute, SortOrderMap.size()}).first;
-
     // Sort the indices based on the priority stored in `SortOrderMap`.
-    const auto Ordinal = It->second;
+    const auto Ordinal =
+        SortOrderMap.try_emplace(Attribute, SortOrderMap.size()).first->second;
     if (!Ordinals.insert(Ordinal).second) {
       HasDuplicates = true;
       continue;
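
For what it's worth, a small standalone sketch of the try_emplace idiom adopted above (a plain llvm::StringMap<unsigned> stands in for the fixer's sort-order map): an attribute is assigned the next ordinal only on first sight, and the existing ordinal comes back on every later lookup, replacing the separate find/insert dance:

#include "llvm/ADT/StringMap.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  llvm::StringMap<unsigned> SortOrderMap;
  for (llvm::StringRef Attr : {"nonatomic", "copy", "nonatomic"}) {
    // Inserts {Attr, current size} only if Attr is new; either way, the
    // iterator in .first points at Attr's entry.
    unsigned Ordinal =
        SortOrderMap.try_emplace(Attr, SortOrderMap.size()).first->second;
    llvm::outs() << Attr << " -> " << Ordinal << "\n";
  }
  // Prints: nonatomic -> 0, copy -> 1, nonatomic -> 0
}
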
diff --git clang/lib/Format/TokenAnnotator.cpp clang/lib/Format/TokenAnnotator.cpp
index 6a1cf61659fd..dfa703aed0d3 100644
--- clang/lib/Format/TokenAnnotator.cpp
+++ clang/lib/Format/TokenAnnotator.cpp
@@ -2874,21 +2874,26 @@ private:
     if (Line.InPPDirective && AfterRParen->is(tok::minus))
       return false;
 
+    const auto *Prev = BeforeRParen;
+
+    // Look for a function pointer type, e.g. `(*)()`.
+    if (Prev->is(tok::r_paren)) {
+      if (Prev->is(TT_CastRParen))
+        return false;
+      Prev = Prev->MatchingParen;
+      if (!Prev)
+        return false;
+      Prev = Prev->Previous;
+      if (!Prev || Prev->isNot(tok::r_paren))
+        return false;
+      Prev = Prev->MatchingParen;
+      return Prev && Prev->is(TT_FunctionTypeLParen);
+    }
+
     // Search for unexpected tokens.
-    for (auto *Prev = BeforeRParen; Prev != LParen; Prev = Prev->Previous) {
-      if (Prev->is(tok::r_paren)) {
-        if (Prev->is(TT_CastRParen))
-          return false;
-        Prev = Prev->MatchingParen;
-        if (!Prev)
-          return false;
-        if (Prev->is(TT_FunctionTypeLParen))
-          break;
-        continue;
-      }
+    for (Prev = BeforeRParen; Prev != LParen; Prev = Prev->Previous)
       if (!Prev->isOneOf(tok::kw_const, tok::identifier, tok::coloncolon))
         return false;
-    }
 
     return true;
   }
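
For context, a hypothetical snippet containing the `(*)()` pattern the new r_paren branch is written for; the C-style cast to a function-pointer type is what should be annotated as a cast (casting between object and function pointers is only conditionally supported in C++, but clang accepts it):

#include <cstdio>

static int twice(int X) { return 2 * X; }

int main() {
  // Pretend Sym came from something like dlsym(); the cast back to
  // `int (*)(int)` is the token sequence the annotator now walks: the inner
  // ')' closes the declarator and the '(' of `(*)` is TT_FunctionTypeLParen.
  void *Sym = reinterpret_cast<void *>(&twice);
  auto *Fn = (int (*)(int))Sym;
  std::printf("%d\n", Fn(21)); // 42
}
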
diff --git clang/lib/Format/UnwrappedLineParser.cpp clang/lib/Format/UnwrappedLineParser.cpp
index 246b29d308bf..1727ed93822b 100644
--- clang/lib/Format/UnwrappedLineParser.cpp
+++ clang/lib/Format/UnwrappedLineParser.cpp
@@ -570,7 +570,8 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
                                 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
                                                  Keywords.kw_as));
           ProbablyBracedList =
-              ProbablyBracedList || (IsCpp && NextTok->is(tok::l_paren));
+              ProbablyBracedList || (IsCpp && (PrevTok->Tok.isLiteral() ||
+                                               NextTok->is(tok::l_paren)));
 
           // If there is a comma, semicolon or right paren after the closing
           // brace, we assume this is a braced initializer list.
diff --git clang/lib/Frontend/CompilerInstance.cpp clang/lib/Frontend/CompilerInstance.cpp
index 1364641a9b71..5a273474f1d6 100644
--- clang/lib/Frontend/CompilerInstance.cpp
+++ clang/lib/Frontend/CompilerInstance.cpp
@@ -381,6 +381,9 @@ FileManager *CompilerInstance::createFileManager(
                   : createVFSFromCompilerInvocation(getInvocation(),
                                                     getDiagnostics());
   assert(VFS && "FileManager has no VFS?");
+  if (getFrontendOpts().ShowStats)
+    VFS =
+        llvm::makeIntrusiveRefCnt<llvm::vfs::TracingFileSystem>(std::move(VFS));
   FileMgr = new FileManager(getFileSystemOpts(), std::move(VFS));
   return FileMgr.get();
 }
diff --git clang/lib/Frontend/FrontendActions.cpp clang/lib/Frontend/FrontendActions.cpp
index 9f5d09e33ce2..64f90c493c10 100644
--- clang/lib/Frontend/FrontendActions.cpp
+++ clang/lib/Frontend/FrontendActions.cpp
@@ -26,6 +26,7 @@
 #include "clang/Serialization/ASTReader.h"
 #include "clang/Serialization/ASTWriter.h"
 #include "clang/Serialization/ModuleFile.h"
+#include "llvm/Config/llvm-config.h" // for LLVM_HOST_TRIPLE
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/MemoryBuffer.h"
diff --git clang/lib/Frontend/InitPreprocessor.cpp clang/lib/Frontend/InitPreprocessor.cpp
index 61260a337982..9a0fdb175ff2 100644
--- clang/lib/Frontend/InitPreprocessor.cpp
+++ clang/lib/Frontend/InitPreprocessor.cpp
@@ -671,10 +671,9 @@ static void InitializeCPlusPlusFeatureTestMacros(const LangOptions &LangOpts,
                         LangOpts.CPlusPlus23   ? "202211L"
                         : LangOpts.CPlusPlus17 ? "201603L"
                                                : "200907");
-    Builder.defineMacro("__cpp_static_assert", LangOpts.CPlusPlus26 ? "202306L"
-                                               : LangOpts.CPlusPlus17
-                                                   ? "201411L"
-                                                   : "200410");
+    // The C++17 / C++26 static_assert forms are supported as an extension in
+    // earlier language modes, so we use the C++26 value.
+    Builder.defineMacro("__cpp_static_assert", "202306L");
     Builder.defineMacro("__cpp_decltype", "200707L");
     Builder.defineMacro("__cpp_attributes", "200809L");
     Builder.defineMacro("__cpp_rvalue_references", "200610L");
diff --git clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp
index f618c536b5f3..8cdb463e2c99 100644
--- clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp
+++ clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp
@@ -138,10 +138,8 @@ namespace {
     SmallVector<DeclRefExpr *, 32> BlockDeclRefs;
 
     // Block related declarations.
-    SmallVector<ValueDecl *, 8> BlockByCopyDecls;
-    llvm::SmallPtrSet<ValueDecl *, 8> BlockByCopyDeclsPtrSet;
-    SmallVector<ValueDecl *, 8> BlockByRefDecls;
-    llvm::SmallPtrSet<ValueDecl *, 8> BlockByRefDeclsPtrSet;
+    llvm::SmallSetVector<ValueDecl *, 8> BlockByCopyDecls;
+    llvm::SmallSetVector<ValueDecl *, 8> BlockByRefDecls;
     llvm::DenseMap<ValueDecl *, unsigned> BlockByRefDeclNo;
     llvm::SmallPtrSet<ValueDecl *, 8> ImportedBlockDecls;
     llvm::SmallPtrSet<VarDecl *, 8> ImportedLocalExternalDecls;
@@ -4082,19 +4080,17 @@ std::string RewriteModernObjC::SynthesizeBlockFunc(BlockExpr *CE, int i,
 
   // Create local declarations to avoid rewriting all closure decl ref exprs.
   // First, emit a declaration for all "by ref" decls.
-  for (SmallVectorImpl<ValueDecl *>::iterator I = BlockByRefDecls.begin(),
-       E = BlockByRefDecls.end(); I != E; ++I) {
+  for (ValueDecl *VD : BlockByRefDecls) {
     S += "  ";
-    std::string Name = (*I)->getNameAsString();
+    std::string Name = VD->getNameAsString();
     std::string TypeString;
-    RewriteByRefString(TypeString, Name, (*I));
+    RewriteByRefString(TypeString, Name, VD);
     TypeString += " *";
     Name = TypeString + Name;
-    S += Name + " = __cself->" + (*I)->getNameAsString() + "; // bound by ref\n";
+    S += Name + " = __cself->" + VD->getNameAsString() + "; // bound by ref\n";
   }
   // Next, emit a declaration for all "by copy" declarations.
-  for (SmallVectorImpl<ValueDecl *>::iterator I = BlockByCopyDecls.begin(),
-       E = BlockByCopyDecls.end(); I != E; ++I) {
+  for (ValueDecl *VD : BlockByCopyDecls) {
     S += "  ";
     // Handle nested closure invocation. For example:
     //
@@ -4106,21 +4102,20 @@ std::string RewriteModernObjC::SynthesizeBlockFunc(BlockExpr *CE, int i,
     //     myImportedClosure(); // import and invoke the closure
     //   };
     //
-    if (isTopLevelBlockPointerType((*I)->getType())) {
-      RewriteBlockPointerTypeVariable(S, (*I));
+    if (isTopLevelBlockPointerType(VD->getType())) {
+      RewriteBlockPointerTypeVariable(S, VD);
       S += " = (";
-      RewriteBlockPointerType(S, (*I)->getType());
+      RewriteBlockPointerType(S, VD->getType());
       S += ")";
-      S += "__cself->" + (*I)->getNameAsString() + "; // bound by copy\n";
-    }
-    else {
-      std::string Name = (*I)->getNameAsString();
-      QualType QT = (*I)->getType();
-      if (HasLocalVariableExternalStorage(*I))
+      S += "__cself->" + VD->getNameAsString() + "; // bound by copy\n";
+    } else {
+      std::string Name = VD->getNameAsString();
+      QualType QT = VD->getType();
+      if (HasLocalVariableExternalStorage(VD))
         QT = Context->getPointerType(QT);
       QT.getAsStringInternal(Name, Context->getPrintingPolicy());
-      S += Name + " = __cself->" +
-                              (*I)->getNameAsString() + "; // bound by copy\n";
+      S += Name + " = __cself->" + VD->getNameAsString() +
+           "; // bound by copy\n";
     }
   }
   std::string RewrittenStr = RewrittenBlockExprs[CE];
@@ -4146,7 +4141,7 @@ std::string RewriteModernObjC::SynthesizeBlockHelperFuncs(
     S += VD->getNameAsString();
     S += ", (void*)src->";
     S += VD->getNameAsString();
-    if (BlockByRefDeclsPtrSet.count(VD))
+    if (BlockByRefDecls.count(VD))
       S += ", " + utostr(BLOCK_FIELD_IS_BYREF) + "/*BLOCK_FIELD_IS_BYREF*/);";
     else if (VD->getType()->isBlockPointerType())
       S += ", " + utostr(BLOCK_FIELD_IS_BLOCK) + "/*BLOCK_FIELD_IS_BLOCK*/);";
@@ -4163,7 +4158,7 @@ std::string RewriteModernObjC::SynthesizeBlockHelperFuncs(
   for (ValueDecl *VD : ImportedBlockDecls) {
     S += "_Block_object_dispose((void*)src->";
     S += VD->getNameAsString();
-    if (BlockByRefDeclsPtrSet.count(VD))
+    if (BlockByRefDecls.count(VD))
       S += ", " + utostr(BLOCK_FIELD_IS_BYREF) + "/*BLOCK_FIELD_IS_BYREF*/);";
     else if (VD->getType()->isBlockPointerType())
       S += ", " + utostr(BLOCK_FIELD_IS_BLOCK) + "/*BLOCK_FIELD_IS_BLOCK*/);";
@@ -4190,10 +4185,9 @@ std::string RewriteModernObjC::SynthesizeBlockImpl(BlockExpr *CE,
 
   if (BlockDeclRefs.size()) {
     // Output all "by copy" declarations.
-    for (SmallVectorImpl<ValueDecl *>::iterator I = BlockByCopyDecls.begin(),
-         E = BlockByCopyDecls.end(); I != E; ++I) {
+    for (ValueDecl *VD : BlockByCopyDecls) {
       S += "  ";
-      std::string FieldName = (*I)->getNameAsString();
+      std::string FieldName = VD->getNameAsString();
       std::string ArgName = "_" + FieldName;
       // Handle nested closure invocation. For example:
       //
@@ -4205,12 +4199,12 @@ std::string RewriteModernObjC::SynthesizeBlockImpl(BlockExpr *CE,
       //     myImportedBlock(); // import and invoke the closure
       //   };
       //
-      if (isTopLevelBlockPointerType((*I)->getType())) {
+      if (isTopLevelBlockPointerType(VD->getType())) {
         S += "struct __block_impl *";
         Constructor += ", void *" + ArgName;
       } else {
-        QualType QT = (*I)->getType();
-        if (HasLocalVariableExternalStorage(*I))
+        QualType QT = VD->getType();
+        if (HasLocalVariableExternalStorage(VD))
           QT = Context->getPointerType(QT);
         QT.getAsStringInternal(FieldName, Context->getPrintingPolicy());
         QT.getAsStringInternal(ArgName, Context->getPrintingPolicy());
@@ -4219,14 +4213,13 @@ std::string RewriteModernObjC::SynthesizeBlockImpl(BlockExpr *CE,
       S += FieldName + ";\n";
     }
     // Output all "by ref" declarations.
-    for (SmallVectorImpl<ValueDecl *>::iterator I = BlockByRefDecls.begin(),
-         E = BlockByRefDecls.end(); I != E; ++I) {
+    for (ValueDecl *VD : BlockByRefDecls) {
       S += "  ";
-      std::string FieldName = (*I)->getNameAsString();
+      std::string FieldName = VD->getNameAsString();
       std::string ArgName = "_" + FieldName;
       {
         std::string TypeString;
-        RewriteByRefString(TypeString, FieldName, (*I));
+        RewriteByRefString(TypeString, FieldName, VD);
         TypeString += " *";
         FieldName = TypeString + FieldName;
         ArgName = TypeString + ArgName;
@@ -4238,24 +4231,21 @@ std::string RewriteModernObjC::SynthesizeBlockImpl(BlockExpr *CE,
     Constructor += ", int flags=0)";
     // Initialize all "by copy" arguments.
     bool firsTime = true;
-    for (SmallVectorImpl<ValueDecl *>::iterator I = BlockByCopyDecls.begin(),
-         E = BlockByCopyDecls.end(); I != E; ++I) {
-      std::string Name = (*I)->getNameAsString();
-        if (firsTime) {
-          Constructor += " : ";
-          firsTime = false;
-        }
-        else
-          Constructor += ", ";
-        if (isTopLevelBlockPointerType((*I)->getType()))
-          Constructor += Name + "((struct __block_impl *)_" + Name + ")";
-        else
-          Constructor += Name + "(_" + Name + ")";
+    for (const ValueDecl *VD : BlockByCopyDecls) {
+      std::string Name = VD->getNameAsString();
+      if (firsTime) {
+        Constructor += " : ";
+        firsTime = false;
+      } else
+        Constructor += ", ";
+      if (isTopLevelBlockPointerType(VD->getType()))
+        Constructor += Name + "((struct __block_impl *)_" + Name + ")";
+      else
+        Constructor += Name + "(_" + Name + ")";
     }
     // Initialize all "by ref" arguments.
-    for (SmallVectorImpl<ValueDecl *>::iterator I = BlockByRefDecls.begin(),
-         E = BlockByRefDecls.end(); I != E; ++I) {
-      std::string Name = (*I)->getNameAsString();
+    for (const ValueDecl *VD : BlockByRefDecls) {
+      std::string Name = VD->getNameAsString();
       if (firsTime) {
         Constructor += " : ";
         firsTime = false;
@@ -4340,17 +4330,11 @@ void RewriteModernObjC::SynthesizeBlockLiterals(SourceLocation FunLocStart,
       ValueDecl *VD = Exp->getDecl();
       BlockDeclRefs.push_back(Exp);
       if (!VD->hasAttr<BlocksAttr>()) {
-        if (!BlockByCopyDeclsPtrSet.count(VD)) {
-          BlockByCopyDeclsPtrSet.insert(VD);
-          BlockByCopyDecls.push_back(VD);
-        }
+        BlockByCopyDecls.insert(VD);
         continue;
       }
 
-      if (!BlockByRefDeclsPtrSet.count(VD)) {
-        BlockByRefDeclsPtrSet.insert(VD);
-        BlockByRefDecls.push_back(VD);
-      }
+      BlockByRefDecls.insert(VD);
 
       // imported objects in the inner blocks not used in the outer
       // blocks must be copied/disposed in the outer block as well.
@@ -4380,9 +4364,7 @@ void RewriteModernObjC::SynthesizeBlockLiterals(SourceLocation FunLocStart,
 
     BlockDeclRefs.clear();
     BlockByRefDecls.clear();
-    BlockByRefDeclsPtrSet.clear();
     BlockByCopyDecls.clear();
-    BlockByCopyDeclsPtrSet.clear();
     ImportedBlockDecls.clear();
   }
   if (RewriteSC) {
@@ -5160,20 +5142,12 @@ void RewriteModernObjC::CollectBlockDeclRefInfo(BlockExpr *Exp) {
   if (BlockDeclRefs.size()) {
     // Unique all "by copy" declarations.
     for (unsigned i = 0; i < BlockDeclRefs.size(); i++)
-      if (!BlockDeclRefs[i]->getDecl()->hasAttr<BlocksAttr>()) {
-        if (!BlockByCopyDeclsPtrSet.count(BlockDeclRefs[i]->getDecl())) {
-          BlockByCopyDeclsPtrSet.insert(BlockDeclRefs[i]->getDecl());
-          BlockByCopyDecls.push_back(BlockDeclRefs[i]->getDecl());
-        }
-      }
+      if (!BlockDeclRefs[i]->getDecl()->hasAttr<BlocksAttr>())
+        BlockByCopyDecls.insert(BlockDeclRefs[i]->getDecl());
     // Unique all "by ref" declarations.
     for (unsigned i = 0; i < BlockDeclRefs.size(); i++)
-      if (BlockDeclRefs[i]->getDecl()->hasAttr<BlocksAttr>()) {
-        if (!BlockByRefDeclsPtrSet.count(BlockDeclRefs[i]->getDecl())) {
-          BlockByRefDeclsPtrSet.insert(BlockDeclRefs[i]->getDecl());
-          BlockByRefDecls.push_back(BlockDeclRefs[i]->getDecl());
-        }
-      }
+      if (BlockDeclRefs[i]->getDecl()->hasAttr<BlocksAttr>())
+        BlockByRefDecls.insert(BlockDeclRefs[i]->getDecl());
     // Find any imported blocks...they will need special attention.
     for (unsigned i = 0; i < BlockDeclRefs.size(); i++)
       if (BlockDeclRefs[i]->getDecl()->hasAttr<BlocksAttr>() ||
@@ -5205,20 +5179,16 @@ Stmt *RewriteModernObjC::SynthBlockInitExpr(BlockExpr *Exp,
     for (unsigned i = 0; i < InnerBlockDeclRefs.size(); i++) {
       DeclRefExpr *Exp = InnerBlockDeclRefs[i];
       ValueDecl *VD = Exp->getDecl();
-      if (!VD->hasAttr<BlocksAttr>() && !BlockByCopyDeclsPtrSet.count(VD)) {
-      // We need to save the copied-in variables in nested
-      // blocks because it is needed at the end for some of the API generations.
-      // See SynthesizeBlockLiterals routine.
+      if (!VD->hasAttr<BlocksAttr>() && BlockByCopyDecls.insert(VD)) {
+        // We need to save the copied-in variables in nested blocks because
+        // they are needed at the end for some of the API generations. See the
+        // SynthesizeBlockLiterals routine.
         InnerDeclRefs.push_back(Exp); countOfInnerDecls++;
         BlockDeclRefs.push_back(Exp);
-        BlockByCopyDeclsPtrSet.insert(VD);
-        BlockByCopyDecls.push_back(VD);
       }
-      if (VD->hasAttr<BlocksAttr>() && !BlockByRefDeclsPtrSet.count(VD)) {
+      if (VD->hasAttr<BlocksAttr>() && BlockByRefDecls.insert(VD)) {
         InnerDeclRefs.push_back(Exp); countOfInnerDecls++;
         BlockDeclRefs.push_back(Exp);
-        BlockByRefDeclsPtrSet.insert(VD);
-        BlockByRefDecls.push_back(VD);
       }
     }
     // Find any imported blocks...they will need special attention.
@@ -5299,47 +5269,43 @@ Stmt *RewriteModernObjC::SynthBlockInitExpr(BlockExpr *Exp,
   if (BlockDeclRefs.size()) {
     Expr *Exp;
     // Output all "by copy" declarations.
-    for (SmallVectorImpl<ValueDecl *>::iterator I = BlockByCopyDecls.begin(),
-         E = BlockByCopyDecls.end(); I != E; ++I) {
-      if (isObjCType((*I)->getType())) {
+    for (ValueDecl *VD : BlockByCopyDecls) {
+      if (isObjCType(VD->getType())) {
         // FIXME: Conform to ABI ([[obj retain] autorelease]).
-        FD = SynthBlockInitFunctionDecl((*I)->getName());
+        FD = SynthBlockInitFunctionDecl(VD->getName());
         Exp = new (Context) DeclRefExpr(*Context, FD, false, FD->getType(),
                                         VK_LValue, SourceLocation());
-        if (HasLocalVariableExternalStorage(*I)) {
-          QualType QT = (*I)->getType();
+        if (HasLocalVariableExternalStorage(VD)) {
+          QualType QT = VD->getType();
           QT = Context->getPointerType(QT);
           Exp = UnaryOperator::Create(const_cast<ASTContext &>(*Context), Exp,
                                       UO_AddrOf, QT, VK_PRValue, OK_Ordinary,
                                       SourceLocation(), false,
                                       FPOptionsOverride());
         }
-      } else if (isTopLevelBlockPointerType((*I)->getType())) {
-        FD = SynthBlockInitFunctionDecl((*I)->getName());
+      } else if (isTopLevelBlockPointerType(VD->getType())) {
+        FD = SynthBlockInitFunctionDecl(VD->getName());
         Arg = new (Context) DeclRefExpr(*Context, FD, false, FD->getType(),
                                         VK_LValue, SourceLocation());
         Exp = NoTypeInfoCStyleCastExpr(Context, Context->VoidPtrTy,
                                        CK_BitCast, Arg);
       } else {
-        FD = SynthBlockInitFunctionDecl((*I)->getName());
+        FD = SynthBlockInitFunctionDecl(VD->getName());
         Exp = new (Context) DeclRefExpr(*Context, FD, false, FD->getType(),
                                         VK_LValue, SourceLocation());
-        if (HasLocalVariableExternalStorage(*I)) {
-          QualType QT = (*I)->getType();
+        if (HasLocalVariableExternalStorage(VD)) {
+          QualType QT = VD->getType();
           QT = Context->getPointerType(QT);
           Exp = UnaryOperator::Create(const_cast<ASTContext &>(*Context), Exp,
                                       UO_AddrOf, QT, VK_PRValue, OK_Ordinary,
                                       SourceLocation(), false,
                                       FPOptionsOverride());
         }
-
       }
       InitExprs.push_back(Exp);
     }
     // Output all "by ref" declarations.
-    for (SmallVectorImpl<ValueDecl *>::iterator I = BlockByRefDecls.begin(),
-         E = BlockByRefDecls.end(); I != E; ++I) {
-      ValueDecl *ND = (*I);
+    for (ValueDecl *ND : BlockByRefDecls) {
       std::string Name(ND->getNameAsString());
       std::string RecName;
       RewriteByRefString(RecName, Name, ND, true);
@@ -5351,7 +5317,7 @@ Stmt *RewriteModernObjC::SynthBlockInitExpr(BlockExpr *Exp,
       assert(RD && "SynthBlockInitExpr(): Can't find RecordDecl");
       QualType castT = Context->getPointerType(Context->getTagDeclType(RD));
 
-      FD = SynthBlockInitFunctionDecl((*I)->getName());
+      FD = SynthBlockInitFunctionDecl(ND->getName());
       Exp = new (Context) DeclRefExpr(*Context, FD, false, FD->getType(),
                                       VK_LValue, SourceLocation());
       bool isNestedCapturedVar = false;
@@ -5406,9 +5372,7 @@ Stmt *RewriteModernObjC::SynthBlockInitExpr(BlockExpr *Exp,
 
   BlockDeclRefs.clear();
   BlockByRefDecls.clear();
-  BlockByRefDeclsPtrSet.clear();
   BlockByCopyDecls.clear();
-  BlockByCopyDeclsPtrSet.clear();
   ImportedBlockDecls.clear();
   return NewRep;
 }
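
A minimal standalone sketch of the container swap above: llvm::SmallSetVector deduplicates like the removed SmallPtrSet while keeping insertion order like the removed SmallVector, and insert() reports whether the element was new, which is what the rewriter now branches on:

#include "llvm/ADT/SetVector.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  llvm::SmallSetVector<int, 8> Decls; // stands in for BlockByCopyDecls
  for (int V : {3, 1, 3, 2, 1})
    if (Decls.insert(V)) // true only the first time a value is seen
      llvm::outs() << "new: " << V << "\n";
  for (int V : Decls) // iteration follows first-insertion order: 3 1 2
    llvm::outs() << V << " ";
  llvm::outs() << "\n";
}
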
diff --git clang/lib/Headers/CMakeLists.txt clang/lib/Headers/CMakeLists.txt
index e928b5b14282..a21e3901f63f 100644
--- clang/lib/Headers/CMakeLists.txt
+++ clang/lib/Headers/CMakeLists.txt
@@ -151,11 +151,13 @@ set(x86_files
   avx10_2_512convertintrin.h
   avx10_2_512minmaxintrin.h
   avx10_2_512niintrin.h
+  avx10_2_512satcvtdsintrin.h
   avx10_2_512satcvtintrin.h
   avx10_2bf16intrin.h
   avx10_2convertintrin.h
   avx10_2minmaxintrin.h
   avx10_2niintrin.h
+  avx10_2satcvtdsintrin.h
   avx10_2satcvtintrin.h
   avx2intrin.h
   avx512bf16intrin.h
diff --git clang/lib/Headers/avx10_2_512satcvtdsintrin.h clang/lib/Headers/avx10_2_512satcvtdsintrin.h
new file mode 100644
index 000000000000..5970ab033144
--- /dev/null
+++ clang/lib/Headers/avx10_2_512satcvtdsintrin.h
@@ -0,0 +1,303 @@
+/*===----- avx10_2_512satcvtdsintrin.h - AVX10_2_512SATCVTDS intrinsics ----===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error                                                                         \
+    "Never use <avx10_2_512satcvtdsintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __AVX10_2_512SATCVTDSINTRIN_H
+#define __AVX10_2_512SATCVTDSINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS                                                     \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-512"),    \
+                 __min_vector_width__(512)))
+
+// 512 bit : Double -> Int
+static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvttspd_epi32(__m512d __A) {
+  return ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask(
+      (__v8df)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1,
+      _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_mask_cvttspd_epi32(__m256i __W, __mmask8 __U, __m512d __A) {
+  return ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask(
+      (__v8df)__A, (__v8si)__W, __U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvttspd_epi32(__mmask8 __U, __m512d __A) {
+  return ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask(
+      (__v8df)__A, (__v8si)_mm256_setzero_si256(), __U,
+      _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm512_cvtts_roundpd_epi32(__A, __R)                                   \
+  ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask(                          \
+      (__v8df)(__m512d)(__A), (__v8si)_mm256_undefined_si256(),                \
+      (__mmask8) - 1, (const int)(__R)))
+
+#define _mm512_mask_cvtts_roundpd_epi32(__W, __U, __A, __R)                    \
+  ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask(                          \
+      (__v8df)(__m512d)(__A), (__v8si)(__m256i)(__W), (__mmask8)(__U),         \
+      (const int)(__R)))
+
+#define _mm512_maskz_cvtts_roundpd_epi32(__U, __A, __R)                        \
+  ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask(                          \
+      (__v8df)(__m512d)(__A), (__v8si)_mm256_setzero_si256(), (__mmask8)(__U), \
+      (const int)(__R)))
+
+// 512 bit : Double -> UInt
+static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvttspd_epu32(__m512d __A) {
+  return ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask(
+      (__v8df)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1,
+      _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_mask_cvttspd_epu32(__m256i __W, __mmask8 __U, __m512d __A) {
+  return ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask(
+      (__v8df)__A, (__v8si)__W, __U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvttspd_epu32(__mmask8 __U, __m512d __A) {
+  return ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask(
+      (__v8df)__A, (__v8si)_mm256_setzero_si256(), __U,
+      _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm512_cvtts_roundpd_epu32(__A, __R)                                   \
+  ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask(                         \
+      (__v8df)(__m512d)(__A), (__v8si)_mm256_undefined_si256(),                \
+      (__mmask8) - 1, (const int)(__R)))
+
+#define _mm512_mask_cvtts_roundpd_epu32(__W, __U, __A, __R)                    \
+  ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask(                         \
+      (__v8df)(__m512d)(__A), (__v8si)(__m256i)(__W), (__mmask8)(__U),         \
+      (const int)(__R)))
+
+#define _mm512_maskz_cvtts_roundpd_epu32(__U, __A, __R)                        \
+  ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask(                         \
+      (__v8df)(__m512d)(__A), (__v8si)_mm256_setzero_si256(), (__mmask8)(__U), \
+      (const int)(__R)))
+
+// 512 bit : Double -> Long
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttspd_epi64(__m512d __A) {
+  return ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask(
+      (__v8df)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
+      _MM_FROUND_CUR_DIRECTION));
+}
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_cvttspd_epi64(__m512i __W, __mmask8 __U, __m512d __A) {
+  return ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask(
+      (__v8df)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION));
+}
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvttspd_epi64(__mmask8 __U, __m512d __A) {
+  return ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask(
+      (__v8df)__A, (__v8di)_mm512_setzero_si512(), __U,
+      _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm512_cvtts_roundpd_epi64(__A, __R)                                   \
+  ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask(                          \
+      (__v8df)(__m512d)(__A), (__v8di)_mm512_undefined_epi32(),                \
+      (__mmask8) - 1, (const int)(__R)))
+
+#define _mm512_mask_cvtts_roundpd_epi64(__W, __U, __A, __R)                    \
+  ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask(                          \
+      (__v8df)(__m512d)(__A), (__v8di)(__m512i)(__W), (__mmask8)(__U),         \
+      (const int)(__R)))
+
+#define _mm512_maskz_cvtts_roundpd_epi64(__U, __A, __R)                        \
+  ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask(                          \
+      (__v8df)(__m512d)(__A), (__v8di)_mm512_setzero_si512(), (__mmask8)(__U), \
+      (const int)(__R)))
+
+// 512 bit : Double -> ULong
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttspd_epu64(__m512d __A) {
+  return ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask(
+      (__v8df)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
+      _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_cvttspd_epu64(__m512i __W, __mmask8 __U, __m512d __A) {
+  return ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask(
+      (__v8df)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvttspd_epu64(__mmask8 __U, __m512d __A) {
+  return ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask(
+      (__v8df)__A, (__v8di)_mm512_setzero_si512(), __U,
+      _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm512_cvtts_roundpd_epu64(__A, __R)                                   \
+  ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask(                         \
+      (__v8df)(__m512d)(__A), (__v8di)_mm512_undefined_epi32(),                \
+      (__mmask8) - 1, (const int)(__R)))
+
+#define _mm512_mask_cvtts_roundpd_epu64(__W, __U, __A, __R)                    \
+  ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask(                         \
+      (__v8df)(__m512d)(__A), (__v8di)(__m512i)(__W), (__mmask8)(__U),         \
+      (const int)(__R)))
+
+#define _mm512_maskz_cvtts_roundpd_epu64(__U, __A, __R)                        \
+  ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask(                         \
+      (__v8df)(__m512d)(__A), (__v8di)_mm512_setzero_si512(), (__mmask8)(__U), \
+      (const int)(__R)))
+
+// 512 bit : Float -> Int
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epi32(__m512 __A) {
+  return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask(
+      (__v16sf)(__A), (__v16si)_mm512_undefined_epi32(), (__mmask16)-1,
+      _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_cvttsps_epi32(__m512i __W, __mmask16 __U, __m512 __A) {
+  return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask(
+      (__v16sf)(__A), (__v16si)(__W), __U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvttsps_epi32(__mmask16 __U, __m512 __A) {
+  return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask(
+      (__v16sf)(__A), (__v16si)_mm512_setzero_si512(), __U,
+      _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm512_cvtts_roundps_epi32(__A, __R)                                   \
+  ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask(                          \
+      (__v16sf)(__m512)(__A), (__v16si)_mm512_undefined_epi32(),               \
+      (__mmask16) - 1, (const int)(__R)))
+
+#define _mm512_mask_cvtts_roundps_epi32(__W, __U, __A, __R)                    \
+  ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask(                          \
+      (__v16sf)(__m512)(__A), (__v16si)(__m512i)(__W), (__mmask16)(__U),       \
+      (const int)(__R)))
+
+#define _mm512_maskz_cvtts_roundps_epi32(__U, __A, __R)                        \
+  ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask(                          \
+      (__v16sf)(__m512)(__A), (__v16si)_mm512_setzero_si512(),                 \
+      (__mmask16)(__U), (const int)(__R)))
+
+// 512 bit: Float -> uint
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epu32(__m512 __A) {
+  return ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask(
+      (__v16sf)(__A), (__v16si)_mm512_undefined_epi32(), (__mmask16)-1,
+      _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_cvttsps_epu32(__m512i __W, __mmask16 __U, __m512 __A) {
+  return ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask(
+      (__v16sf)(__A), (__v16si)(__W), __U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvttsps_epu32(__mmask16 __U, __m512 __A) {
+  return ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask(
+      (__v16sf)(__A), (__v16si)_mm512_setzero_si512(), __U,
+      _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm512_cvtts_roundps_epu32(__A, __R)                                   \
+  ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask(                         \
+      (__v16sf)(__m512)(__A), (__v16si)_mm512_undefined_epi32(),               \
+      (__mmask16) - 1, (const int)(__R)))
+
+#define _mm512_mask_cvtts_roundps_epu32(__W, __U, __A, __R)                    \
+  ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask(                         \
+      (__v16sf)(__m512)(__A), (__v16si)(__m512i)(__W), (__mmask16)(__U),       \
+      (const int)(__R)))
+
+#define _mm512_maskz_cvtts_roundps_epu32(__U, __A, __R)                        \
+  ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask(                         \
+      (__v16sf)(__m512)(__A), (__v16si)_mm512_setzero_si512(),                 \
+      (__mmask16)(__U), (const int)(__R)))
+
+// 512 bit : float -> long
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epi64(__m256 __A) {
+  return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask(
+      (__v8sf)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
+      _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_cvttsps_epi64(__m512i __W, __mmask8 __U, __m256 __A) {
+  return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask(
+      (__v8sf)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvttsps_epi64(__mmask8 __U, __m256 __A) {
+  return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask(
+      (__v8sf)__A, (__v8di)_mm512_setzero_si512(), __U,
+      _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm512_cvtts_roundps_epi64(__A, __R)                                   \
+  ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask(                          \
+      (__v8sf)(__m256)(__A), (__v8di)_mm512_undefined_epi32(), (__mmask8) - 1, \
+      (const int)(__R)))
+
+#define _mm512_mask_cvtts_roundps_epi64(__W, __U, __A, __R)                    \
+  ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask(                          \
+      (__v8sf)(__m256)(__A), (__v8di)(__m512i)(__W), (__mmask8)(__U),          \
+      (const int)(__R)))
+
+#define _mm512_maskz_cvtts_roundps_epi64(__U, __A, __R)                        \
+  ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask(                          \
+      (__v8sf)(__m256)(__A), (__v8di)_mm512_setzero_si512(), (__mmask8)(__U),  \
+      (const int)(__R)))
+
+// 512 bit : float -> ulong
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epu64(__m256 __A) {
+  return ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask(
+      (__v8sf)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
+      _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_cvttsps_epu64(__m512i __W, __mmask8 __U, __m256 __A) {
+  return ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask(
+      (__v8sf)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvttsps_epu64(__mmask8 __U, __m256 __A) {
+  return ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask(
+      (__v8sf)__A, (__v8di)_mm512_setzero_si512(), __U,
+      _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm512_cvtts_roundps_epu64(__A, __R)                                   \
+  ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask(                         \
+      (__v8sf)(__m256)(__A), (__v8di)_mm512_undefined_epi32(), (__mmask8) - 1, \
+      (const int)(__R)))
+
+#define _mm512_mask_cvtts_roundps_epu64(__W, __U, __A, __R)                    \
+  ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask(                         \
+      (__v8sf)(__m256)(__A), (__v8di)(__m512i)(__W), (__mmask8)(__U),          \
+      (const int)(__R)))
+
+#define _mm512_maskz_cvtts_roundps_epu64(__U, __A, __R)                        \
+  ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask(                         \
+      (__v8sf)(__m256)(__A), (__v8di)_mm512_setzero_si512(), (__mmask8)(__U),  \
+      (const int)(__R)))
+
+#undef __DEFAULT_FN_ATTRS
+#endif // __AVX10_2_512SATCVTDSINTRIN_H
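
A minimal usage sketch, not part of the patch, for the 512-bit saturating truncating conversions declared above. It assumes the whole translation unit is built with AVX10.2 512-bit support enabled; the function name and values are illustrative.

#include <immintrin.h>
#include <stdint.h>

void demo512(const double *src, uint64_t *dst) {
  __m512d v = _mm512_loadu_pd(src);
  // Unlike the legacy truncating converts, out-of-range lanes saturate:
  // negative inputs become 0, oversized inputs become UINT64_MAX.
  __m512i q = _mm512_cvttspd_epu64(v);
  // Zero-masking, explicit-SAE form: lanes whose mask bit is clear become 0.
  __m512i r = _mm512_maskz_cvtts_roundpd_epu64((__mmask8)0x0F, v,
                                               _MM_FROUND_NO_EXC);
  _mm512_storeu_si512(dst, q);
  (void)r;
}
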
diff --git clang/lib/Headers/avx10_2satcvtdsintrin.h clang/lib/Headers/avx10_2satcvtdsintrin.h
new file mode 100644
index 000000000000..59028436311e
--- /dev/null
+++ clang/lib/Headers/avx10_2satcvtdsintrin.h
@@ -0,0 +1,496 @@
+/*===----------- avx10_2satcvtdsintrin.h - AVX10.2 SATCVT-DS intrinsics -----===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __IMMINTRIN_H
+#error                                                                         \
+    "Never use <avx10_2satcvtdsintrin.h> directly; include <immintrin.h> instead."
+#endif // __IMMINTRIN_H
+
+#ifndef __AVX10_2SATCVTDSINTRIN_H
+#define __AVX10_2SATCVTDSINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS256                                                  \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"),    \
+                 __min_vector_width__(256)))
+
+#define __DEFAULT_FN_ATTRS128                                                  \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"),    \
+                 __min_vector_width__(128)))
+
+#define _mm_cvtts_roundsd_i32(__A, __R)                                        \
+  ((int)__builtin_ia32_vcvttsd2sis32((__v2df)(__m128d)(__A), (const int)(__R)))
+
+#define _mm_cvtts_roundsd_si32(__A, __R)                                       \
+  ((int)__builtin_ia32_vcvttsd2sis32((__v2df)(__m128d)(__A), (const int)(__R)))
+
+#define _mm_cvtts_roundsd_u32(__A, __R)                                        \
+  ((unsigned int)__builtin_ia32_vcvttsd2usis32((__v2df)(__m128d)(__A),         \
+                                               (const int)(__R)))
+
+#define _mm_cvtts_roundss_i32(__A, __R)                                        \
+  ((int)__builtin_ia32_vcvttss2sis32((__v4sf)(__m128)(__A), (const int)(__R)))
+
+#define _mm_cvtts_roundss_si32(__A, __R)                                       \
+  ((int)__builtin_ia32_vcvttss2sis32((__v4sf)(__m128)(__A), (const int)(__R)))
+
+#define _mm_cvtts_roundss_u32(__A, __R)                                        \
+  ((unsigned int)__builtin_ia32_vcvttss2usis32((__v4sf)(__m128)(__A),          \
+                                               (const int)(__R)))
+
+#ifdef __x86_64__
+#define _mm_cvtts_roundss_u64(__A, __R)                                        \
+  ((unsigned long long)__builtin_ia32_vcvttss2usis64((__v4sf)(__m128)(__A),    \
+                                                     (const int)(__R)))
+
+#define _mm_cvtts_roundsd_u64(__A, __R)                                        \
+  ((unsigned long long)__builtin_ia32_vcvttsd2usis64((__v2df)(__m128d)(__A),   \
+                                                     (const int)(__R)))
+
+#define _mm_cvtts_roundss_i64(__A, __R)                                        \
+  ((long long)__builtin_ia32_vcvttss2sis64((__v4sf)(__m128)(__A),              \
+                                           (const int)(__R)))
+
+#define _mm_cvtts_roundss_si64(__A, __R)                                       \
+  ((long long)__builtin_ia32_vcvttss2sis64((__v4sf)(__m128)(__A),              \
+                                           (const int)(__R)))
+
+#define _mm_cvtts_roundsd_si64(__A, __R)                                       \
+  ((long long)__builtin_ia32_vcvttsd2sis64((__v2df)(__m128d)(__A),             \
+                                           (const int)(__R)))
+
+#define _mm_cvtts_roundsd_i64(__A, __R)                                        \
+  ((long long)__builtin_ia32_vcvttsd2sis64((__v2df)(__m128d)(__A),             \
+                                           (const int)(__R)))
+#endif /* __x86_64__ */
+
+// 128 Bit : Double -> int
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttspd_epi32(__m128d __A) {
+  return ((__m128i)__builtin_ia32_vcvttpd2dqs128_mask(
+      (__v2df)__A, (__v4si)(__m128i)_mm_undefined_si128(), (__mmask8)(-1)));
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvttspd_epi32(__m128i __W, __mmask8 __U, __m128d __A) {
+  return ((__m128i)__builtin_ia32_vcvttpd2dqs128_mask((__v2df)__A, (__v4si)__W,
+                                                      __U));
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvttspd_epi32(__mmask8 __U, __m128d __A) {
+  return ((__m128i)__builtin_ia32_vcvttpd2dqs128_mask(
+      (__v2df)__A, (__v4si)(__m128i)_mm_setzero_si128(), __U));
+}
+
+// 256 Bit : Double -> int
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
+_mm256_cvttspd_epi32(__m256d __A) {
+  return ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask(
+      (__v4df)__A, (__v4si)_mm_undefined_si128(), (__mmask8)-1,
+      _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvttspd_epi32(__m128i __W, __mmask8 __U, __m256d __A) {
+  return ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask(
+      (__v4df)__A, (__v4si)__W, __U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvttspd_epi32(__mmask8 __U, __m256d __A) {
+  return ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask(
+      (__v4df)__A, (__v4si)_mm_setzero_si128(), __U, _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm256_cvtts_roundpd_epi32(__A, __R)                                   \
+  ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask(                          \
+      (__v4df)(__m256d)__A, (__v4si)(__m128i)_mm_undefined_si128(),            \
+      (__mmask8) - 1, (int)(__R)))
+
+#define _mm256_mask_cvtts_roundpd_epi32(__W, __U, __A, __R)                    \
+  ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask(                          \
+      (__v4df)(__m256d)__A, (__v4si)(__m128i)__W, (__mmask8)__U, (int)(__R)))
+
+#define _mm256_maskz_cvtts_roundpd_epi32(__U, __A, __R)                        \
+  ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask(                          \
+      (__v4df)(__m256d)__A, (__v4si)(__m128i)_mm_setzero_si128(),              \
+      (__mmask8)__U, (int)(__R)))
+
+// 128 Bit : Double -> uint
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttspd_epu32(__m128d __A) {
+  return ((__m128i)__builtin_ia32_vcvttpd2udqs128_mask(
+      (__v2df)__A, (__v4si)(__m128i)_mm_undefined_si128(), (__mmask8)(-1)));
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvttspd_epu32(__m128i __W, __mmask8 __U, __m128d __A) {
+  return ((__m128i)__builtin_ia32_vcvttpd2udqs128_mask(
+      (__v2df)__A, (__v4si)(__m128i)__W, (__mmask8)__U));
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvttspd_epu32(__mmask8 __U, __m128d __A) {
+  return ((__m128i)__builtin_ia32_vcvttpd2udqs128_mask(
+      (__v2df)__A, (__v4si)(__m128i)_mm_setzero_si128(), __U));
+}
+
+// 256 Bit : Double -> uint
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
+_mm256_cvttspd_epu32(__m256d __A) {
+  return ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask(
+      (__v4df)__A, (__v4si)_mm_undefined_si128(), (__mmask8)-1,
+      _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvttspd_epu32(__m128i __W, __mmask8 __U, __m256d __A) {
+  return ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask(
+      (__v4df)__A, (__v4si)__W, __U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvttspd_epu32(__mmask8 __U, __m256d __A) {
+  return ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask(
+      (__v4df)__A, (__v4si)_mm_setzero_si128(), __U, _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm256_cvtts_roundpd_epu32(__A, __R)                                   \
+  ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask(                         \
+      (__v4df)(__m256d)__A, (__v4si)(__m128i)_mm_undefined_si128(),            \
+      (__mmask8) - 1, (int)(__R)))
+
+#define _mm256_mask_cvtts_roundpd_epu32(__W, __U, __A, __R)                    \
+  ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask(                         \
+      (__v4df)(__m256d)__A, (__v4si)(__m128i)__W, (__mmask8)__U, (int)(__R)))
+
+#define _mm256_maskz_cvtts_roundpd_epu32(__U, __A, __R)                        \
+  ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask(                         \
+      (__v4df)(__m256d)__A, (__v4si)(__m128i)_mm_setzero_si128(),              \
+      (__mmask8)__U, (int)(__R)))
+
+// 128 Bit : Double -> long
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttspd_epi64(__m128d __A) {
+  return ((__m128i)__builtin_ia32_vcvttpd2qqs128_mask(
+      (__v2df)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1));
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvttspd_epi64(__m128i __W, __mmask8 __U, __m128d __A) {
+  return ((__m128i)__builtin_ia32_vcvttpd2qqs128_mask((__v2df)__A, (__v2di)__W,
+                                                      (__mmask8)__U));
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvttspd_epi64(__mmask8 __U, __m128d __A) {
+  return ((__m128i)__builtin_ia32_vcvttpd2qqs128_mask(
+      (__v2df)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U));
+}
+
+// 256 Bit : Double -> long
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_cvttspd_epi64(__m256d __A) {
+  return ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask(
+      (__v4df)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1,
+      _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvttspd_epi64(__m256i __W, __mmask8 __U, __m256d __A) {
+  return ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask(
+      (__v4df)__A, (__v4di)__W, __U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvttspd_epi64(__mmask8 __U, __m256d __A) {
+  return ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask(
+      (__v4df)__A, (__v4di)_mm256_setzero_si256(), __U,
+      _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm256_cvtts_roundpd_epi64(__A, __R)                                   \
+  ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask(                          \
+      (__v4df)__A, (__v4di)_mm256_undefined_si256(), (__mmask8) - 1,           \
+      (int)__R))
+
+#define _mm256_mask_cvtts_roundpd_epi64(__W, __U, __A, __R)                    \
+  ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask((__v4df)__A, (__v4di)__W, \
+                                                     (__mmask8)__U, (int)__R))
+
+#define _mm256_maskz_cvtts_roundpd_epi64(__U, __A, __R)                        \
+  ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask(                          \
+      (__v4df)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U, (int)__R))
+
+// 128 Bit : Double -> ulong
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttspd_epu64(__m128d __A) {
+  return ((__m128i)__builtin_ia32_vcvttpd2uqqs128_mask(
+      (__v2df)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1));
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvttspd_epu64(__m128i __W, __mmask8 __U, __m128d __A) {
+  return ((__m128i)__builtin_ia32_vcvttpd2uqqs128_mask((__v2df)__A, (__v2di)__W,
+                                                       (__mmask8)__U));
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvttspd_epu64(__mmask8 __U, __m128d __A) {
+  return ((__m128i)__builtin_ia32_vcvttpd2uqqs128_mask(
+      (__v2df)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U));
+}
+
+// 256 Bit : Double -> ulong
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_cvttspd_epu64(__m256d __A) {
+  return ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask(
+      (__v4df)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1,
+      _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvttspd_epu64(__m256i __W, __mmask8 __U, __m256d __A) {
+  return ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask(
+      (__v4df)__A, (__v4di)__W, __U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvttspd_epu64(__mmask8 __U, __m256d __A) {
+  return ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask(
+      (__v4df)__A, (__v4di)_mm256_setzero_si256(), __U,
+      _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm256_cvtts_roundpd_epu64(__A, __R)                                   \
+  ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask(                         \
+      (__v4df)__A, (__v4di)_mm256_undefined_si256(), (__mmask8) - 1,           \
+      (int)__R))
+
+#define _mm256_mask_cvtts_roundpd_epu64(__W, __U, __A, __R)                    \
+  ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask(                         \
+      (__v4df)__A, (__v4di)__W, (__mmask8)__U, (int)__R))
+
+#define _mm256_maskz_cvtts_roundpd_epu64(__U, __A, __R)                        \
+  ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask(                         \
+      (__v4df)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U, (int)__R))
+
+// 128 Bit : float -> int
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttsps_epi32(__m128 __A) {
+  return ((__m128i)__builtin_ia32_vcvttps2dqs128_mask(
+      (__v4sf)__A, (__v4si)(__m128i)_mm_undefined_si128(), (__mmask8)(-1)));
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvttsps_epi32(__m128i __W, __mmask8 __U, __m128 __A) {
+  return ((__m128i)__builtin_ia32_vcvttps2dqs128_mask((__v4sf)__A, (__v4si)__W,
+                                                      (__mmask8)__U));
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvttsps_epi32(__mmask8 __U, __m128 __A) {
+  return ((__m128i)__builtin_ia32_vcvttps2dqs128_mask(
+      (__v4sf)__A, (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)__U));
+}
+
+// 256 Bit : float -> int
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_cvttsps_epi32(__m256 __A) {
+  return ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask(
+      (__v8sf)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1,
+      _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvttsps_epi32(__m256i __W, __mmask8 __U, __m256 __A) {
+  return ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask(
+      (__v8sf)__A, (__v8si)__W, __U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvttsps_epi32(__mmask8 __U, __m256 __A) {
+  return ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask(
+      (__v8sf)__A, (__v8si)_mm256_setzero_si256(), __U,
+      _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm256_cvtts_roundps_epi32(__A, __R)                                   \
+  ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask(                          \
+      (__v8sf)(__m256)__A, (__v8si)(__m256i)_mm256_undefined_si256(),          \
+      (__mmask8) - 1, (int)(__R)))
+
+#define _mm256_mask_cvtts_roundps_epi32(__W, __U, __A, __R)                    \
+  ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask(                          \
+      (__v8sf)(__m256)__A, (__v8si)(__m256i)__W, (__mmask8)__U, (int)(__R)))
+
+#define _mm256_maskz_cvtts_roundps_epi32(__U, __A, __R)                        \
+  ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask(                          \
+      (__v8sf)(__m256)__A, (__v8si)(__m256i)_mm256_setzero_si256(),            \
+      (__mmask8)__U, (int)(__R)))
+
+// 128 Bit : float -> uint
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttsps_epu32(__m128 __A) {
+  return ((__m128i)__builtin_ia32_vcvttps2udqs128_mask(
+      (__v4sf)__A, (__v4si)(__m128i)_mm_undefined_si128(), (__mmask8)(-1)));
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvttsps_epu32(__m128i __W, __mmask8 __U, __m128 __A) {
+  return ((__m128i)__builtin_ia32_vcvttps2udqs128_mask((__v4sf)__A, (__v4si)__W,
+                                                       (__mmask8)__U));
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvttsps_epu32(__mmask8 __U, __m128 __A) {
+  return ((__m128i)__builtin_ia32_vcvttps2udqs128_mask(
+      (__v4sf)__A, (__v4si)_mm_setzero_si128(), (__mmask8)__U));
+}
+
+// 256 Bit : float -> uint
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_cvttsps_epu32(__m256 __A) {
+  return ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask(
+      (__v8sf)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1,
+      _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvttsps_epu32(__m256i __W, __mmask8 __U, __m256 __A) {
+  return ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask(
+      (__v8sf)__A, (__v8si)__W, __U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvttsps_epu32(__mmask8 __U, __m256 __A) {
+  return ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask(
+      (__v8sf)__A, (__v8si)_mm256_setzero_si256(), __U,
+      _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm256_cvtts_roundps_epu32(__A, __R)                                   \
+  ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask(                         \
+      (__v8sf)(__m256)__A, (__v8si)(__m256i)_mm256_undefined_si256(),          \
+      (__mmask8) - 1, (int)(__R)))
+
+#define _mm256_mask_cvtts_roundps_epu32(__W, __U, __A, __R)                    \
+  ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask(                         \
+      (__v8sf)(__m256)__A, (__v8si)(__m256i)__W, (__mmask8)__U, (int)(__R)))
+
+#define _mm256_maskz_cvtts_roundps_epu32(__U, __A, __R)                        \
+  ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask(                         \
+      (__v8sf)(__m256)__A, (__v8si)(__m256i)_mm256_setzero_si256(),            \
+      (__mmask8)__U, (int)(__R)))
+
+// 128 bit : float -> long
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttsps_epi64(__m128 __A) {
+  return ((__m128i)__builtin_ia32_vcvttps2qqs128_mask(
+      (__v4sf)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1));
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvttsps_epi64(__m128i __W, __mmask8 __U, __m128 __A) {
+  return ((__m128i)__builtin_ia32_vcvttps2qqs128_mask(
+      (__v4sf)__A, (__v2di)(__m128i)__W, (__mmask8)__U));
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvttsps_epi64(__mmask8 __U, __m128 __A) {
+  return ((__m128i)__builtin_ia32_vcvttps2qqs128_mask(
+      (__v4sf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U));
+}
+// 256 bit : float -> long
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_cvttsps_epi64(__m128 __A) {
+  return ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask(
+      (__v4sf)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1,
+      _MM_FROUND_CUR_DIRECTION));
+}
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvttsps_epi64(__m256i __W, __mmask8 __U, __m128 __A) {
+  return ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask(
+      (__v4sf)__A, (__v4di)__W, __U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvttsps_epi64(__mmask8 __U, __m128 __A) {
+  return ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask(
+      (__v4sf)__A, (__v4di)_mm256_setzero_si256(), __U,
+      _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm256_cvtts_roundps_epi64(__A, __R)                                   \
+  ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask(                          \
+      (__v4sf)(__m128)__A, (__v4di)_mm256_undefined_si256(), (__mmask8) - 1,   \
+      (int)__R))
+
+#define _mm256_mask_cvtts_roundps_epi64(__W, __U, __A, __R)                    \
+  ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask(                          \
+      (__v4sf)(__m128)__A, (__v4di)__W, (__mmask8)__U, (int)__R))
+
+#define _mm256_maskz_cvtts_roundps_epi64(__U, __A, __R)                        \
+  ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask(                          \
+      (__v4sf)(__m128)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U,      \
+      (int)__R))
+
+// 128 bit : float -> ulong
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttsps_epu64(__m128 __A) {
+  return ((__m128i)__builtin_ia32_vcvttps2uqqs128_mask(
+      (__v4sf)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1));
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvttsps_epu64(__m128i __W, __mmask8 __U, __m128 __A) {
+  return ((__m128i)__builtin_ia32_vcvttps2uqqs128_mask(
+      (__v4sf)__A, (__v2di)(__m128i)__W, (__mmask8)__U));
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvttsps_epu64(__mmask8 __U, __m128 __A) {
+  return ((__m128i)__builtin_ia32_vcvttps2uqqs128_mask(
+      (__v4sf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U));
+}
+// 256 bit : float -> ulong
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_cvttsps_epu64(__m128 __A) {
+  return ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask(
+      (__v4sf)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1,
+      _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvttsps_epu64(__m256i __W, __mmask8 __U, __m128 __A) {
+  return ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask(
+      (__v4sf)__A, (__v4di)__W, __U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvttsps_epu64(__mmask8 __U, __m128 __A) {
+  return ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask(
+      (__v4sf)__A, (__v4di)_mm256_setzero_si256(), __U,
+      _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm256_cvtts_roundps_epu64(__A, __R)                                   \
+  ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask(                         \
+      (__v4sf)(__m128)__A, (__v4di)_mm256_undefined_si256(), (__mmask8) - 1,   \
+      (int)__R))
+
+#define _mm256_mask_cvtts_roundps_epu64(__W, __U, __A, __R)                    \
+  ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask(                         \
+      (__v4sf)(__m128)__A, (__v4di)__W, (__mmask8)__U, (int)__R))
+
+#define _mm256_maskz_cvtts_roundps_epu64(__U, __A, __R)                        \
+  ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask(                         \
+      (__v4sf)(__m128)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U,      \
+      (int)__R))
+
+#undef __DEFAULT_FN_ATTRS128
+#undef __DEFAULT_FN_ATTRS256
+#endif // __AVX10_2SATCVTDSINTRIN_H
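
A similar sketch, not part of the patch, for the 128/256-bit forms above, assuming AVX10.2 256-bit support is enabled for the whole translation unit; names and values are illustrative.

#include <immintrin.h>

void demo256(const double *src, int *dst) {
  __m256d v = _mm256_loadu_pd(src);
  // Four doubles -> four saturating 32-bit ints, returned in a __m128i.
  __m128i i32 = _mm256_cvttspd_epi32(v);
  // Merge-masking with explicit SAE: mask 0x5 converts lanes 0 and 2, while
  // lanes 1 and 3 keep the values from 'old'.
  __m128i old = _mm_set1_epi32(-1);
  __m128i m = _mm256_mask_cvtts_roundpd_epi32(old, 0x5, v, _MM_FROUND_NO_EXC);
  _mm_storeu_si128((__m128i *)dst, i32);
  (void)m;
}
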
diff --git clang/lib/Headers/hexagon_types.h clang/lib/Headers/hexagon_types.h
index 029727cc4817..8e73fad4bcd4 100644
--- clang/lib/Headers/hexagon_types.h
+++ clang/lib/Headers/hexagon_types.h
@@ -1,7 +1,11 @@
-/******************************************************************************/
-/*   (c) 2020 Qualcomm Innovation Center, Inc. All rights reserved.           */
-/*                                                                            */
-/******************************************************************************/
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
 #ifndef HEXAGON_TYPES_H
 #define HEXAGON_TYPES_H
 
diff --git clang/lib/Headers/hlsl/hlsl_intrinsics.h clang/lib/Headers/hlsl/hlsl_intrinsics.h
index 6d38b668fe77..7a1edd93984d 100644
--- clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -1603,6 +1603,32 @@ double3 saturate(double3);
 _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate)
 double4 saturate(double4);
 
+//===----------------------------------------------------------------------===//
+// select builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn T select(bool Cond, T TrueVal, T FalseVal)
+/// \brief Ternary operator.
+/// \param Cond The condition input value.
+/// \param TrueVal The value returned if Cond is true.
+/// \param FalseVal The value returned if Cond is false.
+
+template <typename T>
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_select)
+T select(bool, T, T);
+
+/// \fn vector<T,Sz> select(vector<bool,Sz> Conds, vector<T,Sz> TrueVals,
+///                         vector<T,Sz> FalseVals)
+/// \brief Ternary operator for vectors. All vectors must be the same size.
+/// \param Conds The condition input values.
+/// \param TrueVals The vector supplying values where the conditions are true.
+/// \param FalseVals The vector supplying values where the conditions are
+/// false.
+
+template <typename T, int Sz>
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_select)
+vector<T, Sz> select(vector<bool, Sz>, vector<T, Sz>, vector<T, Sz>);
+
 //===----------------------------------------------------------------------===//
 // sin builtins
 //===----------------------------------------------------------------------===//
@@ -1796,5 +1822,80 @@ _HLSL_AVAILABILITY(shadermodel, 6.0)
 _HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_get_lane_index)
 __attribute__((convergent)) uint WaveGetLaneIndex();
 
+_HLSL_AVAILABILITY(shadermodel, 6.0)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_is_first_lane)
+__attribute__((convergent)) bool WaveIsFirstLane();
+
+//===----------------------------------------------------------------------===//
+// sign builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn T sign(T Val)
+/// \brief Returns -1 if \a Val is less than zero; 0 if \a Val equals zero; and
+/// 1 if \a Val is greater than zero.
+/// \param Val The input value.
+
+#ifdef __HLSL_ENABLE_16_BIT
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign)
+int sign(int16_t);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign)
+int2 sign(int16_t2);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign)
+int3 sign(int16_t3);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign)
+int4 sign(int16_t4);
+#endif
+
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign)
+int sign(half);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign)
+int2 sign(half2);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign)
+int3 sign(half3);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign)
+int4 sign(half4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign)
+int sign(int);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign)
+int2 sign(int2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign)
+int3 sign(int3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign)
+int4 sign(int4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign)
+int sign(float);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign)
+int2 sign(float2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign)
+int3 sign(float3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign)
+int4 sign(float4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign)
+int sign(int64_t);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign)
+int2 sign(int64_t2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign)
+int3 sign(int64_t3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign)
+int4 sign(int64_t4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign)
+int sign(double);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign)
+int2 sign(double2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign)
+int3 sign(double3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign)
+int4 sign(double4);
 } // namespace hlsl
 #endif //_HLSL_HLSL_INTRINSICS_H_
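
A brief HLSL usage sketch for the select and sign overloads added above; the functions and values are illustrative only.

float3 pick(float3 a, float3 b) {
  bool3 c = a < 0;          // per-component conditions
  return select(c, a, b);   // takes a where c is true, b otherwise
}

int3 signs() {
  return sign(int3(-4, 0, 7));  // yields (-1, 0, 1)
}
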
diff --git clang/lib/Headers/immintrin.h clang/lib/Headers/immintrin.h
index 30fcc028958f..280154f3c102 100644
--- clang/lib/Headers/immintrin.h
+++ clang/lib/Headers/immintrin.h
@@ -653,6 +653,7 @@ _storebe_i64(void * __P, long long __D) {
 #include <avx10_2convertintrin.h>
 #include <avx10_2minmaxintrin.h>
 #include <avx10_2niintrin.h>
+#include <avx10_2satcvtdsintrin.h>
 #include <avx10_2satcvtintrin.h>
 #endif
 
@@ -661,6 +662,7 @@ _storebe_i64(void * __P, long long __D) {
 #include <avx10_2_512convertintrin.h>
 #include <avx10_2_512minmaxintrin.h>
 #include <avx10_2_512niintrin.h>
+#include <avx10_2_512satcvtdsintrin.h>
 #include <avx10_2_512satcvtintrin.h>
 #endif
 
diff --git clang/lib/Headers/wasm_simd128.h clang/lib/Headers/wasm_simd128.h
index 67d12f6f2cf4..14e36e85da8e 100644
--- clang/lib/Headers/wasm_simd128.h
+++ clang/lib/Headers/wasm_simd128.h
@@ -1888,18 +1888,17 @@ static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_splat(float __a) {
   return (v128_t)__builtin_wasm_splat_f16x8(__a);
 }
 
-static __inline__ float __FP16_FN_ATTRS wasm_f16x8_extract_lane(v128_t __a,
-                                                                int __i)
-    __REQUIRE_CONSTANT(__i) {
-  return __builtin_wasm_extract_lane_f16x8((__f16x8)__a, __i);
-}
+#ifdef __wasm_fp16__
+// TODO Replace the following macros with regular C functions and use normal
+// target-independent vector code like the other replace/extract instructions.
 
-static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_replace_lane(v128_t __a,
-                                                                 int __i,
-                                                                 float __b)
-    __REQUIRE_CONSTANT(__i) {
-  return (v128_t)__builtin_wasm_replace_lane_f16x8((__f16x8)__a, __i, __b);
-}
+#define wasm_f16x8_extract_lane(__a, __i)                                      \
+  (__builtin_wasm_extract_lane_f16x8((__f16x8)(__a), __i))
+
+#define wasm_f16x8_replace_lane(__a, __i, __b)                                 \
+  ((v128_t)__builtin_wasm_replace_lane_f16x8((__f16x8)(__a), __i, __b))
+
+#endif
 
 static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_abs(v128_t __a) {
   return (v128_t)__builtin_wasm_abs_f16x8((__f16x8)__a);
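
A usage sketch, not part of the patch, for the new macro forms; it assumes the fp16 and simd128 features are enabled for the whole translation unit, so __wasm_fp16__ is defined.

#include <wasm_simd128.h>

#ifdef __wasm_fp16__
float bump_first_lane(v128_t v) {
  // The lane index must still be an integer constant expression.
  float x = wasm_f16x8_extract_lane(v, 0);
  v128_t w = wasm_f16x8_replace_lane(v, 0, x + 1.0f);
  return wasm_f16x8_extract_lane(w, 0);
}
#endif
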
diff --git clang/lib/Lex/Lexer.cpp clang/lib/Lex/Lexer.cpp
index ef1e1f4bd9ae..8647e9f2f27c 100644
--- clang/lib/Lex/Lexer.cpp
+++ clang/lib/Lex/Lexer.cpp
@@ -2428,7 +2428,9 @@ bool Lexer::LexCharConstant(Token &Result, const char *CurPtr,
                           ? diag::warn_cxx98_compat_unicode_literal
                           : diag::warn_c99_compat_unicode_literal);
     else if (Kind == tok::utf8_char_constant)
-      Diag(BufferPtr, diag::warn_cxx14_compat_u8_character_literal);
+      Diag(BufferPtr, LangOpts.CPlusPlus
+                          ? diag::warn_cxx14_compat_u8_character_literal
+                          : diag::warn_c17_compat_u8_character_literal);
   }
 
   char C = getAndAdvanceChar(CurPtr, Result);
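
The new branch selects the C compatibility warning for u8 character literals, which are a C23 feature, when not compiling C++. An illustrative C23 snippet (the variable name is hypothetical):

unsigned char c = u8'x';  // OK in C23; pre-C23 compatibility warnings flag it
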
diff --git clang/lib/Parse/ParseDecl.cpp clang/lib/Parse/ParseDecl.cpp
index 78d729c5ef7d..1f56884be392 100644
--- clang/lib/Parse/ParseDecl.cpp
+++ clang/lib/Parse/ParseDecl.cpp
@@ -146,6 +146,86 @@ void Parser::ParseAttributes(unsigned WhichAttrKinds, ParsedAttributes &Attrs,
   } while (MoreToParse);
 }
 
+/// ParseSingleGNUAttribute - Parse a single GNU attribute.
+///
+/// [GNU]  attrib:
+///          empty
+///          attrib-name
+///          attrib-name '(' identifier ')'
+///          attrib-name '(' identifier ',' nonempty-expr-list ')'
+///          attrib-name '(' argument-expression-list [C99 6.5.2] ')'
+///
+/// [GNU]  attrib-name:
+///          identifier
+///          typespec
+///          typequal
+///          storageclass
+bool Parser::ParseSingleGNUAttribute(ParsedAttributes &Attrs,
+                                     SourceLocation &EndLoc,
+                                     LateParsedAttrList *LateAttrs,
+                                     Declarator *D) {
+  IdentifierInfo *AttrName = Tok.getIdentifierInfo();
+  if (!AttrName)
+    return true;
+
+  SourceLocation AttrNameLoc = ConsumeToken();
+
+  if (Tok.isNot(tok::l_paren)) {
+    Attrs.addNew(AttrName, AttrNameLoc, nullptr, AttrNameLoc, nullptr, 0,
+                 ParsedAttr::Form::GNU());
+    return false;
+  }
+
+  bool LateParse = false;
+  if (!LateAttrs)
+    LateParse = false;
+  else if (LateAttrs->lateAttrParseExperimentalExtOnly()) {
+    // The caller requested that this attribute **only** be late
+    // parsed for `LateAttrParseExperimentalExt` attributes. This will
+    // only be late parsed if the experimental language option is enabled.
+    LateParse = getLangOpts().ExperimentalLateParseAttributes &&
+                IsAttributeLateParsedExperimentalExt(*AttrName);
+  } else {
+    // The caller did not restrict late parsing to only
+    // `LateAttrParseExperimentalExt` attributes so late parse
+    // both `LateAttrParseStandard` and `LateAttrParseExperimentalExt`
+    // attributes.
+    LateParse = IsAttributeLateParsedExperimentalExt(*AttrName) ||
+                IsAttributeLateParsedStandard(*AttrName);
+  }
+
+  // Handle "parameterized" attributes
+  if (!LateParse) {
+    ParseGNUAttributeArgs(AttrName, AttrNameLoc, Attrs, &EndLoc, nullptr,
+                          SourceLocation(), ParsedAttr::Form::GNU(), D);
+    return false;
+  }
+
+  // Handle attributes with arguments that require late parsing.
+  LateParsedAttribute *LA =
+      new LateParsedAttribute(this, *AttrName, AttrNameLoc);
+  LateAttrs->push_back(LA);
+
+  // Attributes in a class are parsed at the end of the class, along
+  // with other late-parsed declarations.
+  if (!ClassStack.empty() && !LateAttrs->parseSoon())
+    getCurrentClass().LateParsedDeclarations.push_back(LA);
+
+  // Be sure ConsumeAndStoreUntil doesn't see the start l_paren, since it
+  // recursively consumes balanced parens.
+  LA->Toks.push_back(Tok);
+  ConsumeParen();
+  // Consume everything up to and including the matching right parens.
+  ConsumeAndStoreUntil(tok::r_paren, LA->Toks, /*StopAtSemi=*/true);
+
+  Token Eof;
+  Eof.startToken();
+  Eof.setLocation(Tok.getLocation());
+  LA->Toks.push_back(Eof);
+
+  return false;
+}
+
 /// ParseGNUAttributes - Parse a non-empty attributes list.
 ///
 /// [GNU] attributes:
@@ -223,64 +303,9 @@ void Parser::ParseGNUAttributes(ParsedAttributes &Attrs,
             AttributeCommonInfo::Syntax::AS_GNU);
         break;
       }
-      IdentifierInfo *AttrName = Tok.getIdentifierInfo();
-      if (!AttrName)
-        break;
 
-      SourceLocation AttrNameLoc = ConsumeToken();
-
-      if (Tok.isNot(tok::l_paren)) {
-        Attrs.addNew(AttrName, AttrNameLoc, nullptr, AttrNameLoc, nullptr, 0,
-                     ParsedAttr::Form::GNU());
-        continue;
-      }
-
-      bool LateParse = false;
-      if (!LateAttrs)
-        LateParse = false;
-      else if (LateAttrs->lateAttrParseExperimentalExtOnly()) {
-        // The caller requested that this attribute **only** be late
-        // parsed for `LateAttrParseExperimentalExt` attributes. This will
-        // only be late parsed if the experimental language option is enabled.
-        LateParse = getLangOpts().ExperimentalLateParseAttributes &&
-                    IsAttributeLateParsedExperimentalExt(*AttrName);
-      } else {
-        // The caller did not restrict late parsing to only
-        // `LateAttrParseExperimentalExt` attributes so late parse
-        // both `LateAttrParseStandard` and `LateAttrParseExperimentalExt`
-        // attributes.
-        LateParse = IsAttributeLateParsedExperimentalExt(*AttrName) ||
-                    IsAttributeLateParsedStandard(*AttrName);
-      }
-
-      // Handle "parameterized" attributes
-      if (!LateParse) {
-        ParseGNUAttributeArgs(AttrName, AttrNameLoc, Attrs, &EndLoc, nullptr,
-                              SourceLocation(), ParsedAttr::Form::GNU(), D);
-        continue;
-      }
-
-      // Handle attributes with arguments that require late parsing.
-      LateParsedAttribute *LA =
-          new LateParsedAttribute(this, *AttrName, AttrNameLoc);
-      LateAttrs->push_back(LA);
-
-      // Attributes in a class are parsed at the end of the class, along
-      // with other late-parsed declarations.
-      if (!ClassStack.empty() && !LateAttrs->parseSoon())
-        getCurrentClass().LateParsedDeclarations.push_back(LA);
-
-      // Be sure ConsumeAndStoreUntil doesn't see the start l_paren, since it
-      // recursively consumes balanced parens.
-      LA->Toks.push_back(Tok);
-      ConsumeParen();
-      // Consume everything up to and including the matching right parens.
-      ConsumeAndStoreUntil(tok::r_paren, LA->Toks, /*StopAtSemi=*/true);
-
-      Token Eof;
-      Eof.startToken();
-      Eof.setLocation(Tok.getLocation());
-      LA->Toks.push_back(Eof);
+      if (ParseSingleGNUAttribute(Attrs, EndLoc, LateAttrs, D))
+        break;
     } while (Tok.is(tok::comma));
 
     if (ExpectAndConsume(tok::r_paren))
@@ -2484,8 +2509,9 @@ Parser::DeclGroupPtrTy Parser::ParseDeclGroup(ParsingDeclSpec &DS,
 
       // P2718R0 - Lifetime extension in range-based for loops.
       if (getLangOpts().CPlusPlus23) {
-        auto &LastRecord = Actions.ExprEvalContexts.back();
+        auto &LastRecord = Actions.currentEvaluationContext();
         LastRecord.InLifetimeExtendingContext = true;
+        LastRecord.RebuildDefaultArgOrDefaultInit = true;
       }
 
       if (getLangOpts().OpenMP)
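
For reference, the attribute shapes covered by the grammar comment on ParseSingleGNUAttribute above are the familiar GNU spellings, for example:

int counter __attribute__((unused));        // attrib-name only
void *buf __attribute__((aligned(16)));     // attrib-name '(' expr-list ')'
void log_msg(const char *fmt, ...)
    __attribute__((format(printf, 1, 2)));  // identifier ',' expr-list
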
diff --git clang/lib/Parse/ParseDeclCXX.cpp clang/lib/Parse/ParseDeclCXX.cpp
index 7ca27d00c0bc..6370da1fab00 100644
--- clang/lib/Parse/ParseDeclCXX.cpp
+++ clang/lib/Parse/ParseDeclCXX.cpp
@@ -1076,7 +1076,7 @@ Decl *Parser::ParseStaticAssertDeclaration(SourceLocation &DeclEnd) {
     }
 
     bool ParseAsExpression = false;
-    if (getLangOpts().CPlusPlus26) {
+    if (getLangOpts().CPlusPlus11) {
       for (unsigned I = 0;; ++I) {
         const Token &T = GetLookAheadToken(I);
         if (T.is(tok::r_paren))
@@ -1088,9 +1088,13 @@ Decl *Parser::ParseStaticAssertDeclaration(SourceLocation &DeclEnd) {
       }
     }
 
-    if (ParseAsExpression)
+    if (ParseAsExpression) {
+      Diag(Tok,
+           getLangOpts().CPlusPlus26
+               ? diag::warn_cxx20_compat_static_assert_user_generated_message
+               : diag::ext_cxx_static_assert_user_generated_message);
       AssertMessage = ParseConstantExpressionInExprEvalContext();
-    else if (tokenIsLikeStringLiteral(Tok, getLangOpts()))
+    } else if (tokenIsLikeStringLiteral(Tok, getLangOpts()))
       AssertMessage = ParseUnevaluatedStringLiteralExpression();
     else {
       Diag(Tok, diag::err_expected_string_literal)
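
A sketch of the user-generated static_assert message that the relaxed check above now parses as an expression from C++11 onward (accepted as an extension before C++26). Msg is a hypothetical type providing the required constexpr data()/size() members:

struct Msg {
  constexpr const char *data() const { return "bad configuration"; }
  constexpr unsigned long size() const {
    return sizeof("bad configuration") - 1;
  }
};

static_assert(sizeof(void *) >= 4, Msg{});  // message is an expression
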
diff --git clang/lib/Parse/ParseStmt.cpp clang/lib/Parse/ParseStmt.cpp
index bdb3fc051d0b..9188799fce13 100644
--- clang/lib/Parse/ParseStmt.cpp
+++ clang/lib/Parse/ParseStmt.cpp
@@ -228,7 +228,7 @@ Retry:
         return StmtError();
       }
 
-      // If the identifier was typo-corrected, try again.
+      // If the identifier was annotated, try again.
       if (Tok.isNot(tok::identifier))
         goto Retry;
     }
diff --git clang/lib/Parse/ParseTemplate.cpp clang/lib/Parse/ParseTemplate.cpp
index 6ecfc15757f3..de29652abbfd 100644
--- clang/lib/Parse/ParseTemplate.cpp
+++ clang/lib/Parse/ParseTemplate.cpp
@@ -959,7 +959,7 @@ Parser::ParseNonTypeTemplateParameter(unsigned Depth, unsigned Position) {
       ++CurTemplateDepthTracker;
       EnterExpressionEvaluationContext ConstantEvaluated(
           Actions, Sema::ExpressionEvaluationContext::ConstantEvaluated);
-      DefaultArg = Actions.CorrectDelayedTyposInExpr(ParseInitializer());
+      DefaultArg = Actions.ActOnConstantExpression(ParseInitializer());
       if (DefaultArg.isInvalid())
         SkipUntil(tok::comma, tok::greater, StopAtSemi | StopBeforeMatch);
     }
diff --git clang/lib/Sema/AnalysisBasedWarnings.cpp clang/lib/Sema/AnalysisBasedWarnings.cpp
index e6ce89dc7ec4..117b2c8bc579 100644
--- clang/lib/Sema/AnalysisBasedWarnings.cpp
+++ clang/lib/Sema/AnalysisBasedWarnings.cpp
@@ -2304,6 +2304,20 @@ public:
     }
   }
 
+  void handleUnsafeLibcCall(const CallExpr *Call, unsigned PrintfInfo,
+                            ASTContext &Ctx,
+                            const Expr *UnsafeArg = nullptr) override {
+    S.Diag(Call->getBeginLoc(), diag::warn_unsafe_buffer_libc_call)
+        << Call->getDirectCallee() // We've checked there is a direct callee
+        << Call->getSourceRange();
+    if (PrintfInfo > 0) {
+      SourceRange R =
+          UnsafeArg ? UnsafeArg->getSourceRange() : Call->getSourceRange();
+      S.Diag(R.getBegin(), diag::note_unsafe_buffer_printf_call)
+          << PrintfInfo << R;
+    }
+  }
+
   void handleUnsafeOperationInContainer(const Stmt *Operation,
                                         bool IsRelatedToDecl,
                                         ASTContext &Ctx) override {
@@ -2382,6 +2396,10 @@ public:
     return S.Diags.isIgnored(diag::warn_unsafe_buffer_usage_in_container, Loc);
   }
 
+  bool ignoreUnsafeBufferInLibcCall(const SourceLocation &Loc) const override {
+    return S.Diags.isIgnored(diag::warn_unsafe_buffer_libc_call, Loc);
+  }
+
   // Returns the text representation of clang::unsafe_buffer_usage attribute.
   // `WSSuffix` holds customized "white-space"s, e.g., newline or whitespace
   // characters.
@@ -2548,6 +2566,8 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings(
         !Diags.isIgnored(diag::warn_unsafe_buffer_variable,
                          Node->getBeginLoc()) ||
         !Diags.isIgnored(diag::warn_unsafe_buffer_usage_in_container,
+                         Node->getBeginLoc()) ||
+        !Diags.isIgnored(diag::warn_unsafe_buffer_libc_call,
                          Node->getBeginLoc())) {
       clang::checkUnsafeBufferUsage(Node, R,
                                     UnsafeBufferUsageShouldEmitSuggestions);
@@ -2560,7 +2580,8 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings(
   if (!Diags.isIgnored(diag::warn_unsafe_buffer_operation, SourceLocation()) ||
       !Diags.isIgnored(diag::warn_unsafe_buffer_variable, SourceLocation()) ||
       !Diags.isIgnored(diag::warn_unsafe_buffer_usage_in_container,
-                       SourceLocation())) {
+                       SourceLocation()) ||
+      !Diags.isIgnored(diag::warn_unsafe_buffer_libc_call, SourceLocation())) {
     CallableVisitor(CallAnalyzers).TraverseTranslationUnitDecl(TU);
   }
 }
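
Hypothetical code of the kind the new warn_unsafe_buffer_libc_call path is intended to flag under the unsafe-buffer-usage analysis; which libc functions are actually diagnosed is decided in UnsafeBufferUsage.cpp, not in this hunk.

#include <cstdio>
#include <cstring>

void greet(char *dst, const char *name) {
  std::strcpy(dst, name);     // raw-buffer libc call
  std::printf("%s\n", name);  // "%s" with a raw pointer gets the printf note
}
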
diff --git clang/lib/Sema/CheckExprLifetime.cpp clang/lib/Sema/CheckExprLifetime.cpp
index f7540a6e3a89..f62e18543851 100644
--- clang/lib/Sema/CheckExprLifetime.cpp
+++ clang/lib/Sema/CheckExprLifetime.cpp
@@ -237,13 +237,11 @@ static bool pathContainsInit(IndirectLocalPath &Path) {
 
 static void visitLocalsRetainedByInitializer(IndirectLocalPath &Path,
                                              Expr *Init, LocalVisitor Visit,
-                                             bool RevisitSubinits,
-                                             bool EnableLifetimeWarnings);
+                                             bool RevisitSubinits);
 
 static void visitLocalsRetainedByReferenceBinding(IndirectLocalPath &Path,
                                                   Expr *Init, ReferenceKind RK,
-                                                  LocalVisitor Visit,
-                                                  bool EnableLifetimeWarnings);
+                                                  LocalVisitor Visit);
 
 template <typename T> static bool isRecordWithAttr(QualType Type) {
   if (auto *RD = Type->getAsCXXRecordDecl())
@@ -290,7 +288,8 @@ static bool shouldTrackImplicitObjectArg(const CXXMethodDecl *Callee) {
         // Map and set types.
         .Cases("find", "equal_range", "lower_bound", "upper_bound", true)
         .Default(false);
-  } else if (Callee->getReturnType()->isReferenceType()) {
+  }
+  if (Callee->getReturnType()->isReferenceType()) {
     if (!Callee->getIdentifier()) {
       auto OO = Callee->getOverloadedOperator();
       return OO == OverloadedOperatorKind::OO_Subscript ||
@@ -318,7 +317,8 @@ static bool shouldTrackFirstArgument(const FunctionDecl *FD) {
         .Cases("end", "rend", "cend", "crend", true)
         .Case("data", true)
         .Default(false);
-  } else if (FD->getReturnType()->isReferenceType()) {
+  }
+  if (FD->getReturnType()->isReferenceType()) {
     return llvm::StringSwitch<bool>(FD->getName())
         .Cases("get", "any_cast", true)
         .Default(false);
@@ -330,7 +330,7 @@ static bool shouldTrackFirstArgument(const FunctionDecl *FD) {
 // We assume that a normal assignment operator always returns *this, that is,
 // an lvalue reference that is the same type as the implicit object parameter
 // (or the LHS for a non-member operator$=).
-static bool isNormalAsisgnmentOperator(const FunctionDecl *FD) {
+static bool isNormalAssignmentOperator(const FunctionDecl *FD) {
   OverloadedOperatorKind OO = FD->getDeclName().getCXXOverloadedOperator();
   if (OO == OO_Equal || isCompoundAssignmentOperator(OO)) {
     QualType RetT = FD->getReturnType();
@@ -364,13 +364,12 @@ static bool implicitObjectParamIsLifetimeBound(const FunctionDecl *FD) {
       return true;
   }
 
-  return isNormalAsisgnmentOperator(FD);
+  return isNormalAssignmentOperator(FD);
 }
 
 // Visit lifetimebound or gsl-pointer arguments.
 static void visitFunctionCallArguments(IndirectLocalPath &Path, Expr *Call,
-                                       LocalVisitor Visit,
-                                       bool EnableLifetimeWarnings) {
+                                       LocalVisitor Visit) {
   const FunctionDecl *Callee;
   ArrayRef<Expr *> Args;
 
@@ -385,6 +384,8 @@ static void visitFunctionCallArguments(IndirectLocalPath &Path, Expr *Call,
   if (!Callee)
     return;
 
+  bool EnableGSLAnalysis = !Callee->getASTContext().getDiagnostics().isIgnored(
+      diag::warn_dangling_lifetime_pointer, SourceLocation());
   Expr *ObjectArg = nullptr;
   if (isa<CXXOperatorCallExpr>(Call) && Callee->isCXXInstanceMember()) {
     ObjectArg = Args[0];
@@ -397,22 +398,25 @@ static void visitFunctionCallArguments(IndirectLocalPath &Path, Expr *Call,
     Path.push_back({IndirectLocalPathEntry::LifetimeBoundCall, Arg, D});
     if (Arg->isGLValue())
       visitLocalsRetainedByReferenceBinding(Path, Arg, RK_ReferenceBinding,
-                                            Visit,
-                                            /*EnableLifetimeWarnings=*/false);
+                                            Visit);
     else
-      visitLocalsRetainedByInitializer(Path, Arg, Visit, true,
-                                       /*EnableLifetimeWarnings=*/false);
+      visitLocalsRetainedByInitializer(Path, Arg, Visit, true);
     Path.pop_back();
   };
-  auto VisitGSLPointerArg = [&](const Decl *D, Expr *Arg, bool Value) {
+  auto VisitGSLPointerArg = [&](const FunctionDecl *Callee, Expr *Arg) {
     // We are not interested in the temporary base objects of gsl Pointers:
     //   Temp().ptr; // Here ptr might not dangle.
     if (isa<MemberExpr>(Arg->IgnoreImpCasts()))
       return;
-    // Once we initialized a value with a reference, it can no longer dangle.
-    if (!Value) {
+    auto ReturnType = Callee->getReturnType();
+
+    // Once we have initialized a value with a non-gsl-owner reference, it can
+    // no longer dangle.
+    if (ReturnType->isReferenceType() &&
+        !isRecordWithAttr<OwnerAttr>(ReturnType->getPointeeType())) {
       for (const IndirectLocalPathEntry &PE : llvm::reverse(Path)) {
-        if (PE.Kind == IndirectLocalPathEntry::GslReferenceInit)
+        if (PE.Kind == IndirectLocalPathEntry::GslReferenceInit ||
+            PE.Kind == IndirectLocalPathEntry::LifetimeBoundCall)
           continue;
         if (PE.Kind == IndirectLocalPathEntry::GslPointerInit ||
             PE.Kind == IndirectLocalPathEntry::GslPointerAssignment)
@@ -420,16 +424,15 @@ static void visitFunctionCallArguments(IndirectLocalPath &Path, Expr *Call,
         break;
       }
     }
-    Path.push_back({Value ? IndirectLocalPathEntry::GslPointerInit
-                          : IndirectLocalPathEntry::GslReferenceInit,
-                    Arg, D});
+    Path.push_back({ReturnType->isReferenceType()
+                        ? IndirectLocalPathEntry::GslReferenceInit
+                        : IndirectLocalPathEntry::GslPointerInit,
+                    Arg, Callee});
     if (Arg->isGLValue())
       visitLocalsRetainedByReferenceBinding(Path, Arg, RK_ReferenceBinding,
-                                            Visit,
-                                            /*EnableLifetimeWarnings=*/true);
+                                            Visit);
     else
-      visitLocalsRetainedByInitializer(Path, Arg, Visit, true,
-                                       /*EnableLifetimeWarnings=*/true);
+      visitLocalsRetainedByInitializer(Path, Arg, Visit, true);
     Path.pop_back();
   };
 
@@ -452,11 +455,10 @@ static void visitFunctionCallArguments(IndirectLocalPath &Path, Expr *Call,
       CheckCoroObjArg = false;
     if (implicitObjectParamIsLifetimeBound(Callee) || CheckCoroObjArg)
       VisitLifetimeBoundArg(Callee, ObjectArg);
-    else if (EnableLifetimeWarnings) {
+    else if (EnableGSLAnalysis) {
       if (auto *CME = dyn_cast<CXXMethodDecl>(Callee);
           CME && shouldTrackImplicitObjectArg(CME))
-        VisitGSLPointerArg(Callee, ObjectArg,
-                           !Callee->getReturnType()->isReferenceType());
+        VisitGSLPointerArg(Callee, ObjectArg);
     }
   }
 
@@ -465,15 +467,13 @@ static void visitFunctionCallArguments(IndirectLocalPath &Path, Expr *Call,
        I != N; ++I) {
     if (CheckCoroCall || Callee->getParamDecl(I)->hasAttr<LifetimeBoundAttr>())
       VisitLifetimeBoundArg(Callee->getParamDecl(I), Args[I]);
-    else if (EnableLifetimeWarnings && I == 0) {
+    else if (EnableGSLAnalysis && I == 0) {
       if (shouldTrackFirstArgument(Callee)) {
-        VisitGSLPointerArg(Callee, Args[0],
-                           !Callee->getReturnType()->isReferenceType());
-      } else {
-        if (auto *CCE = dyn_cast<CXXConstructExpr>(Call);
-            CCE && CCE->getConstructor()->getParent()->hasAttr<PointerAttr>())
-          VisitGSLPointerArg(CCE->getConstructor()->getParamDecl(0), Args[0],
-                             true);
+        VisitGSLPointerArg(Callee, Args[0]);
+      } else if (auto *CCE = dyn_cast<CXXConstructExpr>(Call);
+                 CCE &&
+                 CCE->getConstructor()->getParent()->hasAttr<PointerAttr>()) {
+        VisitGSLPointerArg(CCE->getConstructor(), Args[0]);
       }
     }
   }
@@ -483,8 +483,7 @@ static void visitFunctionCallArguments(IndirectLocalPath &Path, Expr *Call,
 /// glvalue expression \c Init.
 static void visitLocalsRetainedByReferenceBinding(IndirectLocalPath &Path,
                                                   Expr *Init, ReferenceKind RK,
-                                                  LocalVisitor Visit,
-                                                  bool EnableLifetimeWarnings) {
+                                                  LocalVisitor Visit) {
   RevertToOldSizeRAII RAII(Path);
 
   // Walk past any constructs which we can lifetime-extend across.
@@ -521,8 +520,7 @@ static void visitLocalsRetainedByReferenceBinding(IndirectLocalPath &Path,
       else
         // We can't lifetime extend through this but we might still find some
         // retained temporaries.
-        return visitLocalsRetainedByInitializer(Path, Init, Visit, true,
-                                                EnableLifetimeWarnings);
+        return visitLocalsRetainedByInitializer(Path, Init, Visit, true);
     }
 
     // Step into CXXDefaultInitExprs so we can diagnose cases where a
@@ -536,21 +534,18 @@ static void visitLocalsRetainedByReferenceBinding(IndirectLocalPath &Path,
 
   if (auto *MTE = dyn_cast<MaterializeTemporaryExpr>(Init)) {
     if (Visit(Path, Local(MTE), RK))
-      visitLocalsRetainedByInitializer(Path, MTE->getSubExpr(), Visit, true,
-                                       EnableLifetimeWarnings);
+      visitLocalsRetainedByInitializer(Path, MTE->getSubExpr(), Visit, true);
   }
 
   if (auto *M = dyn_cast<MemberExpr>(Init)) {
     // Lifetime of a non-reference type field is same as base object.
     if (auto *F = dyn_cast<FieldDecl>(M->getMemberDecl());
         F && !F->getType()->isReferenceType())
-      visitLocalsRetainedByInitializer(Path, M->getBase(), Visit, true,
-                                       EnableLifetimeWarnings);
+      visitLocalsRetainedByInitializer(Path, M->getBase(), Visit, true);
   }
 
   if (isa<CallExpr>(Init))
-    return visitFunctionCallArguments(Path, Init, Visit,
-                                      EnableLifetimeWarnings);
+    return visitFunctionCallArguments(Path, Init, Visit);
 
   switch (Init->getStmtClass()) {
   case Stmt::DeclRefExprClass: {
@@ -569,8 +564,7 @@ static void visitLocalsRetainedByReferenceBinding(IndirectLocalPath &Path,
       } else if (VD->getInit() && !isVarOnPath(Path, VD)) {
         Path.push_back({IndirectLocalPathEntry::VarInit, DRE, VD});
         visitLocalsRetainedByReferenceBinding(Path, VD->getInit(),
-                                              RK_ReferenceBinding, Visit,
-                                              EnableLifetimeWarnings);
+                                              RK_ReferenceBinding, Visit);
       }
     }
     break;
@@ -582,15 +576,13 @@ static void visitLocalsRetainedByReferenceBinding(IndirectLocalPath &Path,
     // handling all sorts of rvalues passed to a unary operator.
     const UnaryOperator *U = cast<UnaryOperator>(Init);
     if (U->getOpcode() == UO_Deref)
-      visitLocalsRetainedByInitializer(Path, U->getSubExpr(), Visit, true,
-                                       EnableLifetimeWarnings);
+      visitLocalsRetainedByInitializer(Path, U->getSubExpr(), Visit, true);
     break;
   }
 
   case Stmt::ArraySectionExprClass: {
-    visitLocalsRetainedByInitializer(Path,
-                                     cast<ArraySectionExpr>(Init)->getBase(),
-                                     Visit, true, EnableLifetimeWarnings);
+    visitLocalsRetainedByInitializer(
+        Path, cast<ArraySectionExpr>(Init)->getBase(), Visit, true);
     break;
   }
 
@@ -598,11 +590,9 @@ static void visitLocalsRetainedByReferenceBinding(IndirectLocalPath &Path,
   case Stmt::BinaryConditionalOperatorClass: {
     auto *C = cast<AbstractConditionalOperator>(Init);
     if (!C->getTrueExpr()->getType()->isVoidType())
-      visitLocalsRetainedByReferenceBinding(Path, C->getTrueExpr(), RK, Visit,
-                                            EnableLifetimeWarnings);
+      visitLocalsRetainedByReferenceBinding(Path, C->getTrueExpr(), RK, Visit);
     if (!C->getFalseExpr()->getType()->isVoidType())
-      visitLocalsRetainedByReferenceBinding(Path, C->getFalseExpr(), RK, Visit,
-                                            EnableLifetimeWarnings);
+      visitLocalsRetainedByReferenceBinding(Path, C->getFalseExpr(), RK, Visit);
     break;
   }
 
@@ -625,8 +615,7 @@ static void visitLocalsRetainedByReferenceBinding(IndirectLocalPath &Path,
 /// the prvalue expression \c Init.
 static void visitLocalsRetainedByInitializer(IndirectLocalPath &Path,
                                              Expr *Init, LocalVisitor Visit,
-                                             bool RevisitSubinits,
-                                             bool EnableLifetimeWarnings) {
+                                             bool RevisitSubinits) {
   RevertToOldSizeRAII RAII(Path);
 
   Expr *Old;
@@ -667,18 +656,16 @@ static void visitLocalsRetainedByInitializer(IndirectLocalPath &Path,
                 if (VD && VD->getType().isConstQualified() && VD->getInit() &&
                     !isVarOnPath(Path, VD)) {
                   Path.push_back({IndirectLocalPathEntry::VarInit, DRE, VD});
-                  visitLocalsRetainedByInitializer(
-                      Path, VD->getInit(), Visit, true, EnableLifetimeWarnings);
+                  visitLocalsRetainedByInitializer(Path, VD->getInit(), Visit,
+                                                   true);
                 }
               } else if (auto *MTE = dyn_cast<MaterializeTemporaryExpr>(L)) {
                 if (MTE->getType().isConstQualified())
                   visitLocalsRetainedByInitializer(Path, MTE->getSubExpr(),
-                                                   Visit, true,
-                                                   EnableLifetimeWarnings);
+                                                   Visit, true);
               }
               return false;
-            },
-            EnableLifetimeWarnings);
+            });
 
         // We assume that objects can be retained by pointers cast to integers,
         // but not if the integer is cast to floating-point type or to _Complex.
@@ -707,9 +694,8 @@ static void visitLocalsRetainedByInitializer(IndirectLocalPath &Path,
         // Model array-to-pointer decay as taking the address of the array
         // lvalue.
         Path.push_back({IndirectLocalPathEntry::AddressOf, CE});
-        return visitLocalsRetainedByReferenceBinding(Path, CE->getSubExpr(),
-                                                     RK_ReferenceBinding, Visit,
-                                                     EnableLifetimeWarnings);
+        return visitLocalsRetainedByReferenceBinding(
+            Path, CE->getSubExpr(), RK_ReferenceBinding, Visit);
 
       default:
         return;
@@ -724,8 +710,7 @@ static void visitLocalsRetainedByInitializer(IndirectLocalPath &Path,
   //   lifetime of the array exactly like binding a reference to a temporary.
   if (auto *ILE = dyn_cast<CXXStdInitializerListExpr>(Init))
     return visitLocalsRetainedByReferenceBinding(Path, ILE->getSubExpr(),
-                                                 RK_StdInitializerList, Visit,
-                                                 EnableLifetimeWarnings);
+                                                 RK_StdInitializerList, Visit);
 
   if (InitListExpr *ILE = dyn_cast<InitListExpr>(Init)) {
     // We already visited the elements of this initializer list while
@@ -736,14 +721,12 @@ static void visitLocalsRetainedByInitializer(IndirectLocalPath &Path,
 
     if (ILE->isTransparent())
       return visitLocalsRetainedByInitializer(Path, ILE->getInit(0), Visit,
-                                              RevisitSubinits,
-                                              EnableLifetimeWarnings);
+                                              RevisitSubinits);
 
     if (ILE->getType()->isArrayType()) {
       for (unsigned I = 0, N = ILE->getNumInits(); I != N; ++I)
         visitLocalsRetainedByInitializer(Path, ILE->getInit(I), Visit,
-                                         RevisitSubinits,
-                                         EnableLifetimeWarnings);
+                                         RevisitSubinits);
       return;
     }
 
@@ -756,14 +739,12 @@ static void visitLocalsRetainedByInitializer(IndirectLocalPath &Path,
       if (RD->isUnion() && ILE->getInitializedFieldInUnion() &&
           ILE->getInitializedFieldInUnion()->getType()->isReferenceType())
         visitLocalsRetainedByReferenceBinding(Path, ILE->getInit(0),
-                                              RK_ReferenceBinding, Visit,
-                                              EnableLifetimeWarnings);
+                                              RK_ReferenceBinding, Visit);
       else {
         unsigned Index = 0;
         for (; Index < RD->getNumBases() && Index < ILE->getNumInits(); ++Index)
           visitLocalsRetainedByInitializer(Path, ILE->getInit(Index), Visit,
-                                           RevisitSubinits,
-                                           EnableLifetimeWarnings);
+                                           RevisitSubinits);
         for (const auto *I : RD->fields()) {
           if (Index >= ILE->getNumInits())
             break;
@@ -772,14 +753,13 @@ static void visitLocalsRetainedByInitializer(IndirectLocalPath &Path,
           Expr *SubInit = ILE->getInit(Index);
           if (I->getType()->isReferenceType())
             visitLocalsRetainedByReferenceBinding(Path, SubInit,
-                                                  RK_ReferenceBinding, Visit,
-                                                  EnableLifetimeWarnings);
+                                                  RK_ReferenceBinding, Visit);
           else
             // This might be either aggregate-initialization of a member or
             // initialization of a std::initializer_list object. Regardless,
             // we should recursively lifetime-extend that initializer.
-            visitLocalsRetainedByInitializer(
-                Path, SubInit, Visit, RevisitSubinits, EnableLifetimeWarnings);
+            visitLocalsRetainedByInitializer(Path, SubInit, Visit,
+                                             RevisitSubinits);
           ++Index;
         }
       }
@@ -800,10 +780,9 @@ static void visitLocalsRetainedByInitializer(IndirectLocalPath &Path,
         Path.push_back({IndirectLocalPathEntry::LambdaCaptureInit, E, &Cap});
       if (E->isGLValue())
         visitLocalsRetainedByReferenceBinding(Path, E, RK_ReferenceBinding,
-                                              Visit, EnableLifetimeWarnings);
+                                              Visit);
       else
-        visitLocalsRetainedByInitializer(Path, E, Visit, true,
-                                         EnableLifetimeWarnings);
+        visitLocalsRetainedByInitializer(Path, E, Visit, true);
       if (Cap.capturesVariable())
         Path.pop_back();
     }
@@ -817,16 +796,14 @@ static void visitLocalsRetainedByInitializer(IndirectLocalPath &Path,
         Expr *Arg = MTE->getSubExpr();
         Path.push_back({IndirectLocalPathEntry::TemporaryCopy, Arg,
                         CCE->getConstructor()});
-        visitLocalsRetainedByInitializer(Path, Arg, Visit, true,
-                                         /*EnableLifetimeWarnings*/ false);
+        visitLocalsRetainedByInitializer(Path, Arg, Visit, true);
         Path.pop_back();
       }
     }
   }
 
   if (isa<CallExpr>(Init) || isa<CXXConstructExpr>(Init))
-    return visitFunctionCallArguments(Path, Init, Visit,
-                                      EnableLifetimeWarnings);
+    return visitFunctionCallArguments(Path, Init, Visit);
 
   switch (Init->getStmtClass()) {
   case Stmt::UnaryOperatorClass: {
@@ -842,8 +819,7 @@ static void visitLocalsRetainedByInitializer(IndirectLocalPath &Path,
 
       Path.push_back({IndirectLocalPathEntry::AddressOf, UO});
       visitLocalsRetainedByReferenceBinding(Path, UO->getSubExpr(),
-                                            RK_ReferenceBinding, Visit,
-                                            EnableLifetimeWarnings);
+                                            RK_ReferenceBinding, Visit);
     }
     break;
   }
@@ -856,11 +832,9 @@ static void visitLocalsRetainedByInitializer(IndirectLocalPath &Path,
       break;
 
     if (BO->getLHS()->getType()->isPointerType())
-      visitLocalsRetainedByInitializer(Path, BO->getLHS(), Visit, true,
-                                       EnableLifetimeWarnings);
+      visitLocalsRetainedByInitializer(Path, BO->getLHS(), Visit, true);
     else if (BO->getRHS()->getType()->isPointerType())
-      visitLocalsRetainedByInitializer(Path, BO->getRHS(), Visit, true,
-                                       EnableLifetimeWarnings);
+      visitLocalsRetainedByInitializer(Path, BO->getRHS(), Visit, true);
     break;
   }
 
@@ -870,11 +844,9 @@ static void visitLocalsRetainedByInitializer(IndirectLocalPath &Path,
     // In C++, we can have a throw-expression operand, which has 'void' type
     // and isn't interesting from a lifetime perspective.
     if (!C->getTrueExpr()->getType()->isVoidType())
-      visitLocalsRetainedByInitializer(Path, C->getTrueExpr(), Visit, true,
-                                       EnableLifetimeWarnings);
+      visitLocalsRetainedByInitializer(Path, C->getTrueExpr(), Visit, true);
     if (!C->getFalseExpr()->getType()->isVoidType())
-      visitLocalsRetainedByInitializer(Path, C->getFalseExpr(), Visit, true,
-                                       EnableLifetimeWarnings);
+      visitLocalsRetainedByInitializer(Path, C->getFalseExpr(), Visit, true);
     break;
   }
 
@@ -899,11 +871,6 @@ static void visitLocalsRetainedByInitializer(IndirectLocalPath &Path,
 enum PathLifetimeKind {
   /// Lifetime-extend along this path.
   Extend,
-  /// We should lifetime-extend, but we don't because (due to technical
-  /// limitations) we can't. This happens for default member initializers,
-  /// which we don't clone for every use, so we don't have a unique
-  /// MaterializeTemporaryExpr to update.
-  ShouldExtend,
   /// Do not lifetime extend along this path.
   NoExtend
 };
@@ -915,7 +882,7 @@ shouldLifetimeExtendThroughPath(const IndirectLocalPath &Path) {
   PathLifetimeKind Kind = PathLifetimeKind::Extend;
   for (auto Elem : Path) {
     if (Elem.Kind == IndirectLocalPathEntry::DefaultInit)
-      Kind = PathLifetimeKind::ShouldExtend;
+      return PathLifetimeKind::Extend;
     else if (Elem.Kind != IndirectLocalPathEntry::LambdaCaptureInit)
       return PathLifetimeKind::NoExtend;
   }
@@ -972,10 +939,10 @@ static bool pathOnlyHandlesGslPointer(IndirectLocalPath &Path) {
   return false;
 }
 
-static bool isAssginmentOperatorLifetimeBound(CXXMethodDecl *CMD) {
+static bool isAssignmentOperatorLifetimeBound(CXXMethodDecl *CMD) {
   if (!CMD)
     return false;
-  return isNormalAsisgnmentOperator(CMD) && CMD->param_size() == 1 &&
+  return isNormalAssignmentOperator(CMD) && CMD->param_size() == 1 &&
          CMD->getParamDecl(0)->hasAttr<LifetimeBoundAttr>();
 }
 
@@ -985,15 +952,14 @@ static bool shouldRunGSLAssignmentAnalysis(const Sema &SemaRef,
       diag::warn_dangling_lifetime_pointer_assignment, SourceLocation());
   return (EnableGSLAssignmentWarnings &&
           (isRecordWithAttr<PointerAttr>(Entity.LHS->getType()) ||
-           isAssginmentOperatorLifetimeBound(Entity.AssignmentOperator)));
+           isAssignmentOperatorLifetimeBound(Entity.AssignmentOperator)));
 }
 
 static void checkExprLifetimeImpl(Sema &SemaRef,
                                   const InitializedEntity *InitEntity,
                                   const InitializedEntity *ExtendingEntity,
                                   LifetimeKind LK,
-                                  const AssignedEntity *AEntity, Expr *Init,
-                                  bool EnableLifetimeWarnings) {
+                                  const AssignedEntity *AEntity, Expr *Init) {
   assert((AEntity && LK == LK_Assignment) ||
          (InitEntity && LK != LK_Assignment));
   // If this entity doesn't have an interesting lifetime, don't bother looking
@@ -1063,17 +1029,6 @@ static void checkExprLifetimeImpl(Sema &SemaRef,
         // Also visit the temporaries lifetime-extended by this initializer.
         return true;
 
-      case PathLifetimeKind::ShouldExtend:
-        // We're supposed to lifetime-extend the temporary along this path (per
-        // the resolution of DR1815), but we don't support that yet.
-        //
-        // FIXME: Properly handle this situation. Perhaps the easiest approach
-        // would be to clone the initializer expression on each use that would
-        // lifetime extend its temporaries.
-        SemaRef.Diag(DiagLoc, diag::warn_unsupported_lifetime_extension)
-            << RK << DiagRange;
-        break;
-
       case PathLifetimeKind::NoExtend:
         // If the path goes through the initialization of a variable or field,
         // it can't possibly reach a temporary created in this full-expression.
@@ -1292,13 +1247,12 @@ static void checkExprLifetimeImpl(Sema &SemaRef,
 
   if (Init->isGLValue())
     visitLocalsRetainedByReferenceBinding(Path, Init, RK_ReferenceBinding,
-                                          TemporaryVisitor,
-                                          EnableLifetimeWarnings);
+                                          TemporaryVisitor);
   else
     visitLocalsRetainedByInitializer(
         Path, Init, TemporaryVisitor,
        // Don't revisit the sub inits for the initialization case.
-        /*RevisitSubinits=*/!InitEntity, EnableLifetimeWarnings);
+        /*RevisitSubinits=*/!InitEntity);
 }
 
 void checkExprLifetime(Sema &SemaRef, const InitializedEntity &Entity,
@@ -1306,16 +1260,12 @@ void checkExprLifetime(Sema &SemaRef, const InitializedEntity &Entity,
   auto LTResult = getEntityLifetime(&Entity);
   LifetimeKind LK = LTResult.getInt();
   const InitializedEntity *ExtendingEntity = LTResult.getPointer();
-  bool EnableLifetimeWarnings = !SemaRef.getDiagnostics().isIgnored(
-      diag::warn_dangling_lifetime_pointer, SourceLocation());
   checkExprLifetimeImpl(SemaRef, &Entity, ExtendingEntity, LK,
-                        /*AEntity*/ nullptr, Init, EnableLifetimeWarnings);
+                        /*AEntity*/ nullptr, Init);
 }
 
 void checkExprLifetime(Sema &SemaRef, const AssignedEntity &Entity,
                        Expr *Init) {
-  bool EnableLifetimeWarnings = !SemaRef.getDiagnostics().isIgnored(
-      diag::warn_dangling_lifetime_pointer, SourceLocation());
   bool EnableDanglingPointerAssignment = !SemaRef.getDiagnostics().isIgnored(
       diag::warn_dangling_pointer_assignment, SourceLocation());
   bool RunAnalysis = (EnableDanglingPointerAssignment &&
@@ -1327,7 +1277,7 @@ void checkExprLifetime(Sema &SemaRef, const AssignedEntity &Entity,
 
   checkExprLifetimeImpl(SemaRef, /*InitEntity=*/nullptr,
                         /*ExtendingEntity=*/nullptr, LK_Assignment, &Entity,
-                        Init, EnableLifetimeWarnings);
+                        Init);
 }
 
 } // namespace clang::sema
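
The hunks above remove the separate EnableLifetimeWarnings plumbing (GSL analysis is now gated by EnableGSLAnalysis inside visitFunctionCallArguments) and drop PathLifetimeKind::ShouldExtend, so paths that go through a default member initializer now genuinely lifetime-extend instead of emitting warn_unsupported_lifetime_extension. A minimal sketch of the user-facing scenario this targets (illustrative only, not code from the patch, assuming CWG1815 semantics):

```cpp
#include <cstdio>

struct A {
  int v = 42;
};

struct B {
  // Default member initializer binds a reference member to a temporary A.
  const A &a = A{};
};

int main() {
  // Aggregate initialization uses the default member initializer; under
  // CWG1815 the temporary A is extended to the lifetime of `b`, so the
  // read below is well-defined rather than a dangling access.
  B b{};
  std::printf("%d\n", b.a.v);
  return 0;
}
```
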
diff --git clang/lib/Sema/HLSLExternalSemaSource.cpp clang/lib/Sema/HLSLExternalSemaSource.cpp
index 9aacbe4ad954..da7bbf8baa74 100644
--- clang/lib/Sema/HLSLExternalSemaSource.cpp
+++ clang/lib/Sema/HLSLExternalSemaSource.cpp
@@ -13,10 +13,13 @@
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Attr.h"
 #include "clang/AST/DeclCXX.h"
+#include "clang/AST/Type.h"
 #include "clang/Basic/AttrKinds.h"
 #include "clang/Basic/HLSLRuntime.h"
 #include "clang/Sema/Lookup.h"
 #include "clang/Sema/Sema.h"
+#include "clang/Sema/SemaHLSL.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/Frontend/HLSL/HLSLResource.h"
 
 #include <functional>
@@ -107,7 +110,7 @@ struct BuiltinTypeDeclBuilder {
   }
 
   BuiltinTypeDeclBuilder &
-  addHandleMember(ResourceClass RC, ResourceKind RK, bool IsROV,
+  addHandleMember(Sema &S, ResourceClass RC, ResourceKind RK, bool IsROV,
                   AccessSpecifier Access = AccessSpecifier::AS_private) {
     if (Record->isCompleteDefinition())
       return *this;
@@ -118,16 +121,16 @@ struct BuiltinTypeDeclBuilder {
         Ty = Record->getASTContext().getPointerType(
             QualType(TTD->getTypeForDecl(), 0));
     }
-    // add handle member
-    Attr *ResourceClassAttr =
-        HLSLResourceClassAttr::CreateImplicit(Record->getASTContext(), RC);
+
+    // add handle member with resource type attributes
+    QualType AttributedResTy = QualType();
+    SmallVector<const Attr *> Attrs = {
+        HLSLResourceClassAttr::CreateImplicit(Record->getASTContext(), RC),
+        IsROV ? HLSLROVAttr::CreateImplicit(Record->getASTContext()) : nullptr};
     Attr *ResourceAttr =
         HLSLResourceAttr::CreateImplicit(Record->getASTContext(), RK);
-    Attr *ROVAttr =
-        IsROV ? HLSLROVAttr::CreateImplicit(Record->getASTContext()) : nullptr;
-    addMemberVariable("h", Ty, {ResourceClassAttr, ResourceAttr, ROVAttr},
-                      Access);
-
+    if (CreateHLSLAttributedResourceType(S, Ty, Attrs, AttributedResTy))
+      addMemberVariable("h", AttributedResTy, {ResourceAttr}, Access);
     return *this;
   }
 
@@ -494,7 +497,7 @@ static BuiltinTypeDeclBuilder setupBufferType(CXXRecordDecl *Decl, Sema &S,
                                               ResourceClass RC, ResourceKind RK,
                                               bool IsROV) {
   return BuiltinTypeDeclBuilder(Decl)
-      .addHandleMember(RC, RK, IsROV)
+      .addHandleMember(S, RC, RK, IsROV)
       .addDefaultHandleConstructor(S, RC);
 }
 
@@ -522,6 +525,16 @@ void HLSLExternalSemaSource::defineHLSLTypesWithForwardDeclarations() {
         .addArraySubscriptOperators()
         .completeDefinition();
   });
+
+  Decl = BuiltinTypeDeclBuilder(*SemaPtr, HLSLNamespace, "StructuredBuffer")
+             .addSimpleTemplateParams(*SemaPtr, {"element_type"})
+             .Record;
+  onCompletion(Decl, [this](CXXRecordDecl *Decl) {
+    setupBufferType(Decl, *SemaPtr, ResourceClass::UAV,
+                    ResourceKind::TypedBuffer, /*IsROV=*/false)
+        .addArraySubscriptOperators()
+        .completeDefinition();
+  });
 }
 
 void HLSLExternalSemaSource::onCompletion(CXXRecordDecl *Record,
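
addHandleMember now threads Sema through so the handle member is declared with an HLSL attributed resource type, and StructuredBuffer joins the other builtin records as a forward-declared type whose definition is filled in lazily through onCompletion. A rough, standalone sketch of that register-now/complete-later pattern (all names here are hypothetical, not the clang API):

```cpp
#include <functional>
#include <map>
#include <string>
#include <utility>
#include <vector>

// A record being built lazily: a name plus whatever members were added.
struct RecordStub {
  std::string Name;
  std::vector<std::string> Members;
  bool Complete = false;
};

// Register a forward-declared type now and run a completion callback that
// adds its members the first time the full definition is needed.
class LazyTypeRegistry {
  std::map<RecordStub *, std::function<void(RecordStub *)>> Completions;

public:
  void onCompletion(RecordStub *R, std::function<void(RecordStub *)> Fn) {
    Completions[R] = std::move(Fn);
  }
  void completeType(RecordStub *R) {
    auto It = Completions.find(R);
    if (It == Completions.end() || R->Complete)
      return;
    It->second(R); // e.g. add the handle member and subscript operators
    R->Complete = true;
  }
};

int main() {
  LazyTypeRegistry Registry;
  RecordStub Buffer{"StructuredBuffer", {}, false};
  Registry.onCompletion(&Buffer,
                        [](RecordStub *R) { R->Members.push_back("h"); });
  // ... later, when the definition is actually required:
  Registry.completeType(&Buffer);
  return Buffer.Complete ? 0 : 1;
}
```
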
diff --git clang/lib/Sema/Sema.cpp clang/lib/Sema/Sema.cpp
index 29acd06af603..46ddd360870b 100644
--- clang/lib/Sema/Sema.cpp
+++ clang/lib/Sema/Sema.cpp
@@ -2865,6 +2865,7 @@ bool FunctionEffectDiff::shouldDiagnoseConversion(
       // matching is better.
       return true;
     }
+    llvm_unreachable("Unhandled FunctionEffectDiff::Kind enum");
   case FunctionEffect::Kind::Blocking:
   case FunctionEffect::Kind::Allocating:
     return false;
@@ -2890,6 +2891,7 @@ bool FunctionEffectDiff::shouldDiagnoseRedeclaration(
       // All these forms of mismatches are diagnosed.
       return true;
     }
+    llvm_unreachable("Unhandled FunctionEffectDiff::Kind enum");
   case FunctionEffect::Kind::Blocking:
   case FunctionEffect::Kind::Allocating:
     return false;
@@ -2921,6 +2923,7 @@ FunctionEffectDiff::shouldDiagnoseMethodOverride(
     case Kind::ConditionMismatch:
       return OverrideResult::Warn;
     }
+    llvm_unreachable("Unhandled FunctionEffectDiff::Kind enum");
 
   case FunctionEffect::Kind::Blocking:
   case FunctionEffect::Kind::Allocating:
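
The llvm_unreachable additions close off inner switches whose cases all return: the compiler cannot prove that control never falls off the end of the inner switch into the next outer case label, so it reports fall-through and missing-return warnings. A standalone illustration of the same pattern, using __builtin_unreachable() in place of the in-tree llvm_unreachable macro:

```cpp
enum class Inner { A, B };
enum class Outer { X, Y };

bool classify(Outer O, Inner I) {
  switch (O) {
  case Outer::X:
    switch (I) {
    case Inner::A:
      return true;
    case Inner::B:
      return false;
    }
    // Every Inner value returns above, yet the compiler still sees a path
    // that would fall through into `case Outer::Y`; marking this point
    // unreachable silences the spurious warnings.
    __builtin_unreachable();
  case Outer::Y:
    return false;
  }
  __builtin_unreachable();
}

int main() { return classify(Outer::X, Inner::A) ? 0 : 1; }
```
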
diff --git clang/lib/Sema/SemaARM.cpp clang/lib/Sema/SemaARM.cpp
index 185e0427d5c9..efde354860de 100644
--- clang/lib/Sema/SemaARM.cpp
+++ clang/lib/Sema/SemaARM.cpp
@@ -315,40 +315,6 @@ bool SemaARM::BuiltinARMSpecialReg(unsigned BuiltinID, CallExpr *TheCall,
   return false;
 }
 
-// Get the valid immediate range for the specified NEON type code.
-static unsigned RFT(unsigned t, bool shift = false, bool ForceQuad = false) {
-  NeonTypeFlags Type(t);
-  int IsQuad = ForceQuad ? true : Type.isQuad();
-  switch (Type.getEltType()) {
-  case NeonTypeFlags::Int8:
-  case NeonTypeFlags::Poly8:
-    return shift ? 7 : (8 << IsQuad) - 1;
-  case NeonTypeFlags::Int16:
-  case NeonTypeFlags::Poly16:
-    return shift ? 15 : (4 << IsQuad) - 1;
-  case NeonTypeFlags::Int32:
-    return shift ? 31 : (2 << IsQuad) - 1;
-  case NeonTypeFlags::Int64:
-  case NeonTypeFlags::Poly64:
-    return shift ? 63 : (1 << IsQuad) - 1;
-  case NeonTypeFlags::Poly128:
-    return shift ? 127 : (1 << IsQuad) - 1;
-  case NeonTypeFlags::Float16:
-    assert(!shift && "cannot shift float types!");
-    return (4 << IsQuad) - 1;
-  case NeonTypeFlags::Float32:
-    assert(!shift && "cannot shift float types!");
-    return (2 << IsQuad) - 1;
-  case NeonTypeFlags::Float64:
-    assert(!shift && "cannot shift float types!");
-    return (1 << IsQuad) - 1;
-  case NeonTypeFlags::BFloat16:
-    assert(!shift && "cannot shift float types!");
-    return (4 << IsQuad) - 1;
-  }
-  llvm_unreachable("Invalid NeonTypeFlag!");
-}
-
 /// getNeonEltType - Return the QualType corresponding to the elements of
 /// the vector type specified by the NeonTypeFlags.  This is used to check
 /// the pointer arguments for Neon load/store intrinsics.
@@ -404,142 +370,171 @@ enum ArmSMEState : unsigned {
   ArmZT0Mask = 0b11 << 2
 };
 
-bool SemaARM::ParseSVEImmChecks(
-    CallExpr *TheCall, SmallVector<std::tuple<int, int, int>, 3> &ImmChecks) {
-  // Perform all the immediate checks for this builtin call.
+bool SemaARM::CheckImmediateArg(CallExpr *TheCall, unsigned CheckTy,
+                                unsigned ArgIdx, unsigned EltBitWidth,
+                                unsigned VecBitWidth) {
+  // Function that checks whether the operand (ArgIdx) is an immediate
+  // that is one of a given set of values.
+  auto CheckImmediateInSet = [&](std::initializer_list<int64_t> Set,
+                                 int ErrDiag) -> bool {
+    // We can't check the value of a dependent argument.
+    Expr *Arg = TheCall->getArg(ArgIdx);
+    if (Arg->isTypeDependent() || Arg->isValueDependent())
+      return false;
+
+    // Check constant-ness first.
+    llvm::APSInt Imm;
+    if (SemaRef.BuiltinConstantArg(TheCall, ArgIdx, Imm))
+      return true;
+
+    if (std::find(Set.begin(), Set.end(), Imm.getSExtValue()) == Set.end())
+      return Diag(TheCall->getBeginLoc(), ErrDiag) << Arg->getSourceRange();
+    return false;
+  };
+
+  switch ((ImmCheckType)CheckTy) {
+  case ImmCheckType::ImmCheck0_31:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 31))
+      return true;
+    break;
+  case ImmCheckType::ImmCheck0_13:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 13))
+      return true;
+    break;
+  case ImmCheckType::ImmCheck0_63:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 63))
+      return true;
+    break;
+  case ImmCheckType::ImmCheck1_16:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, 16))
+      return true;
+    break;
+  case ImmCheckType::ImmCheck0_7:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 7))
+      return true;
+    break;
+  case ImmCheckType::ImmCheck1_1:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, 1))
+      return true;
+    break;
+  case ImmCheckType::ImmCheck1_3:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, 3))
+      return true;
+    break;
+  case ImmCheckType::ImmCheck1_7:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, 7))
+      return true;
+    break;
+  case ImmCheckType::ImmCheckExtract:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0,
+                                        (2048 / EltBitWidth) - 1))
+      return true;
+    break;
+  case ImmCheckType::ImmCheckCvt:
+  case ImmCheckType::ImmCheckShiftRight:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, EltBitWidth))
+      return true;
+    break;
+  case ImmCheckType::ImmCheckShiftRightNarrow:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, EltBitWidth / 2))
+      return true;
+    break;
+  case ImmCheckType::ImmCheckShiftLeft:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, EltBitWidth - 1))
+      return true;
+    break;
+  case ImmCheckType::ImmCheckLaneIndex:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0,
+                                        (VecBitWidth / EltBitWidth) - 1))
+      return true;
+    break;
+  case ImmCheckType::ImmCheckLaneIndexCompRotate:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0,
+                                        (VecBitWidth / (2 * EltBitWidth)) - 1))
+      return true;
+    break;
+  case ImmCheckType::ImmCheckLaneIndexDot:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0,
+                                        (VecBitWidth / (4 * EltBitWidth)) - 1))
+      return true;
+    break;
+  case ImmCheckType::ImmCheckComplexRot90_270:
+    if (CheckImmediateInSet({90, 270}, diag::err_rotation_argument_to_cadd))
+      return true;
+    break;
+  case ImmCheckType::ImmCheckComplexRotAll90:
+    if (CheckImmediateInSet({0, 90, 180, 270},
+                            diag::err_rotation_argument_to_cmla))
+      return true;
+    break;
+  case ImmCheckType::ImmCheck0_1:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 1))
+      return true;
+    break;
+  case ImmCheckType::ImmCheck0_2:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 2))
+      return true;
+    break;
+  case ImmCheckType::ImmCheck0_3:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 3))
+      return true;
+    break;
+  case ImmCheckType::ImmCheck0_0:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 0))
+      return true;
+    break;
+  case ImmCheckType::ImmCheck0_15:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 15))
+      return true;
+    break;
+  case ImmCheckType::ImmCheck0_255:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 255))
+      return true;
+    break;
+  case ImmCheckType::ImmCheck1_32:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, 32))
+      return true;
+    break;
+  case ImmCheckType::ImmCheck1_64:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, 64))
+      return true;
+    break;
+  case ImmCheckType::ImmCheck2_4_Mul2:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 2, 4) ||
+        SemaRef.BuiltinConstantArgMultiple(TheCall, ArgIdx, 2))
+      return true;
+    break;
+  }
+  return false;
+}
+
+bool SemaARM::PerformNeonImmChecks(
+    CallExpr *TheCall,
+    SmallVectorImpl<std::tuple<int, int, int, int>> &ImmChecks,
+    int OverloadType) {
   bool HasError = false;
-  for (auto &I : ImmChecks) {
-    int ArgNum, CheckTy, ElementSizeInBits;
-    std::tie(ArgNum, CheckTy, ElementSizeInBits) = I;
-
-    typedef bool (*OptionSetCheckFnTy)(int64_t Value);
-
-    // Function that checks whether the operand (ArgNum) is an immediate
-    // that is one of the predefined values.
-    auto CheckImmediateInSet = [&](OptionSetCheckFnTy CheckImm,
-                                   int ErrDiag) -> bool {
-      // We can't check the value of a dependent argument.
-      Expr *Arg = TheCall->getArg(ArgNum);
-      if (Arg->isTypeDependent() || Arg->isValueDependent())
-        return false;
-
-      // Check constant-ness first.
-      llvm::APSInt Imm;
-      if (SemaRef.BuiltinConstantArg(TheCall, ArgNum, Imm))
-        return true;
 
-      if (!CheckImm(Imm.getSExtValue()))
-        return Diag(TheCall->getBeginLoc(), ErrDiag) << Arg->getSourceRange();
-      return false;
-    };
+  for (const auto &I : ImmChecks) {
+    auto [ArgIdx, CheckTy, ElementSizeInBits, VecSizeInBits] = I;
 
-    switch ((SVETypeFlags::ImmCheckType)CheckTy) {
-    case SVETypeFlags::ImmCheck0_31:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 31))
-        HasError = true;
-      break;
-    case SVETypeFlags::ImmCheck0_13:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 13))
-        HasError = true;
-      break;
-    case SVETypeFlags::ImmCheck1_16:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 1, 16))
-        HasError = true;
-      break;
-    case SVETypeFlags::ImmCheck0_7:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 7))
-        HasError = true;
-      break;
-    case SVETypeFlags::ImmCheck1_1:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 1, 1))
-        HasError = true;
-      break;
-    case SVETypeFlags::ImmCheck1_3:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 1, 3))
-        HasError = true;
-      break;
-    case SVETypeFlags::ImmCheck1_7:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 1, 7))
-        HasError = true;
-      break;
-    case SVETypeFlags::ImmCheckExtract:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0,
-                                          (2048 / ElementSizeInBits) - 1))
-        HasError = true;
-      break;
-    case SVETypeFlags::ImmCheckShiftRight:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 1,
-                                          ElementSizeInBits))
-        HasError = true;
-      break;
-    case SVETypeFlags::ImmCheckShiftRightNarrow:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 1,
-                                          ElementSizeInBits / 2))
-        HasError = true;
-      break;
-    case SVETypeFlags::ImmCheckShiftLeft:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0,
-                                          ElementSizeInBits - 1))
-        HasError = true;
-      break;
-    case SVETypeFlags::ImmCheckLaneIndex:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0,
-                                          (128 / (1 * ElementSizeInBits)) - 1))
-        HasError = true;
-      break;
-    case SVETypeFlags::ImmCheckLaneIndexCompRotate:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0,
-                                          (128 / (2 * ElementSizeInBits)) - 1))
-        HasError = true;
-      break;
-    case SVETypeFlags::ImmCheckLaneIndexDot:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0,
-                                          (128 / (4 * ElementSizeInBits)) - 1))
-        HasError = true;
-      break;
-    case SVETypeFlags::ImmCheckComplexRot90_270:
-      if (CheckImmediateInSet([](int64_t V) { return V == 90 || V == 270; },
-                              diag::err_rotation_argument_to_cadd))
-        HasError = true;
-      break;
-    case SVETypeFlags::ImmCheckComplexRotAll90:
-      if (CheckImmediateInSet(
-              [](int64_t V) {
-                return V == 0 || V == 90 || V == 180 || V == 270;
-              },
-              diag::err_rotation_argument_to_cmla))
-        HasError = true;
-      break;
-    case SVETypeFlags::ImmCheck0_1:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 1))
-        HasError = true;
-      break;
-    case SVETypeFlags::ImmCheck0_2:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 2))
-        HasError = true;
-      break;
-    case SVETypeFlags::ImmCheck0_3:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 3))
-        HasError = true;
-      break;
-    case SVETypeFlags::ImmCheck0_0:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 0))
-        HasError = true;
-      break;
-    case SVETypeFlags::ImmCheck0_15:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 15))
-        HasError = true;
-      break;
-    case SVETypeFlags::ImmCheck0_255:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 255))
-        HasError = true;
-      break;
-    case SVETypeFlags::ImmCheck2_4_Mul2:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 2, 4) ||
-          SemaRef.BuiltinConstantArgMultiple(TheCall, ArgNum, 2))
-        HasError = true;
-      break;
-    }
+    if (OverloadType >= 0)
+      ElementSizeInBits = NeonTypeFlags(OverloadType).getEltSizeInBits();
+
+    HasError |= CheckImmediateArg(TheCall, CheckTy, ArgIdx, ElementSizeInBits,
+                                  VecSizeInBits);
+  }
+
+  return HasError;
+}
+
+bool SemaARM::PerformSVEImmChecks(
+    CallExpr *TheCall, SmallVectorImpl<std::tuple<int, int, int>> &ImmChecks) {
+  bool HasError = false;
+
+  for (const auto &I : ImmChecks) {
+    auto [ArgIdx, CheckTy, ElementSizeInBits] = I;
+    HasError |=
+        CheckImmediateArg(TheCall, CheckTy, ArgIdx, ElementSizeInBits, 128);
   }
 
   return HasError;
@@ -694,7 +689,7 @@ bool SemaARM::CheckSMEBuiltinFunctionCall(unsigned BuiltinID,
 #undef GET_SME_IMMEDIATE_CHECK
   }
 
-  return ParseSVEImmChecks(TheCall, ImmChecks);
+  return PerformSVEImmChecks(TheCall, ImmChecks);
 }
 
 bool SemaARM::CheckSVEBuiltinFunctionCall(unsigned BuiltinID,
@@ -722,7 +717,7 @@ bool SemaARM::CheckSVEBuiltinFunctionCall(unsigned BuiltinID,
 #undef GET_SVE_IMMEDIATE_CHECK
   }
 
-  return ParseSVEImmChecks(TheCall, ImmChecks);
+  return PerformSVEImmChecks(TheCall, ImmChecks);
 }
 
 bool SemaARM::CheckNeonBuiltinFunctionCall(const TargetInfo &TI,
@@ -749,7 +744,7 @@ bool SemaARM::CheckNeonBuiltinFunctionCall(const TargetInfo &TI,
 
   llvm::APSInt Result;
   uint64_t mask = 0;
-  unsigned TV = 0;
+  int TV = -1;
   int PtrArgNum = -1;
   bool HasConstPtr = false;
   switch (BuiltinID) {
@@ -802,7 +797,7 @@ bool SemaARM::CheckNeonBuiltinFunctionCall(const TargetInfo &TI,
 
   // For NEON intrinsics which take an immediate value as part of the
   // instruction, range check them here.
-  unsigned i = 0, l = 0, u = 0;
+  SmallVector<std::tuple<int, int, int, int>, 2> ImmChecks;
   switch (BuiltinID) {
   default:
     return false;
@@ -812,7 +807,7 @@ bool SemaARM::CheckNeonBuiltinFunctionCall(const TargetInfo &TI,
 #undef GET_NEON_IMMEDIATE_CHECK
   }
 
-  return SemaRef.BuiltinConstantArgRange(TheCall, i, l, u + l);
+  return PerformNeonImmChecks(TheCall, ImmChecks, TV);
 }
 
 bool SemaARM::CheckMVEBuiltinFunctionCall(unsigned BuiltinID,
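
ParseSVEImmChecks is generalized into CheckImmediateArg, and both PerformSVEImmChecks and the new PerformNeonImmChecks drive it from tuples of (argument index, check kind, element bits, vector bits), so NEON and SVE share one range-checking routine. A hypothetical, self-contained sketch of that table-driven idea (the names and the particular check kinds are invented for illustration):

```cpp
#include <cstdint>
#include <cstdio>
#include <tuple>
#include <vector>

enum class Check { Imm0_63, ShiftLeft, LaneIndex };

// Returns true on error, mirroring the Sema convention used in the patch.
bool checkImm(std::int64_t Value, Check Kind, unsigned EltBits,
              unsigned VecBits) {
  std::int64_t Lo = 0, Hi = 0;
  switch (Kind) {
  case Check::Imm0_63:
    Lo = 0; Hi = 63; break;
  case Check::ShiftLeft:
    Lo = 0; Hi = EltBits - 1; break;
  case Check::LaneIndex:
    Lo = 0; Hi = (VecBits / EltBits) - 1; break;
  }
  if (Value < Lo || Value > Hi) {
    std::fprintf(stderr, "immediate %lld out of range [%lld, %lld]\n",
                 (long long)Value, (long long)Lo, (long long)Hi);
    return true;
  }
  return false;
}

int main() {
  // One entry per immediate operand: {ArgIdx, Kind, EltBits, VecBits}.
  std::vector<std::tuple<int, Check, unsigned, unsigned>> ImmChecks = {
      {2, Check::LaneIndex, 32, 128}, // lane index into a 4 x 32-bit vector
  };
  std::int64_t Args[] = {0, 0, 3}; // argument 2 carries the immediate 3
  bool HasError = false;
  for (const auto &[ArgIdx, Kind, EltBits, VecBits] : ImmChecks)
    HasError |= checkImm(Args[ArgIdx], Kind, EltBits, VecBits);
  return HasError ? 1 : 0;
}
```
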
diff --git clang/lib/Sema/SemaChecking.cpp clang/lib/Sema/SemaChecking.cpp
index b01765b6833a..99500daca295 100644
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -1844,6 +1844,44 @@ static ExprResult BuiltinLaunder(Sema &S, CallExpr *TheCall) {
   return TheCall;
 }
 
+static ExprResult BuiltinIsWithinLifetime(Sema &S, CallExpr *TheCall) {
+  if (S.checkArgCount(TheCall, 1))
+    return ExprError();
+
+  ExprResult Arg = S.DefaultFunctionArrayLvalueConversion(TheCall->getArg(0));
+  if (Arg.isInvalid())
+    return ExprError();
+  QualType ParamTy = Arg.get()->getType();
+  TheCall->setArg(0, Arg.get());
+  TheCall->setType(S.Context.BoolTy);
+
+  // Only accept pointers to objects as arguments, which should have object
+  // pointer or void pointer types.
+  if (const auto *PT = ParamTy->getAs<PointerType>()) {
+    // LWG4138: Function pointer types not allowed
+    if (PT->getPointeeType()->isFunctionType()) {
+      S.Diag(TheCall->getArg(0)->getExprLoc(),
+             diag::err_builtin_is_within_lifetime_invalid_arg)
+          << 1;
+      return ExprError();
+    }
+    // Disallow VLAs too since those shouldn't be able to
+    // be a template parameter for `std::is_within_lifetime`
+    if (PT->getPointeeType()->isVariableArrayType()) {
+      S.Diag(TheCall->getArg(0)->getExprLoc(), diag::err_vla_unsupported)
+          << 1 << "__builtin_is_within_lifetime";
+      return ExprError();
+    }
+  } else {
+    S.Diag(TheCall->getArg(0)->getExprLoc(),
+           diag::err_builtin_is_within_lifetime_invalid_arg)
+        << 0;
+    return ExprError();
+  }
+
+  return TheCall;
+}
+
 // Emit an error and return true if the current object format type is in the
 // list of unsupported types.
 static bool CheckBuiltinTargetNotInUnsupported(
@@ -2276,6 +2314,8 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
   }
   case Builtin::BI__builtin_launder:
     return BuiltinLaunder(*this, TheCall);
+  case Builtin::BI__builtin_is_within_lifetime:
+    return BuiltinIsWithinLifetime(*this, TheCall);
   case Builtin::BI__sync_fetch_and_add:
   case Builtin::BI__sync_fetch_and_add_1:
   case Builtin::BI__sync_fetch_and_add_2:
@@ -4896,10 +4936,19 @@ bool Sema::BuiltinFPClassification(CallExpr *TheCall, unsigned NumArgs,
   // Usual Unary Conversions will convert half to float, which we want for
  // machines that use fp16 conversion intrinsics. Else, we want to leave the
   // type how it is, but do normal L->Rvalue conversions.
-  if (Context.getTargetInfo().useFP16ConversionIntrinsics())
-    OrigArg = UsualUnaryConversions(OrigArg).get();
-  else
-    OrigArg = DefaultFunctionArrayLvalueConversion(OrigArg).get();
+  if (Context.getTargetInfo().useFP16ConversionIntrinsics()) {
+    ExprResult Res = UsualUnaryConversions(OrigArg);
+
+    if (!Res.isUsable())
+      return true;
+    OrigArg = Res.get();
+  } else {
+    ExprResult Res = DefaultFunctionArrayLvalueConversion(OrigArg);
+
+    if (!Res.isUsable())
+      return true;
+    OrigArg = Res.get();
+  }
   TheCall->setArg(FPArgNo, OrigArg);
 
   QualType VectorResultTy;
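
BuiltinIsWithinLifetime accepts exactly one argument, which must be a pointer to an object (or a void pointer); function pointers are rejected per LWG4138, pointers to variably modified types are rejected as well, and the call is given bool type. Per the comments in the hunk this is meant to back std::is_within_lifetime. Roughly, the accepted and rejected shapes look like this (illustrative sketch only; the rejected calls are commented out because they would be diagnosed):

```cpp
consteval bool probe() {
  int x = 0;
  int *p = &x;
  return __builtin_is_within_lifetime(p); // OK: pointer to an object
}

void rejected() {
  void (*fp)() = nullptr;
  // (void)__builtin_is_within_lifetime(fp);   // error: function pointer (LWG4138)

  int n = 4;
  int vla[n]; // VLA, accepted by clang as an extension
  // (void)__builtin_is_within_lifetime(&vla); // error: pointer to a VLA
  (void)fp;
  (void)vla;
}
```
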
diff --git clang/lib/Sema/SemaCoroutine.cpp clang/lib/Sema/SemaCoroutine.cpp
index 1bb8955f6f87..a574d56646f3 100644
--- clang/lib/Sema/SemaCoroutine.cpp
+++ clang/lib/Sema/SemaCoroutine.cpp
@@ -844,6 +844,19 @@ ExprResult Sema::BuildOperatorCoawaitLookupExpr(Scope *S, SourceLocation Loc) {
   return CoawaitOp;
 }
 
+static bool isAttributedCoroAwaitElidable(const QualType &QT) {
+  auto *Record = QT->getAsCXXRecordDecl();
+  return Record && Record->hasAttr<CoroAwaitElidableAttr>();
+}
+
+static bool isCoroAwaitElidableCall(Expr *Operand) {
+  if (!Operand->isPRValue()) {
+    return false;
+  }
+
+  return isAttributedCoroAwaitElidable(Operand->getType());
+}
+
 // Attempts to resolve and build a CoawaitExpr from "raw" inputs, bailing out to
 // DependentCoawaitExpr if needed.
 ExprResult Sema::BuildUnresolvedCoawaitExpr(SourceLocation Loc, Expr *Operand,
@@ -867,7 +880,16 @@ ExprResult Sema::BuildUnresolvedCoawaitExpr(SourceLocation Loc, Expr *Operand,
   }
 
   auto *RD = Promise->getType()->getAsCXXRecordDecl();
-  auto *Transformed = Operand;
+  bool AwaitElidable =
+      isCoroAwaitElidableCall(Operand) &&
+      isAttributedCoroAwaitElidable(
+          getCurFunctionDecl(/*AllowLambda=*/true)->getReturnType());
+
+  if (AwaitElidable)
+    if (auto *Call = dyn_cast<CallExpr>(Operand->IgnoreImplicit()))
+      Call->setCoroElideSafe();
+
+  Expr *Transformed = Operand;
   if (lookupMember(*this, "await_transform", RD, Loc)) {
     ExprResult R =
         buildPromiseCall(*this, Promise, Loc, "await_transform", Operand);
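
The new isCoroAwaitElidableCall/isAttributedCoroAwaitElidable helpers mark a CallExpr as "coro elide safe" only when the awaited operand is a prvalue of an attributed type and the enclosing coroutine's return type carries the same attribute. A compilable sketch of the shape involved, assuming the attribute is spelled [[clang::coro_await_elidable]] (other compilers will simply warn about an unknown attribute):

```cpp
#include <coroutine>

// Minimal coroutine task type; only the attribute placement matters here.
struct [[clang::coro_await_elidable]] Task {
  struct promise_type {
    Task get_return_object() { return {}; }
    std::suspend_never initial_suspend() noexcept { return {}; }
    std::suspend_never final_suspend() noexcept { return {}; }
    void return_void() {}
    void unhandled_exception() {}
  };
  // Make Task directly awaitable so `co_await inner()` is well-formed.
  bool await_ready() const noexcept { return true; }
  void await_suspend(std::coroutine_handle<>) const noexcept {}
  void await_resume() const noexcept {}
};

Task inner() { co_return; }

Task outer() {
  // `inner()` yields a prvalue of an attributed type, awaited inside a
  // coroutine whose return type is also attributed, so the patch calls
  // setCoroElideSafe() on this CallExpr.
  co_await inner();
}

int main() { outer(); }
```
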
diff --git clang/lib/Sema/SemaDecl.cpp clang/lib/Sema/SemaDecl.cpp
index 0f63c764536e..3c6a0dff798f 100644
--- clang/lib/Sema/SemaDecl.cpp
+++ clang/lib/Sema/SemaDecl.cpp
@@ -9766,6 +9766,7 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
     // the function decl is created above).
     // FIXME: We need a better way to separate C++ standard and clang modules.
     bool ImplicitInlineCXX20 = !getLangOpts().CPlusPlusModules ||
+                               NewFD->isConstexpr() || NewFD->isConsteval() ||
                                !NewFD->getOwningModule() ||
                                NewFD->isFromExplicitGlobalModule() ||
                                NewFD->getOwningModule()->isHeaderLikeModule();
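
Adding NewFD->isConstexpr() || NewFD->isConsteval() makes constexpr and consteval functions attached to a named C++20 module implicitly inline again, matching their behavior outside modules. A small module sketch of the case in question (not taken from the patch):

```cpp
// m.cppm -- a module interface unit; with the change above, both functions
// below are treated as implicitly inline even though they are attached to
// the named module `m`.
export module m;

export struct X {
  constexpr int f() const { return 42; }
};

export consteval int g() { return X{}.f(); }
```
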
diff --git clang/lib/Sema/SemaDeclAttr.cpp clang/lib/Sema/SemaDeclAttr.cpp
index 33547c2e6e14..72d82b424c26 100644
--- clang/lib/Sema/SemaDeclAttr.cpp
+++ clang/lib/Sema/SemaDeclAttr.cpp
@@ -2993,10 +2993,17 @@ bool Sema::checkTargetAttr(SourceLocation LiteralLoc, StringRef AttrStr) {
     return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
            << Unknown << Tune << ParsedAttrs.Tune << Target;
 
-  if (Context.getTargetInfo().getTriple().isRISCV() &&
-      ParsedAttrs.Duplicate != "")
-    return Diag(LiteralLoc, diag::err_duplicate_target_attribute)
-           << Duplicate << None << ParsedAttrs.Duplicate << Target;
+  if (Context.getTargetInfo().getTriple().isRISCV()) {
+    if (ParsedAttrs.Duplicate != "")
+      return Diag(LiteralLoc, diag::err_duplicate_target_attribute)
+             << Duplicate << None << ParsedAttrs.Duplicate << Target;
+    for (const auto &Feature : ParsedAttrs.Features) {
+      StringRef CurFeature = Feature;
+      if (!CurFeature.starts_with('+') && !CurFeature.starts_with('-'))
+        return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
+               << Unsupported << None << AttrStr << Target;
+    }
+  }
 
   if (ParsedAttrs.Duplicate != "")
     return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
@@ -6907,12 +6914,6 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
   case ParsedAttr::AT_HLSLResourceBinding:
     S.HLSL().handleResourceBindingAttr(D, AL);
     break;
-  case ParsedAttr::AT_HLSLROV:
-    handleSimpleAttribute<HLSLROVAttr>(S, D, AL);
-    break;
-  case ParsedAttr::AT_HLSLResourceClass:
-    S.HLSL().handleResourceClassAttr(D, AL);
-    break;
   case ParsedAttr::AT_HLSLParamModifier:
     S.HLSL().handleParamModifierAttr(D, AL);
     break;
diff --git clang/lib/Sema/SemaDeclCXX.cpp clang/lib/Sema/SemaDeclCXX.cpp
index 3044f1218f5b..6b6fa98bf394 100644
--- clang/lib/Sema/SemaDeclCXX.cpp
+++ clang/lib/Sema/SemaDeclCXX.cpp
@@ -4396,8 +4396,9 @@ Sema::BuildMemInitializer(Decl *ConstructorD,
           for (auto const &Base : ClassDecl->bases()) {
             auto BaseTemplate =
                 Base.getType()->getAs<TemplateSpecializationType>();
-            if (BaseTemplate && Context.hasSameTemplateName(
-                                    BaseTemplate->getTemplateName(), TN)) {
+            if (BaseTemplate &&
+                Context.hasSameTemplateName(BaseTemplate->getTemplateName(), TN,
+                                            /*IgnoreDeduced=*/true)) {
               Diag(IdLoc, diag::ext_unqualified_base_class)
                   << SourceRange(IdLoc, Init->getSourceRange().getEnd());
               BaseType = Base.getType();
@@ -8450,10 +8451,12 @@ private:
     if (Obj.first.isInvalid() || Obj.second.isInvalid())
       return {ExprError(), ExprError()};
     CXXCastPath Path = {Base};
-    return {S.ImpCastExprToType(Obj.first.get(), Base->getType(),
-                                CK_DerivedToBase, VK_LValue, &Path),
-            S.ImpCastExprToType(Obj.second.get(), Base->getType(),
-                                CK_DerivedToBase, VK_LValue, &Path)};
+    const auto CastToBase = [&](Expr *E) {
+      QualType ToType = S.Context.getQualifiedType(
+          Base->getType(), E->getType().getQualifiers());
+      return S.ImpCastExprToType(E, ToType, CK_DerivedToBase, VK_LValue, &Path);
+    };
+    return {CastToBase(Obj.first.get()), CastToBase(Obj.second.get())};
   }
 
   ExprPair getField(FieldDecl *Field) {
@@ -11457,8 +11460,8 @@ bool Sema::CheckDeductionGuideDeclarator(Declarator &D, QualType &R,
     if (auto RetTST =
             TSI->getTypeLoc().getAsAdjusted<TemplateSpecializationTypeLoc>()) {
       TemplateName SpecifiedName = RetTST.getTypePtr()->getTemplateName();
-      bool TemplateMatches =
-          Context.hasSameTemplateName(SpecifiedName, GuidedTemplate);
+      bool TemplateMatches = Context.hasSameTemplateName(
+          SpecifiedName, GuidedTemplate, /*IgnoreDeduced=*/true);
 
       const QualifiedTemplateName *Qualifiers =
           SpecifiedName.getAsQualifiedTemplateName();
diff --git clang/lib/Sema/SemaExpr.cpp clang/lib/Sema/SemaExpr.cpp
index e291ef6c97ee..8f3e15cc9a9b 100644
--- clang/lib/Sema/SemaExpr.cpp
+++ clang/lib/Sema/SemaExpr.cpp
@@ -5429,6 +5429,8 @@ struct EnsureImmediateInvocationInDefaultArgs
   EnsureImmediateInvocationInDefaultArgs(Sema &SemaRef)
       : TreeTransform(SemaRef) {}
 
+  bool AlwaysRebuild() { return true; }
+
   // Lambda can only have immediate invocations in the default
   // args of their parameters, which is transformed upon calling the closure.
   // The body is not a subexpression, so we have nothing to do.
@@ -5470,7 +5472,7 @@ ExprResult Sema::BuildCXXDefaultArgExpr(SourceLocation CallLoc,
   assert(Param->hasDefaultArg() && "can't build nonexistent default arg");
 
   bool NestedDefaultChecking = isCheckingDefaultArgumentOrInitializer();
-  bool InLifetimeExtendingContext = isInLifetimeExtendingContext();
+  bool NeedRebuild = needsRebuildOfDefaultArgOrInit();
   std::optional<ExpressionEvaluationContextRecord::InitializationContext>
       InitializationContext =
           OutermostDeclarationWithDelayedImmediateInvocations();
@@ -5506,13 +5508,15 @@ ExprResult Sema::BuildCXXDefaultArgExpr(SourceLocation CallLoc,
 
     // Rewrite the call argument that was created from the corresponding
     // parameter's default argument.
-    if (V.HasImmediateCalls || InLifetimeExtendingContext) {
+    if (V.HasImmediateCalls ||
+        (NeedRebuild && isa_and_present<ExprWithCleanups>(Param->getInit()))) {
       if (V.HasImmediateCalls)
         ExprEvalContexts.back().DelayedDefaultInitializationContext = {
             CallLoc, Param, CurContext};
       // Pass down lifetime extending flag, and collect temporaries in
       // CreateMaterializeTemporaryExpr when we rewrite the call argument.
-      keepInLifetimeExtendingContext();
+      currentEvaluationContext().InLifetimeExtendingContext =
+          parentEvaluationContext().InLifetimeExtendingContext;
       EnsureImmediateInvocationInDefaultArgs Immediate(*this);
       ExprResult Res;
       runWithSufficientStackSpace(CallLoc, [&] {
@@ -5558,7 +5562,7 @@ ExprResult Sema::BuildCXXDefaultInitExpr(SourceLocation Loc, FieldDecl *Field) {
   Expr *Init = nullptr;
 
   bool NestedDefaultChecking = isCheckingDefaultArgumentOrInitializer();
-
+  bool NeedRebuild = needsRebuildOfDefaultArgOrInit();
   EnterExpressionEvaluationContext EvalContext(
       *this, ExpressionEvaluationContext::PotentiallyEvaluated, Field);
 
@@ -5593,12 +5597,27 @@ ExprResult Sema::BuildCXXDefaultInitExpr(SourceLocation Loc, FieldDecl *Field) {
   ImmediateCallVisitor V(getASTContext());
   if (!NestedDefaultChecking)
     V.TraverseDecl(Field);
-  if (V.HasImmediateCalls) {
+
+  // CWG1815
+  // Support lifetime extension of temporary created by aggregate
+  // initialization using a default member initializer. We should rebuild
+  // the initializer in a lifetime extension context if the initializer
+  // expression is an ExprWithCleanups. Then make sure the normal lifetime
+  // extension code recurses into the default initializer and does lifetime
+  // extension when warranted.
+  bool ContainsAnyTemporaries =
+      isa_and_present<ExprWithCleanups>(Field->getInClassInitializer());
+  if (Field->getInClassInitializer() &&
+      !Field->getInClassInitializer()->containsErrors() &&
+      (V.HasImmediateCalls || (NeedRebuild && ContainsAnyTemporaries))) {
     ExprEvalContexts.back().DelayedDefaultInitializationContext = {Loc, Field,
                                                                    CurContext};
     ExprEvalContexts.back().IsCurrentlyCheckingDefaultArgumentOrInitializer =
         NestedDefaultChecking;
-
+    // Pass down lifetime extending flag, and collect temporaries in
+    // CreateMaterializeTemporaryExpr when we rewrite the call argument.
+    currentEvaluationContext().InLifetimeExtendingContext =
+        parentEvaluationContext().InLifetimeExtendingContext;
     EnsureImmediateInvocationInDefaultArgs Immediate(*this);
     ExprResult Res;
     runWithSufficientStackSpace(Loc, [&] {
@@ -17622,7 +17641,8 @@ HandleImmediateInvocations(Sema &SemaRef,
         (SemaRef.inTemplateInstantiation() && !ImmediateEscalating)) {
       SemaRef.Diag(DR->getBeginLoc(), diag::err_invalid_consteval_take_address)
           << ND << isa<CXXRecordDecl>(ND) << FD->isConsteval();
-      SemaRef.Diag(ND->getLocation(), diag::note_declared_at);
+      if (!FD->getBuiltinID())
+        SemaRef.Diag(ND->getLocation(), diag::note_declared_at);
       if (auto Context =
               SemaRef.InnermostDeclarationWithDelayedImmediateInvocations()) {
         SemaRef.Diag(Context->Loc, diag::note_invalid_consteval_initializer)
@@ -17674,11 +17694,10 @@ void Sema::PopExpressionEvaluationContext() {
 
   // Append the collected materialized temporaries into previous context before
   // exit if the previous also is a lifetime extending context.
-  auto &PrevRecord = parentEvaluationContext();
   if (getLangOpts().CPlusPlus23 && Rec.InLifetimeExtendingContext &&
-      PrevRecord.InLifetimeExtendingContext &&
+      parentEvaluationContext().InLifetimeExtendingContext &&
       !Rec.ForRangeLifetimeExtendTemps.empty()) {
-    PrevRecord.ForRangeLifetimeExtendTemps.append(
+    parentEvaluationContext().ForRangeLifetimeExtendTemps.append(
         Rec.ForRangeLifetimeExtendTemps);
   }
 
diff --git clang/lib/Sema/SemaExprCXX.cpp clang/lib/Sema/SemaExprCXX.cpp
index b7531581d37f..ac3fe6ab8f9b 100644
--- clang/lib/Sema/SemaExprCXX.cpp
+++ clang/lib/Sema/SemaExprCXX.cpp
@@ -1540,9 +1540,6 @@ Sema::BuildCXXTypeConstructExpr(TypeSourceInfo *TInfo,
                                 bool ListInitialization) {
   QualType Ty = TInfo->getType();
   SourceLocation TyBeginLoc = TInfo->getTypeLoc().getBeginLoc();
-
-  assert((!ListInitialization || Exprs.size() == 1) &&
-         "List initialization must have exactly one expression.");
   SourceRange FullRange = SourceRange(TyBeginLoc, RParenOrBraceLoc);
 
   InitializedEntity Entity =
@@ -4313,8 +4310,10 @@ Sema::PerformImplicitConversion(Expr *From, QualType ToType,
 // from type to the elements of the to type without resizing the vector.
 static QualType adjustVectorType(ASTContext &Context, QualType FromTy,
                                  QualType ToType, QualType *ElTy = nullptr) {
-  auto *ToVec = ToType->castAs<VectorType>();
-  QualType ElType = ToVec->getElementType();
+  QualType ElType = ToType;
+  if (auto *ToVec = ToType->getAs<VectorType>())
+    ElType = ToVec->getElementType();
+
   if (ElTy)
     *ElTy = ElType;
   if (!FromTy->isVectorType())
@@ -4475,7 +4474,7 @@ Sema::PerformImplicitConversion(Expr *From, QualType ToType,
   case ICK_Integral_Conversion: {
     QualType ElTy = ToType;
     QualType StepTy = ToType;
-    if (ToType->isVectorType())
+    if (FromType->isVectorType() || ToType->isVectorType())
       StepTy = adjustVectorType(Context, FromType, ToType, &ElTy);
     if (ElTy->isBooleanType()) {
       assert(FromType->castAs<EnumType>()->getDecl()->isFixed() &&
@@ -4495,7 +4494,7 @@ Sema::PerformImplicitConversion(Expr *From, QualType ToType,
   case ICK_Floating_Promotion:
   case ICK_Floating_Conversion: {
     QualType StepTy = ToType;
-    if (ToType->isVectorType())
+    if (FromType->isVectorType() || ToType->isVectorType())
       StepTy = adjustVectorType(Context, FromType, ToType);
     From = ImpCastExprToType(From, StepTy, CK_FloatingCast, VK_PRValue,
                              /*BasePath=*/nullptr, CCK)
@@ -4527,7 +4526,7 @@ Sema::PerformImplicitConversion(Expr *From, QualType ToType,
   case ICK_Floating_Integral: {
     QualType ElTy = ToType;
     QualType StepTy = ToType;
-    if (ToType->isVectorType())
+    if (FromType->isVectorType() || ToType->isVectorType())
       StepTy = adjustVectorType(Context, FromType, ToType, &ElTy);
     if (ElTy->isRealFloatingType())
       From = ImpCastExprToType(From, StepTy, CK_IntegralToFloating, VK_PRValue,
@@ -4669,11 +4668,11 @@ Sema::PerformImplicitConversion(Expr *From, QualType ToType,
     }
     QualType ElTy = FromType;
     QualType StepTy = ToType;
-    if (FromType->isVectorType()) {
-      if (getLangOpts().HLSL)
-        StepTy = adjustVectorType(Context, FromType, ToType);
+    if (FromType->isVectorType())
       ElTy = FromType->castAs<VectorType>()->getElementType();
-    }
+    if (getLangOpts().HLSL &&
+        (FromType->isVectorType() || ToType->isVectorType()))
+      StepTy = adjustVectorType(Context, FromType, ToType);
 
     From = ImpCastExprToType(From, StepTy, ScalarTypeToBooleanCastKind(ElTy),
                              VK_PRValue,
@@ -4828,8 +4827,8 @@ Sema::PerformImplicitConversion(Expr *From, QualType ToType,
     // TODO: Support HLSL matrices.
     assert((!From->getType()->isMatrixType() && !ToType->isMatrixType()) &&
            "Dimension conversion for matrix types is not implemented yet.");
-    assert(ToType->isVectorType() &&
-           "Dimension conversion is only supported for vector types.");
+    assert((ToType->isVectorType() || ToType->isBuiltinType()) &&
+           "Dimension conversion output must be vector or scalar type.");
     switch (SCS.Dimension) {
     case ICK_HLSL_Vector_Splat: {
       // Vector splat from any arithmetic type to a vector.
@@ -4841,18 +4840,18 @@ Sema::PerformImplicitConversion(Expr *From, QualType ToType,
     }
     case ICK_HLSL_Vector_Truncation: {
       // Note: HLSL built-in vectors are ExtVectors. Since this truncates a
-      // vector to a smaller vector, this can only operate on arguments where
-      // the source and destination types are ExtVectors.
-      assert(From->getType()->isExtVectorType() && ToType->isExtVectorType() &&
-             "HLSL vector truncation should only apply to ExtVectors");
+      // vector to a smaller vector or to a scalar, this can only operate on
+      // arguments where the source type is an ExtVector and the destination
+      // type is either an ExtVectorType or a builtin scalar type.
       auto *FromVec = From->getType()->castAs<VectorType>();
-      auto *ToVec = ToType->castAs<VectorType>();
-      QualType ElType = FromVec->getElementType();
-      QualType TruncTy =
-          Context.getExtVectorType(ElType, ToVec->getNumElements());
+      QualType TruncTy = FromVec->getElementType();
+      if (auto *ToVec = ToType->getAs<VectorType>())
+        TruncTy = Context.getExtVectorType(TruncTy, ToVec->getNumElements());
       From = ImpCastExprToType(From, TruncTy, CK_HLSLVectorTruncation,
                                From->getValueKind())
                  .get();
+
       break;
     }
     case ICK_Identity:
@@ -5111,6 +5110,7 @@ static bool CheckUnaryTypeTraitTypeCompleteness(Sema &S, TypeTrait UTT,
   case UTT_IsDestructible:
   case UTT_IsNothrowDestructible:
   case UTT_IsTriviallyDestructible:
+  case UTT_IsIntangibleType:
     if (ArgTy->isIncompleteArrayType() || ArgTy->isVoidType())
       return true;
 
@@ -5170,7 +5170,8 @@ static bool HasNonDeletedDefaultedEqualityComparison(Sema &S,
 
     // const ClassT& obj;
     OpaqueValueExpr Operand(
-        {}, Decl->getTypeForDecl()->getCanonicalTypeUnqualified().withConst(),
+        KeyLoc,
+        Decl->getTypeForDecl()->getCanonicalTypeUnqualified().withConst(),
         ExprValueKind::VK_LValue);
     UnresolvedSet<16> Functions;
     // obj == obj;
@@ -5696,6 +5697,16 @@ static bool EvaluateUnaryTypeTrait(Sema &Self, TypeTrait UTT,
         return true;
     return false;
   }
+  case UTT_IsIntangibleType:
+    assert(Self.getLangOpts().HLSL &&
+           "intangible types are an HLSL-only feature");
+    if (!T->isVoidType() && !T->isIncompleteArrayType())
+      if (Self.RequireCompleteType(TInfo->getTypeLoc().getBeginLoc(), T,
+                                   diag::err_incomplete_type))
+        return false;
+    if (DiagnoseVLAInCXXTypeTrait(Self, TInfo,
+                                  tok::kw___builtin_hlsl_is_intangible))
+      return false;
+    return Self.HLSL().IsIntangibleType(T);
   }
 }
 
diff --git clang/lib/Sema/SemaHLSL.cpp clang/lib/Sema/SemaHLSL.cpp
index 778d524a0054..4e44813fe515 100644
--- clang/lib/Sema/SemaHLSL.cpp
+++ clang/lib/Sema/SemaHLSL.cpp
@@ -9,9 +9,14 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Sema/SemaHLSL.h"
+#include "clang/AST/ASTContext.h"
 #include "clang/AST/Decl.h"
+#include "clang/AST/DeclBase.h"
+#include "clang/AST/DeclCXX.h"
 #include "clang/AST/Expr.h"
 #include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/AST/Type.h"
+#include "clang/Basic/Builtins.h"
 #include "clang/Basic/DiagnosticSema.h"
 #include "clang/Basic/LLVM.h"
 #include "clang/Basic/SourceLocation.h"
@@ -19,7 +24,9 @@
 #include "clang/Sema/Initialization.h"
 #include "clang/Sema/ParsedAttr.h"
 #include "clang/Sema/Sema.h"
+#include "clang/Sema/Template.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Casting.h"
@@ -27,6 +34,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/TargetParser/Triple.h"
 #include <iterator>
+#include <utility>
 
 using namespace clang;
 
@@ -556,46 +564,129 @@ void SemaHLSL::handleShaderAttr(Decl *D, const ParsedAttr &AL) {
     D->addAttr(NewAttr);
 }
 
-void SemaHLSL::handleResourceClassAttr(Decl *D, const ParsedAttr &AL) {
-  if (!AL.isArgIdent(0)) {
-    Diag(AL.getLoc(), diag::err_attribute_argument_type)
-        << AL << AANT_ArgumentIdentifier;
-    return;
-  }
+bool clang::CreateHLSLAttributedResourceType(Sema &S, QualType Wrapped,
+                                             ArrayRef<const Attr *> AttrList,
+                                             QualType &ResType) {
+  assert(AttrList.size() && "expected list of resource attributes");
 
-  IdentifierLoc *Loc = AL.getArgAsIdent(0);
-  StringRef Identifier = Loc->Ident->getName();
-  SourceLocation ArgLoc = Loc->Loc;
+  QualType Contained = QualType();
+  HLSLAttributedResourceType::Attributes ResAttrs = {};
 
-  // Validate.
-  llvm::dxil::ResourceClass RC;
-  if (!HLSLResourceClassAttr::ConvertStrToResourceClass(Identifier, RC)) {
-    Diag(ArgLoc, diag::warn_attribute_type_not_supported)
-        << "ResourceClass" << Identifier;
-    return;
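+  // Walk the collected attributes, diagnosing duplicates and recording the
+  // resource class and ROV flag.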
+  bool HasResourceClass = false;
+  for (const Attr *A : AttrList) {
+    if (!A)
+      continue;
+    switch (A->getKind()) {
+    case attr::HLSLResourceClass: {
+      llvm::dxil::ResourceClass RC =
+          cast<HLSLResourceClassAttr>(A)->getResourceClass();
+      if (HasResourceClass) {
+        S.Diag(A->getLocation(), ResAttrs.ResourceClass == RC
+                                     ? diag::warn_duplicate_attribute_exact
+                                     : diag::warn_duplicate_attribute)
+            << A;
+        return false;
+      }
+      ResAttrs.ResourceClass = RC;
+      HasResourceClass = true;
+      break;
+    }
+    case attr::HLSLROV:
+      if (ResAttrs.IsROV) {
+        S.Diag(A->getLocation(), diag::warn_duplicate_attribute_exact) << A;
+        return false;
+      }
+      ResAttrs.IsROV = true;
+      break;
+    default:
+      llvm_unreachable("unhandled resource attribute type");
+    }
+  }
+
+  if (!HasResourceClass) {
+    S.Diag(AttrList.back()->getRange().getEnd(),
+           diag::err_hlsl_missing_resource_class);
+    return false;
   }
 
-  D->addAttr(HLSLResourceClassAttr::Create(getASTContext(), RC, ArgLoc));
+  ResType = S.getASTContext().getHLSLAttributedResourceType(Wrapped, Contained,
+                                                            ResAttrs);
+  return true;
 }
 
-// Validates HLSL resource type attribute and adds it to the list to be
-// processed into a single HLSLAttributedResourceType later on.
-// Returns false if the attribute is invalid.
+// Validates and creates an HLSL attribute that is applied as a type attribute
+// on an HLSL resource. The attributes are collected in HLSLResourcesTypeAttrs
+// and, at the end of the declaration, applied to the declaration type by
+// wrapping it in HLSLAttributedResourceType.
 bool SemaHLSL::handleResourceTypeAttr(const ParsedAttr &AL) {
-  // FIXME: placeholder - not yet implemented
+  Attr *A = nullptr;
+
+  // Validate the number of arguments.
+  if (!AL.checkExactlyNumArgs(SemaRef, AL.getMinArgs()))
+    return false;
+
+  switch (AL.getKind()) {
+  case ParsedAttr::AT_HLSLResourceClass: {
+    if (!AL.isArgIdent(0)) {
+      Diag(AL.getLoc(), diag::err_attribute_argument_type)
+          << AL << AANT_ArgumentIdentifier;
+      return false;
+    }
+
+    IdentifierLoc *Loc = AL.getArgAsIdent(0);
+    StringRef Identifier = Loc->Ident->getName();
+    SourceLocation ArgLoc = Loc->Loc;
+
+    // Validate resource class value
+    llvm::dxil::ResourceClass RC;
+    if (!HLSLResourceClassAttr::ConvertStrToResourceClass(Identifier, RC)) {
+      Diag(ArgLoc, diag::warn_attribute_type_not_supported)
+          << "ResourceClass" << Identifier;
+      return false;
+    }
+    A = HLSLResourceClassAttr::Create(getASTContext(), RC, AL.getLoc());
+    break;
+  }
+  case ParsedAttr::AT_HLSLROV:
+    A = HLSLROVAttr::Create(getASTContext(), AL.getLoc());
+    break;
+  default:
+    llvm_unreachable("unhandled HLSL attribute");
+  }
+
+  HLSLResourcesTypeAttrs.emplace_back(A);
   return true;
 }
 
-// Combines all resource type attributes and create HLSLAttributedResourceType.
+// Combines all resource type attributes and creates HLSLAttributedResourceType.
 QualType SemaHLSL::ProcessResourceTypeAttributes(QualType CurrentType) {
-  // FIXME: placeholder - not yet implemented
-  return CurrentType;
+  if (!HLSLResourcesTypeAttrs.size())
+    return CurrentType;
+
+  QualType QT = CurrentType;
+  if (CreateHLSLAttributedResourceType(SemaRef, CurrentType,
+                                       HLSLResourcesTypeAttrs, QT)) {
+    const HLSLAttributedResourceType *RT =
+        dyn_cast<HLSLAttributedResourceType>(QT.getTypePtr());
+    // Use the location of the first attribute as the location of the
+    // aggregated type. The attributes are stored in HLSLResourcesTypeAttrs in
+    // the same order as they are parsed.
+    SourceLocation Loc = HLSLResourcesTypeAttrs[0]->getLoc();
+    LocsForHLSLAttributedResources.insert(std::pair(RT, Loc));
+  }
+  HLSLResourcesTypeAttrs.clear();
+  return QT;
 }
 
 // Returns source location for the HLSLAttributedResourceType
 SourceLocation
 SemaHLSL::TakeLocForHLSLAttribute(const HLSLAttributedResourceType *RT) {
-  // FIXME: placeholder - not yet implemented
+  auto I = LocsForHLSLAttributedResources.find(RT);
+  if (I != LocsForHLSLAttributedResources.end()) {
+    SourceLocation Loc = I->second;
+    LocsForHLSLAttributedResources.erase(I);
+    return Loc;
+  }
   return SourceLocation();
 }
 
@@ -653,33 +744,19 @@ static void updateResourceClassFlagsFromDeclResourceClass(
   }
 }
 
-template <typename T>
-static const T *getSpecifiedHLSLAttrFromRecordDecl(RecordDecl *TheRecordDecl) {
-  if (!TheRecordDecl)
-    return nullptr;
-
-  if (TheRecordDecl->hasAttr<T>())
-    return TheRecordDecl->getAttr<T>();
-  for (auto *FD : TheRecordDecl->fields()) {
-    const T *Attr = FD->getAttr<T>();
-    if (Attr)
-      return Attr;
+const HLSLAttributedResourceType *
+findAttributedResourceTypeOnField(VarDecl *VD) {
+  assert(VD != nullptr && "expected VarDecl");
+  if (RecordDecl *RD = getRecordDeclFromVarDecl(VD)) {
+    for (auto *FD : RD->fields()) {
+      if (const HLSLAttributedResourceType *AttrResType =
+              dyn_cast<HLSLAttributedResourceType>(FD->getType().getTypePtr()))
+        return AttrResType;
+    }
   }
   return nullptr;
 }
 
-template <typename T>
-static const T *getSpecifiedHLSLAttrFromVarDecl(VarDecl *VD) {
-  RecordDecl *TheRecordDecl = nullptr;
-  if (VD) {
-    TheRecordDecl = getRecordDeclFromVarDecl(VD);
-    if (!TheRecordDecl)
-      return nullptr;
-  }
-
-  return getSpecifiedHLSLAttrFromRecordDecl<T>(TheRecordDecl);
-}
-
 static void updateResourceClassFlagsFromRecordType(RegisterBindingFlags &Flags,
                                                    const RecordType *RT) {
   llvm::SmallVector<const Type *> TypesToScan;
@@ -699,10 +776,11 @@ static void updateResourceClassFlagsFromRecordType(RegisterBindingFlags &Flags,
 
     const RecordDecl *RD = RT->getDecl();
     for (FieldDecl *FD : RD->fields()) {
-      if (HLSLResourceClassAttr *RCAttr =
-              FD->getAttr<HLSLResourceClassAttr>()) {
+      const Type *FieldTy = FD->getType().getTypePtr();
+      if (const HLSLAttributedResourceType *AttrResType =
+              dyn_cast<HLSLAttributedResourceType>(FieldTy)) {
         updateResourceClassFlagsFromDeclResourceClass(
-            Flags, RCAttr->getResourceClass());
+            Flags, AttrResType->getAttrs().ResourceClass);
         continue;
       }
       TypesToScan.emplace_back(FD->getType().getTypePtr());
@@ -729,11 +807,10 @@ static RegisterBindingFlags HLSLFillRegisterBindingFlags(Sema &S,
   }
   // Samplers, UAVs, and SRVs are VarDecl types
   else if (VarDecl *TheVarDecl = dyn_cast<VarDecl>(TheDecl)) {
-    const HLSLResourceClassAttr *resClassAttr =
-        getSpecifiedHLSLAttrFromVarDecl<HLSLResourceClassAttr>(TheVarDecl);
-    if (resClassAttr) {
+    if (const HLSLAttributedResourceType *AttrResType =
+            findAttributedResourceTypeOnField(TheVarDecl)) {
       Flags.Resource = true;
-      Flags.ResourceClass = resClassAttr->getResourceClass();
+      Flags.ResourceClass = AttrResType->getAttrs().ResourceClass;
     } else {
       const clang::Type *TheBaseType = TheVarDecl->getType().getTypePtr();
       while (TheBaseType->isArrayType())
@@ -834,17 +911,10 @@ static void ValidateMultipleRegisterAnnotations(Sema &S, Decl *TheDecl,
 static void DiagnoseHLSLRegisterAttribute(Sema &S, SourceLocation &ArgLoc,
                                           Decl *TheDecl, RegisterType regType) {
 
-  // Samplers, UAVs, and SRVs are VarDecl types
-  VarDecl *TheVarDecl = dyn_cast<VarDecl>(TheDecl);
-  // Cbuffers and Tbuffers are HLSLBufferDecl types
-  HLSLBufferDecl *CBufferOrTBuffer = dyn_cast<HLSLBufferDecl>(TheDecl);
-
   // exactly one of these two types should be set
-  assert(((TheVarDecl && !CBufferOrTBuffer) ||
-          (!TheVarDecl && CBufferOrTBuffer)) &&
-         "either TheVarDecl or CBufferOrTBuffer should be set");
-  (void)TheVarDecl;
-  (void)CBufferOrTBuffer;
+  assert(((isa<VarDecl>(TheDecl) && !isa<HLSLBufferDecl>(TheDecl)) ||
+          (!isa<VarDecl>(TheDecl) && isa<HLSLBufferDecl>(TheDecl))) &&
+         "expecting VarDecl or HLSLBufferDecl");
 
   RegisterBindingFlags Flags = HLSLFillRegisterBindingFlags(S, TheDecl);
   assert((int)Flags.Other + (int)Flags.Resource + (int)Flags.Basic +
@@ -1448,6 +1518,14 @@ bool CheckNoDoubleVectors(Sema *S, CallExpr *TheCall) {
   return CheckArgsTypesAreCorrect(S, TheCall, S->Context.FloatTy,
                                   checkDoubleVector);
 }
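+
+// Checks that the call's arguments have a floating-point or signed integer
+// representation; returns true on error.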
+bool CheckFloatingOrSignedIntRepresentation(Sema *S, CallExpr *TheCall) {
+  auto checkAllSignedTypes = [](clang::QualType PassedType) -> bool {
+    return !PassedType->hasSignedIntegerRepresentation() &&
+           !PassedType->hasFloatingRepresentation();
+  };
+  return CheckArgsTypesAreCorrect(S, TheCall, S->Context.IntTy,
+                                  checkAllSignedTypes);
+}
 
 bool CheckUnsignedIntRepresentation(Sema *S, CallExpr *TheCall) {
   auto checkAllUnsignedTypes = [](clang::QualType PassedType) -> bool {
@@ -1466,6 +1544,79 @@ void SetElementTypeAsReturnType(Sema *S, CallExpr *TheCall,
   TheCall->setType(ReturnType);
 }
 
+static bool CheckScalarOrVector(Sema *S, CallExpr *TheCall, QualType Scalar,
+                                unsigned ArgIndex) {
+  assert(TheCall->getNumArgs() >= ArgIndex);
+  QualType ArgType = TheCall->getArg(ArgIndex)->getType();
+  auto *VTy = ArgType->getAs<VectorType>();
+  // Not the expected scalar type or a vector of that scalar type.
+  if (!(S->Context.hasSameUnqualifiedType(ArgType, Scalar) ||
+        (VTy &&
+         S->Context.hasSameUnqualifiedType(VTy->getElementType(), Scalar)))) {
+    S->Diag(TheCall->getArg(0)->getBeginLoc(),
+            diag::err_typecheck_expect_scalar_or_vector)
+        << ArgType << Scalar;
+    return true;
+  }
+  return false;
+}
+
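+// Checks the scalar (bool condition) form of select: the two result operands
+// must have the same type, which becomes the type of the call.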
+static bool CheckBoolSelect(Sema *S, CallExpr *TheCall) {
+  assert(TheCall->getNumArgs() == 3);
+  Expr *Arg1 = TheCall->getArg(1);
+  Expr *Arg2 = TheCall->getArg(2);
+  if (!S->Context.hasSameUnqualifiedType(Arg1->getType(), Arg2->getType())) {
+    S->Diag(TheCall->getBeginLoc(),
+            diag::err_typecheck_call_different_arg_types)
+        << Arg1->getType() << Arg2->getType() << Arg1->getSourceRange()
+        << Arg2->getSourceRange();
+    return true;
+  }
+
+  TheCall->setType(Arg1->getType());
+  return false;
+}
+
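+// Checks the vector form of select: both result operands must be vectors of
+// the same type and have the same number of elements as the condition vector.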
+static bool CheckVectorSelect(Sema *S, CallExpr *TheCall) {
+  assert(TheCall->getNumArgs() == 3);
+  Expr *Arg1 = TheCall->getArg(1);
+  Expr *Arg2 = TheCall->getArg(2);
+  if (!Arg1->getType()->isVectorType()) {
+    S->Diag(Arg1->getBeginLoc(), diag::err_builtin_non_vector_type)
+        << "Second" << TheCall->getDirectCallee() << Arg1->getType()
+        << Arg1->getSourceRange();
+    return true;
+  }
+
+  if (!Arg2->getType()->isVectorType()) {
+    S->Diag(Arg2->getBeginLoc(), diag::err_builtin_non_vector_type)
+        << "Third" << TheCall->getDirectCallee() << Arg2->getType()
+        << Arg2->getSourceRange();
+    return true;
+  }
+
+  if (!S->Context.hasSameUnqualifiedType(Arg1->getType(), Arg2->getType())) {
+    S->Diag(TheCall->getBeginLoc(),
+            diag::err_typecheck_call_different_arg_types)
+        << Arg1->getType() << Arg2->getType() << Arg1->getSourceRange()
+        << Arg2->getSourceRange();
+    return true;
+  }
+
+  // The caller has checked that Arg0 is a vector.
+  // Check that all three arguments have the same length.
+  if (TheCall->getArg(0)->getType()->getAs<VectorType>()->getNumElements() !=
+      Arg1->getType()->getAs<VectorType>()->getNumElements()) {
+    S->Diag(TheCall->getBeginLoc(),
+            diag::err_typecheck_vector_lengths_not_equal)
+        << TheCall->getArg(0)->getType() << Arg1->getType()
+        << TheCall->getArg(0)->getSourceRange() << Arg1->getSourceRange();
+    return true;
+  }
+  TheCall->setType(Arg1->getType());
+  return false;
+}
+
 // Note: returning true in this case results in CheckBuiltinFunctionCall
 // returning an ExprError
 bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
@@ -1498,6 +1649,20 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
       return true;
     break;
   }
+  case Builtin::BI__builtin_hlsl_select: {
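+    // The first operand must be a bool or a vector of bool; the scalar and
+    // vector forms are then checked separately.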
+    if (SemaRef.checkArgCount(TheCall, 3))
+      return true;
+    if (CheckScalarOrVector(&SemaRef, TheCall, getASTContext().BoolTy, 0))
+      return true;
+    QualType ArgTy = TheCall->getArg(0)->getType();
+    if (ArgTy->isBooleanType() && CheckBoolSelect(&SemaRef, TheCall))
+      return true;
+    auto *VTy = ArgTy->getAs<VectorType>();
+    if (VTy && VTy->getElementType()->isBooleanType() &&
+        CheckVectorSelect(&SemaRef, TheCall))
+      return true;
+    break;
+  }
   case Builtin::BI__builtin_hlsl_elementwise_saturate:
   case Builtin::BI__builtin_hlsl_elementwise_rcp: {
     if (CheckAllArgsHaveFloatRepresentation(&SemaRef, TheCall))
@@ -1574,6 +1739,14 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
     TheCall->setType(ArgTyA);
     break;
   }
+  case Builtin::BI__builtin_hlsl_elementwise_sign: {
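+    // sign accepts floating-point or signed integer arguments; the result's
+    // element type is set to int.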
+    if (CheckFloatingOrSignedIntRepresentation(&SemaRef, TheCall))
+      return true;
+    if (SemaRef.PrepareBuiltinElementwiseMathOneArgCall(TheCall))
+      return true;
+    SetElementTypeAsReturnType(&SemaRef, TheCall, getASTContext().IntTy);
+    break;
+  }
   // Note these are llvm builtins that we want to catch invalid intrinsic
   // generation. Normal handling of these builitns will occur elsewhere.
   case Builtin::BI__builtin_elementwise_bitreverse: {
@@ -1609,6 +1782,31 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
   return false;
 }
 
+bool SemaHLSL::IsIntangibleType(clang::QualType QT) {
+  if (QT.isNull())
+    return false;
+
+  const Type *Ty = QT->getUnqualifiedDesugaredType();
+
+  // check if it's a builtin type first (simple check, no need to cache it)
+  if (Ty->isBuiltinType())
+    return Ty->isHLSLIntangibleType();
+
+  // unwrap arrays
+  while (isa<ConstantArrayType>(Ty))
+    Ty = Ty->getArrayElementTypeNoTypeQual();
+
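+  // otherwise, only record types can be intangible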
+  const RecordType *RT =
+      dyn_cast<RecordType>(Ty->getUnqualifiedDesugaredType());
+  if (!RT)
+    return false;
+
+  CXXRecordDecl *RD = RT->getAsCXXRecordDecl();
+  assert(RD != nullptr &&
+         "all HLSL structs and classes should be CXXRecordDecls");
+  return RD->isHLSLIntangible();
+}
+
 static void BuildFlattenedTypeList(QualType BaseTy,
                                    llvm::SmallVectorImpl<QualType> &List) {
   llvm::SmallVector<QualType, 16> WorkList;
diff --git clang/lib/Sema/SemaInit.cpp clang/lib/Sema/SemaInit.cpp
index 7dc171875246..d21b8cb8c04e 100644
--- clang/lib/Sema/SemaInit.cpp
+++ clang/lib/Sema/SemaInit.cpp
@@ -750,8 +750,21 @@ void InitListChecker::FillInEmptyInitForField(unsigned Init, FieldDecl *Field,
     if (Field->hasInClassInitializer()) {
       if (VerifyOnly)
         return;
-
-      ExprResult DIE = SemaRef.BuildCXXDefaultInitExpr(Loc, Field);
+      ExprResult DIE;
+      {
+        // Enter a default initializer rebuild context so that we can support
+        // lifetime extension of temporaries created by aggregate
+        // initialization using a default member initializer.
+        // CWG1815 (https://wg21.link/CWG1815).
+        EnterExpressionEvaluationContext RebuildDefaultInit(
+            SemaRef, Sema::ExpressionEvaluationContext::PotentiallyEvaluated);
+        SemaRef.currentEvaluationContext().RebuildDefaultArgOrDefaultInit =
+            true;
+        SemaRef.currentEvaluationContext().DelayedDefaultInitializationContext =
+            SemaRef.parentEvaluationContext()
+                .DelayedDefaultInitializationContext;
+        DIE = SemaRef.BuildCXXDefaultInitExpr(Loc, Field);
+      }
       if (DIE.isInvalid()) {
         hadError = true;
         return;
@@ -7521,10 +7534,8 @@ Sema::CreateMaterializeTemporaryExpr(QualType T, Expr *Temporary,
   // are done in both CreateMaterializeTemporaryExpr and MaybeBindToTemporary,
   // but there may be a chance to merge them.
   Cleanup.setExprNeedsCleanups(false);
-  if (isInLifetimeExtendingContext()) {
-    auto &Record = ExprEvalContexts.back();
-    Record.ForRangeLifetimeExtendTemps.push_back(MTE);
-  }
+  if (isInLifetimeExtendingContext())
+    currentEvaluationContext().ForRangeLifetimeExtendTemps.push_back(MTE);
   return MTE;
 }
 
diff --git clang/lib/Sema/SemaOpenACC.cpp clang/lib/Sema/SemaOpenACC.cpp
index cf207be33175..e1fc9cea1eb2 100644
--- clang/lib/Sema/SemaOpenACC.cpp
+++ clang/lib/Sema/SemaOpenACC.cpp
@@ -1210,6 +1210,10 @@ ExprResult SemaOpenACC::CheckReductionVar(Expr *VarExpr) {
 
 void SemaOpenACC::ActOnConstruct(OpenACCDirectiveKind K,
                                  SourceLocation DirLoc) {
+  // Start an evaluation context in which to parse the clause arguments.
+  SemaRef.PushExpressionEvaluationContext(
+      Sema::ExpressionEvaluationContext::PotentiallyEvaluated);
+
   switch (K) {
   case OpenACCDirectiveKind::Invalid:
     // Nothing to do here, an invalid kind has nothing we can check here.  We
@@ -1626,6 +1630,8 @@ ExprResult SemaOpenACC::ActOnArraySectionExpr(Expr *Base, SourceLocation LBLoc,
 
 bool SemaOpenACC::ActOnStartStmtDirective(OpenACCDirectiveKind K,
                                           SourceLocation StartLoc) {
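+  // Leave the evaluation context that was entered in ActOnConstruct now that
+  // the clause expressions have been parsed.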
+  SemaRef.DiscardCleanupsInEvaluationContext();
+  SemaRef.PopExpressionEvaluationContext();
   return diagnoseConstructAppertainment(*this, K, StartLoc, /*IsStmt=*/true);
 }
 
@@ -1649,6 +1655,7 @@ StmtResult SemaOpenACC::ActOnEndStmtDirective(OpenACCDirectiveKind K,
         ParentlessLoopConstructs);
 
     ParentlessLoopConstructs.clear();
+
     return ComputeConstruct;
   }
   case OpenACCDirectiveKind::Loop: {
@@ -1704,6 +1711,11 @@ StmtResult SemaOpenACC::ActOnAssociatedStmt(SourceLocation DirectiveLoc,
 
 bool SemaOpenACC::ActOnStartDeclDirective(OpenACCDirectiveKind K,
                                           SourceLocation StartLoc) {
+  // OpenACC 3.3 2.1 (line 889)
+  // A program must not depend on the order of evaluation of expressions in
+  // clause arguments or on any side effects of the evaluations.
+  SemaRef.DiscardCleanupsInEvaluationContext();
+  SemaRef.PopExpressionEvaluationContext();
   return diagnoseConstructAppertainment(*this, K, StartLoc, /*IsStmt=*/false);
 }
 
diff --git clang/lib/Sema/SemaOpenMP.cpp clang/lib/Sema/SemaOpenMP.cpp
index 23c4903ec158..b952ffbd69f5 100644
--- clang/lib/Sema/SemaOpenMP.cpp
+++ clang/lib/Sema/SemaOpenMP.cpp
@@ -42,6 +42,7 @@
 #include "llvm/ADT/PointerEmbeddedInt.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Sequence.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Frontend/OpenMP/OMPAssume.h"
@@ -3707,6 +3708,17 @@ getMapClauseKindFromModifier(OpenMPDefaultmapClauseModifier M,
 }
 
 namespace {
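+// Collects the variables for which implicit data-sharing clauses or implicit
+// map clauses need to be generated, grouped by defaultmap kind and map kind.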
+struct VariableImplicitInfo {
+  static const unsigned MapKindNum = OMPC_MAP_unknown;
+  static const unsigned DefaultmapKindNum = OMPC_DEFAULTMAP_unknown + 1;
+
+  llvm::SetVector<Expr *> Privates;
+  llvm::SetVector<Expr *> Firstprivates;
+  llvm::SetVector<Expr *> Mappings[DefaultmapKindNum][MapKindNum];
+  llvm::SmallVector<OpenMPMapModifierKind, NumberOfOMPMapClauseModifiers>
+      MapModifiers[DefaultmapKindNum];
+};
+
 class DSAAttrChecker final : public StmtVisitor<DSAAttrChecker, void> {
   DSAStackTy *Stack;
   Sema &SemaRef;
@@ -3714,12 +3726,8 @@ class DSAAttrChecker final : public StmtVisitor<DSAAttrChecker, void> {
   bool ErrorFound = false;
   bool TryCaptureCXXThisMembers = false;
   CapturedStmt *CS = nullptr;
-  const static unsigned DefaultmapKindNum = OMPC_DEFAULTMAP_unknown + 1;
-  llvm::SmallVector<Expr *, 4> ImplicitFirstprivate;
-  llvm::SmallVector<Expr *, 4> ImplicitPrivate;
-  llvm::SmallVector<Expr *, 4> ImplicitMap[DefaultmapKindNum][OMPC_MAP_delete];
-  llvm::SmallVector<OpenMPMapModifierKind, NumberOfOMPMapClauseModifiers>
-      ImplicitMapModifier[DefaultmapKindNum];
+
+  VariableImplicitInfo ImpInfo;
   SemaOpenMP::VarsWithInheritedDSAType VarsWithInheritedDSA;
   llvm::SmallDenseSet<const ValueDecl *, 4> ImplicitDeclarations;
 
@@ -3871,9 +3879,9 @@ public:
         bool IsModifierPresent = Stack->getDefaultmapModifier(ClauseKind) ==
                                  OMPC_DEFAULTMAP_MODIFIER_present;
         if (IsModifierPresent) {
-          if (!llvm::is_contained(ImplicitMapModifier[ClauseKind],
+          if (!llvm::is_contained(ImpInfo.MapModifiers[ClauseKind],
                                   OMPC_MAP_MODIFIER_present)) {
-            ImplicitMapModifier[ClauseKind].push_back(
+            ImpInfo.MapModifiers[ClauseKind].push_back(
                 OMPC_MAP_MODIFIER_present);
           }
         }
@@ -3913,13 +3921,13 @@ public:
           IsFirstprivate =
               IsFirstprivate || (Stack->mustBeFirstprivate(ClauseKind) && !Res);
           if (IsFirstprivate) {
-            ImplicitFirstprivate.emplace_back(E);
+            ImpInfo.Firstprivates.insert(E);
           } else {
             OpenMPDefaultmapClauseModifier M =
                 Stack->getDefaultmapModifier(ClauseKind);
             OpenMPMapClauseKind Kind = getMapClauseKindFromModifier(
                 M, ClauseKind == OMPC_DEFAULTMAP_aggregate || Res);
-            ImplicitMap[ClauseKind][Kind].emplace_back(E);
+            ImpInfo.Mappings[ClauseKind][Kind].insert(E);
           }
           return;
         }
@@ -3956,9 +3964,9 @@ public:
             !DVar.RefExpr)) &&
           !Stack->isLoopControlVariable(VD).first) {
         if (Stack->getDefaultDSA() == DSA_private)
-          ImplicitPrivate.push_back(E);
+          ImpInfo.Privates.insert(E);
         else
-          ImplicitFirstprivate.push_back(E);
+          ImpInfo.Firstprivates.insert(E);
         return;
       }
 
@@ -4015,7 +4023,7 @@ public:
             getVariableCategoryFromDecl(SemaRef.getLangOpts(), FD);
         OpenMPMapClauseKind Kind = getMapClauseKindFromModifier(
             Modifier, /*IsAggregateOrDeclareTarget=*/true);
-        ImplicitMap[ClauseKind][Kind].emplace_back(E);
+        ImpInfo.Mappings[ClauseKind][Kind].insert(E);
         return;
       }
 
@@ -4050,7 +4058,7 @@ public:
         // expression.
         // TODO: try to make it firstprivate.
         if (DVar.CKind != OMPC_unknown)
-          ImplicitFirstprivate.push_back(E);
+          ImpInfo.Firstprivates.insert(E);
       }
       return;
     }
@@ -4172,18 +4180,7 @@ public:
     }
   }
   bool isErrorFound() const { return ErrorFound; }
-  ArrayRef<Expr *> getImplicitFirstprivate() const {
-    return ImplicitFirstprivate;
-  }
-  ArrayRef<Expr *> getImplicitPrivate() const { return ImplicitPrivate; }
-  ArrayRef<Expr *> getImplicitMap(OpenMPDefaultmapClauseKind DK,
-                                  OpenMPMapClauseKind MK) const {
-    return ImplicitMap[DK][MK];
-  }
-  ArrayRef<OpenMPMapModifierKind>
-  getImplicitMapModifier(OpenMPDefaultmapClauseKind Kind) const {
-    return ImplicitMapModifier[Kind];
-  }
+  const VariableImplicitInfo &getImplicitInfo() const { return ImpInfo; }
   const SemaOpenMP::VarsWithInheritedDSAType &getVarsWithInheritedDSA() const {
     return VarsWithInheritedDSA;
   }
@@ -6060,69 +6057,56 @@ StmtResult SemaOpenMP::ActOnOpenMPExecutableDirective(
       return StmtError();
     // Generate list of implicitly defined firstprivate variables.
     VarsWithInheritedDSA = DSAChecker.getVarsWithInheritedDSA();
+    VariableImplicitInfo ImpInfo = DSAChecker.getImplicitInfo();
 
-    SmallVector<Expr *, 4> ImplicitFirstprivates(
-        DSAChecker.getImplicitFirstprivate());
-    SmallVector<Expr *, 4> ImplicitPrivates(DSAChecker.getImplicitPrivate());
-    const unsigned DefaultmapKindNum = OMPC_DEFAULTMAP_unknown + 1;
-    SmallVector<Expr *, 4> ImplicitMaps[DefaultmapKindNum][OMPC_MAP_delete];
-    SmallVector<OpenMPMapModifierKind, NumberOfOMPMapClauseModifiers>
-        ImplicitMapModifiers[DefaultmapKindNum];
     SmallVector<SourceLocation, NumberOfOMPMapClauseModifiers>
-        ImplicitMapModifiersLoc[DefaultmapKindNum];
+        ImplicitMapModifiersLoc[VariableImplicitInfo::DefaultmapKindNum];
     // Get the original location of present modifier from Defaultmap clause.
-    SourceLocation PresentModifierLocs[DefaultmapKindNum];
+    SourceLocation PresentModifierLocs[VariableImplicitInfo::DefaultmapKindNum];
     for (OMPClause *C : Clauses) {
       if (auto *DMC = dyn_cast<OMPDefaultmapClause>(C))
         if (DMC->getDefaultmapModifier() == OMPC_DEFAULTMAP_MODIFIER_present)
           PresentModifierLocs[DMC->getDefaultmapKind()] =
               DMC->getDefaultmapModifierLoc();
     }
-    for (unsigned VC = 0; VC < DefaultmapKindNum; ++VC) {
-      auto K = static_cast<OpenMPDefaultmapClauseKind>(VC);
-      for (unsigned I = 0; I < OMPC_MAP_delete; ++I) {
-        ArrayRef<Expr *> ImplicitMap =
-            DSAChecker.getImplicitMap(K, static_cast<OpenMPMapClauseKind>(I));
-        ImplicitMaps[VC][I].append(ImplicitMap.begin(), ImplicitMap.end());
-      }
-      ArrayRef<OpenMPMapModifierKind> ImplicitModifier =
-          DSAChecker.getImplicitMapModifier(K);
-      ImplicitMapModifiers[VC].append(ImplicitModifier.begin(),
-                                      ImplicitModifier.end());
-      std::fill_n(std::back_inserter(ImplicitMapModifiersLoc[VC]),
-                  ImplicitModifier.size(), PresentModifierLocs[VC]);
+
+    for (OpenMPDefaultmapClauseKind K :
+         llvm::enum_seq_inclusive<OpenMPDefaultmapClauseKind>(
+             OpenMPDefaultmapClauseKind(), OMPC_DEFAULTMAP_unknown)) {
+      std::fill_n(std::back_inserter(ImplicitMapModifiersLoc[K]),
+                  ImpInfo.MapModifiers[K].size(), PresentModifierLocs[K]);
     }
     // Mark taskgroup task_reduction descriptors as implicitly firstprivate.
     for (OMPClause *C : Clauses) {
       if (auto *IRC = dyn_cast<OMPInReductionClause>(C)) {
         for (Expr *E : IRC->taskgroup_descriptors())
           if (E)
-            ImplicitFirstprivates.emplace_back(E);
+            ImpInfo.Firstprivates.insert(E);
       }
       // OpenMP 5.0, 2.10.1 task Construct
       // [detach clause]... The event-handle will be considered as if it was
       // specified on a firstprivate clause.
       if (auto *DC = dyn_cast<OMPDetachClause>(C))
-        ImplicitFirstprivates.push_back(DC->getEventHandler());
+        ImpInfo.Firstprivates.insert(DC->getEventHandler());
     }
-    if (!ImplicitFirstprivates.empty()) {
+    if (!ImpInfo.Firstprivates.empty()) {
       if (OMPClause *Implicit = ActOnOpenMPFirstprivateClause(
-              ImplicitFirstprivates, SourceLocation(), SourceLocation(),
-              SourceLocation())) {
+              ImpInfo.Firstprivates.getArrayRef(), SourceLocation(),
+              SourceLocation(), SourceLocation())) {
         ClausesWithImplicit.push_back(Implicit);
         ErrorFound = cast<OMPFirstprivateClause>(Implicit)->varlist_size() !=
-                     ImplicitFirstprivates.size();
+                     ImpInfo.Firstprivates.size();
       } else {
         ErrorFound = true;
       }
     }
-    if (!ImplicitPrivates.empty()) {
-      if (OMPClause *Implicit =
-              ActOnOpenMPPrivateClause(ImplicitPrivates, SourceLocation(),
-                                       SourceLocation(), SourceLocation())) {
+    if (!ImpInfo.Privates.empty()) {
+      if (OMPClause *Implicit = ActOnOpenMPPrivateClause(
+              ImpInfo.Privates.getArrayRef(), SourceLocation(),
+              SourceLocation(), SourceLocation())) {
         ClausesWithImplicit.push_back(Implicit);
         ErrorFound = cast<OMPPrivateClause>(Implicit)->varlist_size() !=
-                     ImplicitPrivates.size();
+                     ImpInfo.Privates.size();
       } else {
         ErrorFound = true;
       }
@@ -6152,9 +6136,10 @@ StmtResult SemaOpenMP::ActOnOpenMPExecutableDirective(
           ClausesWithImplicit.emplace_back(Implicit);
       }
     }
-    for (unsigned I = 0, E = DefaultmapKindNum; I < E; ++I) {
+    for (unsigned I = 0; I < VariableImplicitInfo::DefaultmapKindNum; ++I) {
       int ClauseKindCnt = -1;
-      for (ArrayRef<Expr *> ImplicitMap : ImplicitMaps[I]) {
+      for (unsigned J = 0; J < VariableImplicitInfo::MapKindNum; ++J) {
+        ArrayRef<Expr *> ImplicitMap = ImpInfo.Mappings[I][J].getArrayRef();
         ++ClauseKindCnt;
         if (ImplicitMap.empty())
           continue;
@@ -6162,7 +6147,7 @@ StmtResult SemaOpenMP::ActOnOpenMPExecutableDirective(
         DeclarationNameInfo MapperId;
         auto K = static_cast<OpenMPMapClauseKind>(ClauseKindCnt);
         if (OMPClause *Implicit = ActOnOpenMPMapClause(
-                nullptr, ImplicitMapModifiers[I], ImplicitMapModifiersLoc[I],
+                nullptr, ImpInfo.MapModifiers[I], ImplicitMapModifiersLoc[I],
                 MapperIdScopeSpec, MapperId, K, /*IsMapTypeImplicit=*/true,
                 SourceLocation(), SourceLocation(), ImplicitMap,
                 OMPVarListLocTy())) {
diff --git clang/lib/Sema/SemaOverload.cpp clang/lib/Sema/SemaOverload.cpp
index 861b0a91240b..a155bb2fd3ba 100644
--- clang/lib/Sema/SemaOverload.cpp
+++ clang/lib/Sema/SemaOverload.cpp
@@ -2032,26 +2032,42 @@ static bool IsVectorConversion(Sema &S, QualType FromType, QualType ToType,
   if (S.Context.hasSameUnqualifiedType(FromType, ToType))
     return false;
 
+  // HLSL allows implicit truncation of vector types.
+  if (S.getLangOpts().HLSL) {
+    auto *ToExtType = ToType->getAs<ExtVectorType>();
+    auto *FromExtType = FromType->getAs<ExtVectorType>();
+
+    // If both arguments are vectors, handle possible vector truncation and
+    // element conversion.
+    if (ToExtType && FromExtType) {
+      unsigned FromElts = FromExtType->getNumElements();
+      unsigned ToElts = ToExtType->getNumElements();
+      if (FromElts < ToElts)
+        return false;
+      if (FromElts == ToElts)
+        ElConv = ICK_Identity;
+      else
+        ElConv = ICK_HLSL_Vector_Truncation;
+
+      QualType FromElTy = FromExtType->getElementType();
+      QualType ToElTy = ToExtType->getElementType();
+      if (S.Context.hasSameUnqualifiedType(FromElTy, ToElTy))
+        return true;
+      return IsVectorElementConversion(S, FromElTy, ToElTy, ICK, From);
+    }
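+    // A vector source converting to a non-vector destination is handled as a
+    // truncation to one element, possibly combined with an element conversion.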
+    if (FromExtType && !ToExtType) {
+      ElConv = ICK_HLSL_Vector_Truncation;
+      QualType FromElTy = FromExtType->getElementType();
+      if (S.Context.hasSameUnqualifiedType(FromElTy, ToType))
+        return true;
+      return IsVectorElementConversion(S, FromElTy, ToType, ICK, From);
+    }
+    // Fallthrough for the case where ToType is a vector and FromType is not.
+  }
+
   // There are no conversions between extended vector types, only identity.
   if (auto *ToExtType = ToType->getAs<ExtVectorType>()) {
-    if (auto *FromExtType = FromType->getAs<ExtVectorType>()) {
-      // HLSL allows implicit truncation of vector types.
-      if (S.getLangOpts().HLSL) {
-        unsigned FromElts = FromExtType->getNumElements();
-        unsigned ToElts = ToExtType->getNumElements();
-        if (FromElts < ToElts)
-          return false;
-        if (FromElts == ToElts)
-          ElConv = ICK_Identity;
-        else
-          ElConv = ICK_HLSL_Vector_Truncation;
-
-        QualType FromElTy = FromExtType->getElementType();
-        QualType ToElTy = ToExtType->getElementType();
-        if (S.Context.hasSameUnqualifiedType(FromElTy, ToElTy))
-          return true;
-        return IsVectorElementConversion(S, FromElTy, ToElTy, ICK, From);
-      }
+    if (FromType->getAs<ExtVectorType>()) {
       // There are no conversions between extended vector types other than the
       // identity conversion.
       return false;
diff --git clang/lib/Sema/SemaRISCV.cpp clang/lib/Sema/SemaRISCV.cpp
index abf8e4ac2f3e..56d6f12fbc6e 100644
--- clang/lib/Sema/SemaRISCV.cpp
+++ clang/lib/Sema/SemaRISCV.cpp
@@ -733,7 +733,7 @@ bool SemaRISCV::CheckBuiltinFunctionCall(const TargetInfo &TI,
     if (ElemSize == 64 && !TI.hasFeature("zvknhb"))
       return Diag(TheCall->getBeginLoc(),
                   diag::err_riscv_builtin_requires_extension)
-             << /* IsExtension */ true << TheCall->getSourceRange() << "zvknb";
+             << /* IsExtension */ true << TheCall->getSourceRange() << "zvknhb";
 
     return CheckInvalidVLENandLMUL(TI, TheCall, SemaRef, Op1Type,
                                    ElemSize * 4) ||
diff --git clang/lib/Sema/SemaTemplate.cpp clang/lib/Sema/SemaTemplate.cpp
index bf6b53700d90..e5ea02a919f4 100644
--- clang/lib/Sema/SemaTemplate.cpp
+++ clang/lib/Sema/SemaTemplate.cpp
@@ -3302,8 +3302,8 @@ Sema::findFailedBooleanCondition(Expr *Cond) {
 QualType Sema::CheckTemplateIdType(TemplateName Name,
                                    SourceLocation TemplateLoc,
                                    TemplateArgumentListInfo &TemplateArgs) {
-  DependentTemplateName *DTN
-    = Name.getUnderlying().getAsDependentTemplateName();
+  DependentTemplateName *DTN =
+      Name.getUnderlying().getAsDependentTemplateName();
   if (DTN && DTN->isIdentifier())
     // When building a template-id where the template-name is dependent,
     // assume the template is a type template. Either our assumption is
@@ -3314,10 +3314,11 @@ QualType Sema::CheckTemplateIdType(TemplateName Name,
         TemplateArgs.arguments());
 
   if (Name.getAsAssumedTemplateName() &&
-      resolveAssumedTemplateNameAsType(/*Scope*/nullptr, Name, TemplateLoc))
+      resolveAssumedTemplateNameAsType(/*Scope=*/nullptr, Name, TemplateLoc))
     return QualType();
 
-  TemplateDecl *Template = Name.getAsTemplateDecl();
+  auto [Template, DefaultArgs] = Name.getTemplateDeclAndDefaultArgs();
+
   if (!Template || isa<FunctionTemplateDecl>(Template) ||
       isa<VarTemplateDecl>(Template) || isa<ConceptDecl>(Template)) {
     // We might have a substituted template template parameter pack. If so,
@@ -3335,8 +3336,9 @@ QualType Sema::CheckTemplateIdType(TemplateName Name,
   // Check that the template argument list is well-formed for this
   // template.
   SmallVector<TemplateArgument, 4> SugaredConverted, CanonicalConverted;
-  if (CheckTemplateArgumentList(Template, TemplateLoc, TemplateArgs, false,
-                                SugaredConverted, CanonicalConverted,
+  if (CheckTemplateArgumentList(Template, TemplateLoc, TemplateArgs,
+                                DefaultArgs, false, SugaredConverted,
+                                CanonicalConverted,
                                 /*UpdateArgsWithConversions=*/true))
     return QualType();
 
@@ -3565,7 +3567,9 @@ bool Sema::resolveAssumedTemplateNameAsType(Scope *S, TemplateName &Name,
   if (Corrected && Corrected.getFoundDecl()) {
     diagnoseTypo(Corrected, PDiag(diag::err_no_template_suggest)
                                 << ATN->getDeclName());
-    Name = TemplateName(Corrected.getCorrectionDeclAs<TemplateDecl>());
+    Name = Context.getQualifiedTemplateName(
+        /*NNS=*/nullptr, /*TemplateKeyword=*/false,
+        TemplateName(Corrected.getCorrectionDeclAs<TemplateDecl>()));
     return false;
   }
 
@@ -4012,7 +4016,8 @@ DeclResult Sema::ActOnVarTemplateSpecialization(
   // template.
   SmallVector<TemplateArgument, 4> SugaredConverted, CanonicalConverted;
   if (CheckTemplateArgumentList(VarTemplate, TemplateNameLoc, TemplateArgs,
-                                false, SugaredConverted, CanonicalConverted,
+                                /*DefaultArgs=*/{}, false, SugaredConverted,
+                                CanonicalConverted,
                                 /*UpdateArgsWithConversions=*/true))
     return true;
 
@@ -4179,8 +4184,8 @@ Sema::CheckVarTemplateId(VarTemplateDecl *Template, SourceLocation TemplateLoc,
   SmallVector<TemplateArgument, 4> SugaredConverted, CanonicalConverted;
   if (CheckTemplateArgumentList(
           Template, TemplateNameLoc,
-          const_cast<TemplateArgumentListInfo &>(TemplateArgs), false,
-          SugaredConverted, CanonicalConverted,
+          const_cast<TemplateArgumentListInfo &>(TemplateArgs),
+          /*DefaultArgs=*/{}, false, SugaredConverted, CanonicalConverted,
           /*UpdateArgsWithConversions=*/true))
     return true;
 
@@ -4374,6 +4379,7 @@ Sema::CheckConceptTemplateId(const CXXScopeSpec &SS,
   if (CheckTemplateArgumentList(
           NamedConcept, ConceptNameInfo.getLoc(),
           const_cast<TemplateArgumentListInfo &>(*TemplateArgs),
+          /*DefaultArgs=*/{},
           /*PartialTemplateArgs=*/false, SugaredConverted, CanonicalConverted,
           /*UpdateArgsWithConversions=*/false))
     return ExprError();
@@ -5299,7 +5305,8 @@ static bool diagnoseMissingArgument(Sema &S, SourceLocation Loc,
 /// for specializing the given template.
 bool Sema::CheckTemplateArgumentList(
     TemplateDecl *Template, SourceLocation TemplateLoc,
-    TemplateArgumentListInfo &TemplateArgs, bool PartialTemplateArgs,
+    TemplateArgumentListInfo &TemplateArgs, const DefaultArguments &DefaultArgs,
+    bool PartialTemplateArgs,
     SmallVectorImpl<TemplateArgument> &SugaredConverted,
     SmallVectorImpl<TemplateArgument> &CanonicalConverted,
     bool UpdateArgsWithConversions, bool *ConstraintsNotSatisfied,
@@ -5327,9 +5334,29 @@ bool Sema::CheckTemplateArgumentList(
   SmallVector<TemplateArgument, 2> CanonicalArgumentPack;
   unsigned ArgIdx = 0, NumArgs = NewArgs.size();
   LocalInstantiationScope InstScope(*this, true);
-  for (TemplateParameterList::iterator Param = Params->begin(),
-                                       ParamEnd = Params->end();
-       Param != ParamEnd; /* increment in loop */) {
+  for (TemplateParameterList::iterator ParamBegin = Params->begin(),
+                                       ParamEnd = Params->end(),
+                                       Param = ParamBegin;
+       Param != ParamEnd;
+       /* increment in loop */) {
+    if (size_t ParamIdx = Param - ParamBegin;
+        DefaultArgs && ParamIdx >= DefaultArgs.StartPos) {
+      // All written arguments should have been consumed by this point.
+      assert(ArgIdx == NumArgs && "bad default argument deduction");
+      // FIXME: Don't ignore parameter packs.
+      if (ParamIdx == DefaultArgs.StartPos && !(*Param)->isParameterPack()) {
+        assert(Param + DefaultArgs.Args.size() <= ParamEnd);
+        // Default arguments from a DeducedTemplateName are already converted.
+        for (const TemplateArgument &DefArg : DefaultArgs.Args) {
+          SugaredConverted.push_back(DefArg);
+          CanonicalConverted.push_back(
+              Context.getCanonicalTemplateArgument(DefArg));
+          ++Param;
+        }
+        continue;
+      }
+    }
+
     // If we have an expanded parameter pack, make sure we don't have too
     // many arguments.
     if (std::optional<unsigned> Expansions = getExpandedPackSize(*Param)) {
@@ -5543,6 +5570,7 @@ bool Sema::CheckTemplateArgumentList(
                               CTAK_Specified))
       return true;
 
+    SugaredConverted.back().setIsDefaulted(true);
     CanonicalConverted.back().setIsDefaulted(true);
 
     // Core issue 150 (assumed resolution): if this is a template template
@@ -7126,7 +7154,7 @@ bool Sema::CheckTemplateTemplateArgument(TemplateTemplateParmDecl *Param,
                                          TemplateArgumentLoc &Arg,
                                          bool IsDeduced) {
   TemplateName Name = Arg.getArgument().getAsTemplateOrTemplatePattern();
-  TemplateDecl *Template = Name.getAsTemplateDecl();
+  auto [Template, DefaultArgs] = Name.getTemplateDeclAndDefaultArgs();
   if (!Template) {
     // Any dependent template name is fine.
     assert(Name.isDependent() && "Non-dependent template isn't a declaration?");
@@ -7177,7 +7205,7 @@ bool Sema::CheckTemplateTemplateArgument(TemplateTemplateParmDecl *Param,
       return false;
 
     if (isTemplateTemplateParameterAtLeastAsSpecializedAs(
-            Params, Template, Arg.getLocation(), IsDeduced)) {
+            Params, Template, DefaultArgs, Arg.getLocation(), IsDeduced)) {
       // P2113
       // C++20[temp.func.order]p2
       //   [...] If both deductions succeed, the partial ordering selects the
@@ -8231,7 +8259,9 @@ DeclResult Sema::ActOnClassTemplateSpecialization(
   // template.
   SmallVector<TemplateArgument, 4> SugaredConverted, CanonicalConverted;
   if (CheckTemplateArgumentList(ClassTemplate, TemplateNameLoc, TemplateArgs,
-                                false, SugaredConverted, CanonicalConverted,
+                                /*DefaultArgs=*/{},
+                                /*PartialTemplateArgs=*/false, SugaredConverted,
+                                CanonicalConverted,
                                 /*UpdateArgsWithConversions=*/true))
     return true;
 
@@ -9603,7 +9633,8 @@ DeclResult Sema::ActOnExplicitInstantiation(
   // template.
   SmallVector<TemplateArgument, 4> SugaredConverted, CanonicalConverted;
   if (CheckTemplateArgumentList(ClassTemplate, TemplateNameLoc, TemplateArgs,
-                                false, SugaredConverted, CanonicalConverted,
+                                /*DefaultArgs=*/{}, false, SugaredConverted,
+                                CanonicalConverted,
                                 /*UpdateArgsWithConversions=*/true))
     return true;
 
diff --git clang/lib/Sema/SemaTemplateDeduction.cpp clang/lib/Sema/SemaTemplateDeduction.cpp
index 01f18e5a3251..562c57a41299 100644
--- clang/lib/Sema/SemaTemplateDeduction.cpp
+++ clang/lib/Sema/SemaTemplateDeduction.cpp
@@ -134,11 +134,16 @@ static bool hasSameExtendedValue(llvm::APSInt X, llvm::APSInt Y) {
   return X == Y;
 }
 
+/// The kind of PartialOrdering we're performing template argument deduction
+/// for (C++11 [temp.deduct.partial]).
+enum class PartialOrderingKind { None, NonCall, Call };
+
 static TemplateDeductionResult DeduceTemplateArgumentsByTypeMatch(
     Sema &S, TemplateParameterList *TemplateParams, QualType Param,
     QualType Arg, TemplateDeductionInfo &Info,
     SmallVectorImpl<DeducedTemplateArgument> &Deduced, unsigned TDF,
-    bool PartialOrdering, bool DeducedFromArrayBound, bool *HasDeducedAnyParam);
+    PartialOrderingKind POK, bool DeducedFromArrayBound,
+    bool *HasDeducedAnyParam);
 
 enum class PackFold { ParameterToArgument, ArgumentToParameter };
 static TemplateDeductionResult
@@ -147,8 +152,8 @@ DeduceTemplateArguments(Sema &S, TemplateParameterList *TemplateParams,
                         ArrayRef<TemplateArgument> As,
                         TemplateDeductionInfo &Info,
                         SmallVectorImpl<DeducedTemplateArgument> &Deduced,
-                        bool NumberOfArgumentsMustMatch, PackFold PackFold,
-                        bool *HasDeducedAnyParam);
+                        bool NumberOfArgumentsMustMatch, bool PartialOrdering,
+                        PackFold PackFold, bool *HasDeducedAnyParam);
 
 static void MarkUsedTemplateParameters(ASTContext &Ctx,
                                        const TemplateArgument &TemplateArg,
@@ -401,6 +406,7 @@ DeduceNonTypeTemplateArgument(Sema &S, TemplateParameterList *TemplateParams,
                               const NonTypeTemplateParmDecl *NTTP,
                               const DeducedTemplateArgument &NewDeduced,
                               QualType ValueType, TemplateDeductionInfo &Info,
+                              bool PartialOrdering,
                               SmallVectorImpl<DeducedTemplateArgument> &Deduced,
                               bool *HasDeducedAnyParam) {
   assert(NTTP->getDepth() == Info.getDeducedDepth() &&
@@ -445,7 +451,9 @@ DeduceNonTypeTemplateArgument(Sema &S, TemplateParameterList *TemplateParams,
 
   return DeduceTemplateArgumentsByTypeMatch(
       S, TemplateParams, ParamType, ValueType, Info, Deduced,
-      TDF_SkipNonDependent, /*PartialOrdering=*/false,
+      TDF_SkipNonDependent,
+      PartialOrdering ? PartialOrderingKind::NonCall
+                      : PartialOrderingKind::None,
       /*ArrayBound=*/NewDeduced.wasDeducedFromArrayBound(), HasDeducedAnyParam);
 }
 
@@ -455,13 +463,13 @@ static TemplateDeductionResult DeduceNonTypeTemplateArgument(
     Sema &S, TemplateParameterList *TemplateParams,
     const NonTypeTemplateParmDecl *NTTP, const llvm::APSInt &Value,
     QualType ValueType, bool DeducedFromArrayBound, TemplateDeductionInfo &Info,
-    SmallVectorImpl<DeducedTemplateArgument> &Deduced,
+    bool PartialOrdering, SmallVectorImpl<DeducedTemplateArgument> &Deduced,
     bool *HasDeducedAnyParam) {
   return DeduceNonTypeTemplateArgument(
       S, TemplateParams, NTTP,
       DeducedTemplateArgument(S.Context, Value, ValueType,
                               DeducedFromArrayBound),
-      ValueType, Info, Deduced, HasDeducedAnyParam);
+      ValueType, Info, PartialOrdering, Deduced, HasDeducedAnyParam);
 }
 
 /// Deduce the value of the given non-type template parameter
@@ -470,6 +478,7 @@ static TemplateDeductionResult
 DeduceNullPtrTemplateArgument(Sema &S, TemplateParameterList *TemplateParams,
                               const NonTypeTemplateParmDecl *NTTP,
                               QualType NullPtrType, TemplateDeductionInfo &Info,
+                              bool PartialOrdering,
                               SmallVectorImpl<DeducedTemplateArgument> &Deduced,
                               bool *HasDeducedAnyParam) {
   Expr *Value = S.ImpCastExprToType(
@@ -481,7 +490,7 @@ DeduceNullPtrTemplateArgument(Sema &S, TemplateParameterList *TemplateParams,
                     .get();
   return DeduceNonTypeTemplateArgument(
       S, TemplateParams, NTTP, DeducedTemplateArgument(Value), Value->getType(),
-      Info, Deduced, HasDeducedAnyParam);
+      Info, PartialOrdering, Deduced, HasDeducedAnyParam);
 }
 
 /// Deduce the value of the given non-type template parameter
@@ -491,12 +500,12 @@ DeduceNullPtrTemplateArgument(Sema &S, TemplateParameterList *TemplateParams,
 static TemplateDeductionResult
 DeduceNonTypeTemplateArgument(Sema &S, TemplateParameterList *TemplateParams,
                               const NonTypeTemplateParmDecl *NTTP, Expr *Value,
-                              TemplateDeductionInfo &Info,
+                              TemplateDeductionInfo &Info, bool PartialOrdering,
                               SmallVectorImpl<DeducedTemplateArgument> &Deduced,
                               bool *HasDeducedAnyParam) {
   return DeduceNonTypeTemplateArgument(
       S, TemplateParams, NTTP, DeducedTemplateArgument(Value), Value->getType(),
-      Info, Deduced, HasDeducedAnyParam);
+      Info, PartialOrdering, Deduced, HasDeducedAnyParam);
 }
 
 /// Deduce the value of the given non-type template parameter
@@ -507,76 +516,21 @@ static TemplateDeductionResult
 DeduceNonTypeTemplateArgument(Sema &S, TemplateParameterList *TemplateParams,
                               const NonTypeTemplateParmDecl *NTTP, ValueDecl *D,
                               QualType T, TemplateDeductionInfo &Info,
+                              bool PartialOrdering,
                               SmallVectorImpl<DeducedTemplateArgument> &Deduced,
                               bool *HasDeducedAnyParam) {
   TemplateArgument New(D, T);
-  return DeduceNonTypeTemplateArgument(S, TemplateParams, NTTP,
-                                       DeducedTemplateArgument(New), T, Info,
-                                       Deduced, HasDeducedAnyParam);
-}
-
-/// Create a shallow copy of a given template parameter declaration, with
-/// empty source locations and using the given TemplateArgument as it's
-/// default argument.
-///
-/// \returns The new template parameter declaration.
-static NamedDecl *getTemplateParameterWithDefault(Sema &S, NamedDecl *A,
-                                                  TemplateArgument Default) {
-  switch (A->getKind()) {
-  case Decl::TemplateTypeParm: {
-    auto *T = cast<TemplateTypeParmDecl>(A);
-    auto *R = TemplateTypeParmDecl::Create(
-        S.Context, A->getDeclContext(), SourceLocation(), SourceLocation(),
-        T->getDepth(), T->getIndex(), T->getIdentifier(),
-        T->wasDeclaredWithTypename(), T->isParameterPack(),
-        T->hasTypeConstraint());
-    R->setDefaultArgument(
-        S.Context,
-        S.getTrivialTemplateArgumentLoc(Default, QualType(), SourceLocation()));
-    if (R->hasTypeConstraint()) {
-      auto *C = R->getTypeConstraint();
-      R->setTypeConstraint(C->getConceptReference(),
-                           C->getImmediatelyDeclaredConstraint());
-    }
-    return R;
-  }
-  case Decl::NonTypeTemplateParm: {
-    auto *T = cast<NonTypeTemplateParmDecl>(A);
-    auto *R = NonTypeTemplateParmDecl::Create(
-        S.Context, A->getDeclContext(), SourceLocation(), SourceLocation(),
-        T->getDepth(), T->getIndex(), T->getIdentifier(), T->getType(),
-        T->isParameterPack(), T->getTypeSourceInfo());
-    R->setDefaultArgument(S.Context,
-                          S.getTrivialTemplateArgumentLoc(
-                              Default, Default.getNonTypeTemplateArgumentType(),
-                              SourceLocation()));
-    if (auto *PTC = T->getPlaceholderTypeConstraint())
-      R->setPlaceholderTypeConstraint(PTC);
-    return R;
-  }
-  case Decl::TemplateTemplateParm: {
-    auto *T = cast<TemplateTemplateParmDecl>(A);
-    auto *R = TemplateTemplateParmDecl::Create(
-        S.Context, A->getDeclContext(), SourceLocation(), T->getDepth(),
-        T->getIndex(), T->isParameterPack(), T->getIdentifier(),
-        T->wasDeclaredWithTypename(), T->getTemplateParameters());
-    R->setDefaultArgument(
-        S.Context,
-        S.getTrivialTemplateArgumentLoc(Default, QualType(), SourceLocation()));
-    return R;
-  }
-  default:
-    llvm_unreachable("Unexpected Decl Kind");
-  }
+  return DeduceNonTypeTemplateArgument(
+      S, TemplateParams, NTTP, DeducedTemplateArgument(New), T, Info,
+      PartialOrdering, Deduced, HasDeducedAnyParam);
 }
 
-static TemplateDeductionResult
-DeduceTemplateArguments(Sema &S, TemplateParameterList *TemplateParams,
-                        TemplateName Param, TemplateName Arg,
-                        TemplateDeductionInfo &Info,
-                        ArrayRef<TemplateArgument> DefaultArguments,
-                        SmallVectorImpl<DeducedTemplateArgument> &Deduced,
-                        bool *HasDeducedAnyParam) {
+static TemplateDeductionResult DeduceTemplateArguments(
+    Sema &S, TemplateParameterList *TemplateParams, TemplateName Param,
+    TemplateName Arg, TemplateDeductionInfo &Info,
+    ArrayRef<TemplateArgument> DefaultArguments, bool PartialOrdering,
+    SmallVectorImpl<DeducedTemplateArgument> &Deduced,
+    bool *HasDeducedAnyParam) {
   TemplateDecl *ParamDecl = Param.getAsTemplateDecl();
   if (!ParamDecl) {
     // The parameter type is dependent and is not a template template parameter,
@@ -589,42 +543,30 @@ DeduceTemplateArguments(Sema &S, TemplateParameterList *TemplateParams,
     if (TempParam->getDepth() != Info.getDeducedDepth())
       return TemplateDeductionResult::Success;
 
-    auto NewDeduced = DeducedTemplateArgument(Arg);
-    // Provisional resolution for CWG2398: If Arg is also a template template
-    // param, and it names a template specialization, then we deduce a
-    // synthesized template template parameter based on A, but using the TS's
-    // arguments as defaults.
-    if (auto *TempArg = dyn_cast_or_null<TemplateTemplateParmDecl>(
-            Arg.getAsTemplateDecl())) {
-      assert(!TempArg->isExpandedParameterPack());
-
-      TemplateParameterList *As = TempArg->getTemplateParameters();
-      if (DefaultArguments.size() != 0) {
-        assert(DefaultArguments.size() <= As->size());
-        SmallVector<NamedDecl *, 4> Params(As->size());
-        for (unsigned I = 0; I < DefaultArguments.size(); ++I)
-          Params[I] = getTemplateParameterWithDefault(S, As->getParam(I),
-                                                      DefaultArguments[I]);
-        for (unsigned I = DefaultArguments.size(); I < As->size(); ++I)
-          Params[I] = As->getParam(I);
-        // FIXME: We could unique these, and also the parameters, but we don't
-        // expect programs to contain a large enough amount of these deductions
-        // for that to be worthwhile.
-        auto *TPL = TemplateParameterList::Create(
-            S.Context, SourceLocation(), SourceLocation(), Params,
-            SourceLocation(), As->getRequiresClause());
-        NewDeduced = DeducedTemplateArgument(
-            TemplateName(TemplateTemplateParmDecl::Create(
-                S.Context, TempArg->getDeclContext(), SourceLocation(),
-                TempArg->getDepth(), TempArg->getPosition(),
-                TempArg->isParameterPack(), TempArg->getIdentifier(),
-                TempArg->wasDeclaredWithTypename(), TPL)));
+    ArrayRef<NamedDecl *> Params =
+        ParamDecl->getTemplateParameters()->asArray();
+    unsigned StartPos = 0;
+    for (unsigned I = 0, E = std::min(Params.size(), DefaultArguments.size());
+         I < E; ++I) {
+      if (Params[I]->isParameterPack()) {
+        StartPos = DefaultArguments.size();
+        break;
       }
+      StartPos = I + 1;
     }
 
-    DeducedTemplateArgument Result = checkDeducedTemplateArguments(S.Context,
-                                                 Deduced[TempParam->getIndex()],
-                                                                   NewDeduced);
+    // Provisional resolution for CWG2398: If Arg names a template
+    // specialization, deduce a synthesized template name based on Arg,
+    // using the specialization's extra arguments (those beyond P's
+    // parameters) as defaults.
+    DeducedTemplateArgument NewDeduced =
+        PartialOrdering
+            ? TemplateArgument(S.Context.getDeducedTemplateName(
+                  Arg, {StartPos, DefaultArguments.drop_front(StartPos)}))
+            : Arg;
+
+    DeducedTemplateArgument Result = checkDeducedTemplateArguments(
+        S.Context, Deduced[TempParam->getIndex()], NewDeduced);
     if (Result.isNull()) {
       Info.Param = TempParam;
       Info.FirstArg = Deduced[TempParam->getIndex()];
@@ -639,7 +581,8 @@ DeduceTemplateArguments(Sema &S, TemplateParameterList *TemplateParams,
   }
 
   // Verify that the two template names are equivalent.
-  if (S.Context.hasSameTemplateName(Param, Arg))
+  if (S.Context.hasSameTemplateName(
+          Param, Arg, /*IgnoreDeduced=*/DefaultArguments.size() != 0))
     return TemplateDeductionResult::Success;
 
   // Mismatch of non-dependent template parameter to argument.
@@ -681,7 +624,7 @@ static const TemplateSpecializationType *getLastTemplateSpecType(QualType QT) {
 static TemplateDeductionResult
 DeduceTemplateSpecArguments(Sema &S, TemplateParameterList *TemplateParams,
                             const QualType P, QualType A,
-                            TemplateDeductionInfo &Info,
+                            TemplateDeductionInfo &Info, bool PartialOrdering,
                             SmallVectorImpl<DeducedTemplateArgument> &Deduced,
                             bool *HasDeducedAnyParam) {
   QualType UP = P;
@@ -730,9 +673,10 @@ DeduceTemplateSpecArguments(Sema &S, TemplateParameterList *TemplateParams,
             ->template_arguments();
 
     // Perform template argument deduction for the template name.
-    if (auto Result =
-            DeduceTemplateArguments(S, TemplateParams, TNP, TNA, Info,
-                                    AResolved, Deduced, HasDeducedAnyParam);
+    if (auto Result = DeduceTemplateArguments(S, TemplateParams, TNP, TNA, Info,
+                                              /*DefaultArguments=*/AResolved,
+                                              PartialOrdering, Deduced,
+                                              HasDeducedAnyParam);
         Result != TemplateDeductionResult::Success)
       return Result;
 
@@ -741,8 +685,8 @@ DeduceTemplateSpecArguments(Sema &S, TemplateParameterList *TemplateParams,
     // filled in by default arguments.
     return DeduceTemplateArguments(
         S, TemplateParams, PResolved, AResolved, Info, Deduced,
-        /*NumberOfArgumentsMustMatch=*/false, PackFold::ParameterToArgument,
-        HasDeducedAnyParam);
+        /*NumberOfArgumentsMustMatch=*/false, PartialOrdering,
+        PackFold::ParameterToArgument, HasDeducedAnyParam);
   }
 
   // If the argument type is a class template specialization, we
@@ -763,9 +707,10 @@ DeduceTemplateSpecArguments(Sema &S, TemplateParameterList *TemplateParams,
         *NNS, false, TemplateName(SA->getSpecializedTemplate()));
 
   // Perform template argument deduction for the template name.
-  if (auto Result = DeduceTemplateArguments(S, TemplateParams, TNP, TNA, Info,
-                                            SA->getTemplateArgs().asArray(),
-                                            Deduced, HasDeducedAnyParam);
+  if (auto Result = DeduceTemplateArguments(
+          S, TemplateParams, TNP, TNA, Info,
+          /*DefaultArguments=*/SA->getTemplateArgs().asArray(), PartialOrdering,
+          Deduced, HasDeducedAnyParam);
       Result != TemplateDeductionResult::Success)
     return Result;
 
@@ -773,7 +718,7 @@ DeduceTemplateSpecArguments(Sema &S, TemplateParameterList *TemplateParams,
   return DeduceTemplateArguments(S, TemplateParams, PResolved,
                                  SA->getTemplateArgs().asArray(), Info, Deduced,
                                  /*NumberOfArgumentsMustMatch=*/true,
-                                 PackFold::ParameterToArgument,
+                                 PartialOrdering, PackFold::ParameterToArgument,
                                  HasDeducedAnyParam);
 }
 
@@ -1189,7 +1134,7 @@ template <class T>
 static TemplateDeductionResult DeduceForEachType(
     Sema &S, TemplateParameterList *TemplateParams, ArrayRef<QualType> Params,
     ArrayRef<QualType> Args, TemplateDeductionInfo &Info,
-    SmallVectorImpl<DeducedTemplateArgument> &Deduced, bool PartialOrdering,
+    SmallVectorImpl<DeducedTemplateArgument> &Deduced, PartialOrderingKind POK,
     bool FinishingDeduction, T &&DeductFunc) {
   // C++0x [temp.deduct.type]p10:
   //   Similarly, if P has a form that contains (T), then each parameter type
@@ -1219,8 +1164,7 @@ static TemplateDeductionResult DeduceForEachType(
       if (TemplateDeductionResult Result =
               DeductFunc(S, TemplateParams, ParamIdx, ArgIdx,
                          Params[ParamIdx].getUnqualifiedType(),
-                         Args[ArgIdx].getUnqualifiedType(), Info, Deduced,
-                         PartialOrdering);
+                         Args[ArgIdx].getUnqualifiedType(), Info, Deduced, POK);
           Result != TemplateDeductionResult::Success)
         return Result;
 
@@ -1248,7 +1192,7 @@ static TemplateDeductionResult DeduceForEachType(
         if (TemplateDeductionResult Result = DeductFunc(
                 S, TemplateParams, ParamIdx, ArgIdx,
                 Pattern.getUnqualifiedType(), Args[ArgIdx].getUnqualifiedType(),
-                Info, Deduced, PartialOrdering);
+                Info, Deduced, POK);
             Result != TemplateDeductionResult::Success)
           return Result;
         PackScope.nextPackElement();
@@ -1292,7 +1236,7 @@ static TemplateDeductionResult DeduceForEachType(
   // During partial ordering, if Ai was originally a function parameter pack:
   // - if P does not contain a function parameter type corresponding to Ai then
   //   Ai is ignored;
-  if (PartialOrdering && ArgIdx + 1 == Args.size() &&
+  if (POK == PartialOrderingKind::Call && ArgIdx + 1 == Args.size() &&
       isa<PackExpansionType>(Args[ArgIdx]))
     return TemplateDeductionResult::Success;
 
@@ -1339,18 +1283,18 @@ static TemplateDeductionResult DeduceTemplateArguments(
     Sema &S, TemplateParameterList *TemplateParams, ArrayRef<QualType> Params,
     ArrayRef<QualType> Args, TemplateDeductionInfo &Info,
     SmallVectorImpl<DeducedTemplateArgument> &Deduced, unsigned TDF,
-    bool PartialOrdering, bool *HasDeducedAnyParam,
+    PartialOrderingKind POK, bool *HasDeducedAnyParam,
     llvm::SmallBitVector *HasDeducedParam) {
   return ::DeduceForEachType(
-      S, TemplateParams, Params, Args, Info, Deduced, PartialOrdering,
+      S, TemplateParams, Params, Args, Info, Deduced, POK,
       /*FinishingDeduction=*/false,
       [&](Sema &S, TemplateParameterList *TemplateParams, int ParamIdx,
           int ArgIdx, QualType P, QualType A, TemplateDeductionInfo &Info,
           SmallVectorImpl<DeducedTemplateArgument> &Deduced,
-          bool PartialOrdering) {
+          PartialOrderingKind POK) {
         bool HasDeducedAnyParamCopy = false;
         TemplateDeductionResult TDR = DeduceTemplateArgumentsByTypeMatch(
-            S, TemplateParams, P, A, Info, Deduced, TDF, PartialOrdering,
+            S, TemplateParams, P, A, Info, Deduced, TDF, POK,
             /*DeducedFromArrayBound=*/false, &HasDeducedAnyParamCopy);
         if (HasDeducedAnyParam && HasDeducedAnyParamCopy)
           *HasDeducedAnyParam = true;
@@ -1454,7 +1398,7 @@ static bool isForwardingReference(QualType Param, unsigned FirstInnerIndex) {
 static TemplateDeductionResult
 DeduceTemplateBases(Sema &S, const CXXRecordDecl *RD,
                     TemplateParameterList *TemplateParams, QualType P,
-                    TemplateDeductionInfo &Info,
+                    TemplateDeductionInfo &Info, bool PartialOrdering,
                     SmallVectorImpl<DeducedTemplateArgument> &Deduced,
                     bool *HasDeducedAnyParam) {
   // C++14 [temp.deduct.call] p4b3:
@@ -1507,9 +1451,9 @@ DeduceTemplateBases(Sema &S, const CXXRecordDecl *RD,
                                                         Deduced.end());
     TemplateDeductionInfo BaseInfo(TemplateDeductionInfo::ForBase, Info);
     bool HasDeducedAnyParamCopy = false;
-    TemplateDeductionResult BaseResult =
-        DeduceTemplateSpecArguments(S, TemplateParams, P, NextT, BaseInfo,
-                                    DeducedCopy, &HasDeducedAnyParamCopy);
+    TemplateDeductionResult BaseResult = DeduceTemplateSpecArguments(
+        S, TemplateParams, P, NextT, BaseInfo, PartialOrdering, DeducedCopy,
+        &HasDeducedAnyParamCopy);
 
     // If this was a successful deduction, add it to the list of matches,
     // otherwise we need to continue searching its bases.
@@ -1555,6 +1499,14 @@ DeduceTemplateBases(Sema &S, const CXXRecordDecl *RD,
   return TemplateDeductionResult::Success;
 }
 
+/// When deduction recurses into a context that is not a function call,
+/// downgrade a 'Call' partial ordering kind to 'NonCall', so that the kind
+/// still records that we are in a partial ordering context.
+static PartialOrderingKind
+degradeCallPartialOrderingKind(PartialOrderingKind POK) {
+  return std::min(POK, PartialOrderingKind::NonCall);
+}
+
 /// Deduce the template arguments by comparing the parameter type and
 /// the argument type (C++ [temp.deduct.type]).
 ///
@@ -1583,7 +1535,7 @@ static TemplateDeductionResult DeduceTemplateArgumentsByTypeMatch(
     Sema &S, TemplateParameterList *TemplateParams, QualType P, QualType A,
     TemplateDeductionInfo &Info,
     SmallVectorImpl<DeducedTemplateArgument> &Deduced, unsigned TDF,
-    bool PartialOrdering, bool DeducedFromArrayBound,
+    PartialOrderingKind POK, bool DeducedFromArrayBound,
     bool *HasDeducedAnyParam) {
 
   // If the argument type is a pack expansion, look at its pattern.
@@ -1592,7 +1544,7 @@ static TemplateDeductionResult DeduceTemplateArgumentsByTypeMatch(
     A = AExp->getPattern();
   assert(!isa<PackExpansionType>(A.getCanonicalType()));
 
-  if (PartialOrdering) {
+  if (POK == PartialOrderingKind::Call) {
     // C++11 [temp.deduct.partial]p5:
     //   Before the partial ordering is done, certain transformations are
     //   performed on the types used for partial ordering:
@@ -1866,7 +1818,7 @@ static TemplateDeductionResult DeduceTemplateArgumentsByTypeMatch(
         return TemplateDeductionResult::NonDeducedMismatch;
       return DeduceTemplateArgumentsByTypeMatch(
           S, TemplateParams, CP->getElementType(), CA->getElementType(), Info,
-          Deduced, TDF, /*PartialOrdering=*/false,
+          Deduced, TDF, degradeCallPartialOrderingKind(POK),
           /*DeducedFromArrayBound=*/false, HasDeducedAnyParam);
     }
 
@@ -1877,7 +1829,7 @@ static TemplateDeductionResult DeduceTemplateArgumentsByTypeMatch(
         return TemplateDeductionResult::NonDeducedMismatch;
       return DeduceTemplateArgumentsByTypeMatch(
           S, TemplateParams, PA->getValueType(), AA->getValueType(), Info,
-          Deduced, TDF, /*PartialOrdering=*/false,
+          Deduced, TDF, degradeCallPartialOrderingKind(POK),
           /*DeducedFromArrayBound=*/false, HasDeducedAnyParam);
     }
 
@@ -1895,8 +1847,8 @@ static TemplateDeductionResult DeduceTemplateArgumentsByTypeMatch(
           S, TemplateParams, P->castAs<PointerType>()->getPointeeType(),
           PointeeType, Info, Deduced,
           TDF & (TDF_IgnoreQualifiers | TDF_DerivedClass),
-          /*PartialOrdering=*/false, /*DeducedFromArrayBound=*/false,
-          HasDeducedAnyParam);
+          degradeCallPartialOrderingKind(POK),
+          /*DeducedFromArrayBound=*/false, HasDeducedAnyParam);
     }
 
     //     T &
@@ -1908,7 +1860,7 @@ static TemplateDeductionResult DeduceTemplateArgumentsByTypeMatch(
 
       return DeduceTemplateArgumentsByTypeMatch(
           S, TemplateParams, RP->getPointeeType(), RA->getPointeeType(), Info,
-          Deduced, 0, /*PartialOrdering=*/false,
+          Deduced, 0, degradeCallPartialOrderingKind(POK),
           /*DeducedFromArrayBound=*/false, HasDeducedAnyParam);
     }
 
@@ -1921,7 +1873,7 @@ static TemplateDeductionResult DeduceTemplateArgumentsByTypeMatch(
 
       return DeduceTemplateArgumentsByTypeMatch(
           S, TemplateParams, RP->getPointeeType(), RA->getPointeeType(), Info,
-          Deduced, 0, /*PartialOrdering=*/false,
+          Deduced, 0, degradeCallPartialOrderingKind(POK),
           /*DeducedFromArrayBound=*/false, HasDeducedAnyParam);
     }
 
@@ -1936,7 +1888,8 @@ static TemplateDeductionResult DeduceTemplateArgumentsByTypeMatch(
 
       return DeduceTemplateArgumentsByTypeMatch(
           S, TemplateParams, IAP->getElementType(), IAA->getElementType(), Info,
-          Deduced, TDF & TDF_IgnoreQualifiers, /*PartialOrdering=*/false,
+          Deduced, TDF & TDF_IgnoreQualifiers,
+          degradeCallPartialOrderingKind(POK),
           /*DeducedFromArrayBound=*/false, HasDeducedAnyParam);
     }
 
@@ -1950,7 +1903,8 @@ static TemplateDeductionResult DeduceTemplateArgumentsByTypeMatch(
 
       return DeduceTemplateArgumentsByTypeMatch(
           S, TemplateParams, CAP->getElementType(), CAA->getElementType(), Info,
-          Deduced, TDF & TDF_IgnoreQualifiers, /*PartialOrdering=*/false,
+          Deduced, TDF & TDF_IgnoreQualifiers,
+          degradeCallPartialOrderingKind(POK),
           /*DeducedFromArrayBound=*/false, HasDeducedAnyParam);
     }
 
@@ -1966,7 +1920,7 @@ static TemplateDeductionResult DeduceTemplateArgumentsByTypeMatch(
       if (auto Result = DeduceTemplateArgumentsByTypeMatch(
               S, TemplateParams, DAP->getElementType(), AA->getElementType(),
               Info, Deduced, TDF & TDF_IgnoreQualifiers,
-              /*PartialOrdering=*/false,
+              degradeCallPartialOrderingKind(POK),
               /*DeducedFromArrayBound=*/false, HasDeducedAnyParam);
           Result != TemplateDeductionResult::Success)
         return Result;
@@ -1985,13 +1939,14 @@ static TemplateDeductionResult DeduceTemplateArgumentsByTypeMatch(
         llvm::APSInt Size(CAA->getSize());
         return DeduceNonTypeTemplateArgument(
             S, TemplateParams, NTTP, Size, S.Context.getSizeType(),
-            /*ArrayBound=*/true, Info, Deduced, HasDeducedAnyParam);
+            /*ArrayBound=*/true, Info, POK != PartialOrderingKind::None,
+            Deduced, HasDeducedAnyParam);
       }
       if (const auto *DAA = dyn_cast<DependentSizedArrayType>(AA))
         if (DAA->getSizeExpr())
-          return DeduceNonTypeTemplateArgument(S, TemplateParams, NTTP,
-                                               DAA->getSizeExpr(), Info,
-                                               Deduced, HasDeducedAnyParam);
+          return DeduceNonTypeTemplateArgument(
+              S, TemplateParams, NTTP, DAA->getSizeExpr(), Info,
+              POK != PartialOrderingKind::None, Deduced, HasDeducedAnyParam);
 
       // Incomplete type does not match a dependently-sized array type
       return TemplateDeductionResult::NonDeducedMismatch;
@@ -2014,8 +1969,7 @@ static TemplateDeductionResult DeduceTemplateArgumentsByTypeMatch(
       // Check return types.
       if (auto Result = DeduceTemplateArgumentsByTypeMatch(
               S, TemplateParams, FPP->getReturnType(), FPA->getReturnType(),
-              Info, Deduced, 0,
-              /*PartialOrdering=*/false,
+              Info, Deduced, 0, degradeCallPartialOrderingKind(POK),
               /*DeducedFromArrayBound=*/false, HasDeducedAnyParam);
           Result != TemplateDeductionResult::Success)
         return Result;
@@ -2023,8 +1977,9 @@ static TemplateDeductionResult DeduceTemplateArgumentsByTypeMatch(
       // Check parameter types.
       if (auto Result = DeduceTemplateArguments(
               S, TemplateParams, FPP->param_types(), FPA->param_types(), Info,
-              Deduced, TDF & TDF_TopLevelParameterTypeList, PartialOrdering,
-              HasDeducedAnyParam, /*HasDeducedParam=*/nullptr);
+              Deduced, TDF & TDF_TopLevelParameterTypeList, POK,
+              HasDeducedAnyParam,
+              /*HasDeducedParam=*/nullptr);
           Result != TemplateDeductionResult::Success)
         return Result;
 
@@ -2052,14 +2007,14 @@ static TemplateDeductionResult DeduceTemplateArgumentsByTypeMatch(
           // FIXME: Should we?
           return DeduceNonTypeTemplateArgument(
               S, TemplateParams, NTTP, Noexcept, S.Context.BoolTy,
-              /*DeducedFromArrayBound=*/true, Info, Deduced,
-              HasDeducedAnyParam);
+              /*DeducedFromArrayBound=*/true, Info,
+              POK != PartialOrderingKind::None, Deduced, HasDeducedAnyParam);
 
         case CT_Dependent:
           if (Expr *ArgNoexceptExpr = FPA->getNoexceptExpr())
-            return DeduceNonTypeTemplateArgument(S, TemplateParams, NTTP,
-                                                 ArgNoexceptExpr, Info, Deduced,
-                                                 HasDeducedAnyParam);
+            return DeduceNonTypeTemplateArgument(
+                S, TemplateParams, NTTP, ArgNoexceptExpr, Info,
+                POK != PartialOrderingKind::None, Deduced, HasDeducedAnyParam);
           // Can't deduce anything from throw(T...).
           break;
         }
@@ -2086,13 +2041,15 @@ static TemplateDeductionResult DeduceTemplateArgumentsByTypeMatch(
       // arguments from the template-id.
       if (!(TDF & TDF_DerivedClass) || !A->isRecordType())
         return DeduceTemplateSpecArguments(S, TemplateParams, P, A, Info,
+                                           POK != PartialOrderingKind::None,
                                            Deduced, HasDeducedAnyParam);
 
       SmallVector<DeducedTemplateArgument, 8> DeducedOrig(Deduced.begin(),
                                                           Deduced.end());
 
-      auto Result = DeduceTemplateSpecArguments(S, TemplateParams, P, A, Info,
-                                                Deduced, HasDeducedAnyParam);
+      auto Result = DeduceTemplateSpecArguments(
+          S, TemplateParams, P, A, Info, POK != PartialOrderingKind::None,
+          Deduced, HasDeducedAnyParam);
       if (Result == TemplateDeductionResult::Success)
         return Result;
 
@@ -2111,6 +2068,7 @@ static TemplateDeductionResult DeduceTemplateArgumentsByTypeMatch(
 
       // Check bases according to C++14 [temp.deduct.call] p4b3:
       auto BaseResult = DeduceTemplateBases(S, RD, TemplateParams, P, Info,
+                                            POK != PartialOrderingKind::None,
                                             Deduced, HasDeducedAnyParam);
       return BaseResult != TemplateDeductionResult::Invalid ? BaseResult
                                                             : Result;
@@ -2143,15 +2101,15 @@ static TemplateDeductionResult DeduceTemplateArgumentsByTypeMatch(
       unsigned SubTDF = TDF & TDF_IgnoreQualifiers;
       if (auto Result = DeduceTemplateArgumentsByTypeMatch(
               S, TemplateParams, PPT, APT, Info, Deduced, SubTDF,
-              /*PartialOrdering=*/false, /*DeducedFromArrayBound=*/false,
-              HasDeducedAnyParam);
+              degradeCallPartialOrderingKind(POK),
+              /*DeducedFromArrayBound=*/false, HasDeducedAnyParam);
           Result != TemplateDeductionResult::Success)
         return Result;
       return DeduceTemplateArgumentsByTypeMatch(
           S, TemplateParams, QualType(MPP->getClass(), 0),
           QualType(MPA->getClass(), 0), Info, Deduced, SubTDF,
-          /*PartialOrdering=*/false, /*DeducedFromArrayBound=*/false,
-          HasDeducedAnyParam);
+          degradeCallPartialOrderingKind(POK),
+          /*DeducedFromArrayBound=*/false, HasDeducedAnyParam);
     }
 
     //     (clang extension)
@@ -2166,7 +2124,7 @@ static TemplateDeductionResult DeduceTemplateArgumentsByTypeMatch(
         return TemplateDeductionResult::NonDeducedMismatch;
       return DeduceTemplateArgumentsByTypeMatch(
           S, TemplateParams, BPP->getPointeeType(), BPA->getPointeeType(), Info,
-          Deduced, 0, /*PartialOrdering=*/false,
+          Deduced, 0, degradeCallPartialOrderingKind(POK),
           /*DeducedFromArrayBound=*/false, HasDeducedAnyParam);
     }
 
@@ -2192,8 +2150,8 @@ static TemplateDeductionResult DeduceTemplateArgumentsByTypeMatch(
       // Perform deduction on the element types.
       return DeduceTemplateArgumentsByTypeMatch(
           S, TemplateParams, VP->getElementType(), ElementType, Info, Deduced,
-          TDF, /*PartialOrdering=*/false, /*DeducedFromArrayBound=*/false,
-          HasDeducedAnyParam);
+          TDF, degradeCallPartialOrderingKind(POK),
+          /*DeducedFromArrayBound=*/false, HasDeducedAnyParam);
     }
 
     case Type::DependentVector: {
@@ -2203,7 +2161,7 @@ static TemplateDeductionResult DeduceTemplateArgumentsByTypeMatch(
         // Perform deduction on the element types.
         if (auto Result = DeduceTemplateArgumentsByTypeMatch(
                 S, TemplateParams, VP->getElementType(), VA->getElementType(),
-                Info, Deduced, TDF, /*PartialOrdering=*/false,
+                Info, Deduced, TDF, degradeCallPartialOrderingKind(POK),
                 /*DeducedFromArrayBound=*/false, HasDeducedAnyParam);
             Result != TemplateDeductionResult::Success)
           return Result;
@@ -2219,16 +2177,17 @@ static TemplateDeductionResult DeduceTemplateArgumentsByTypeMatch(
         // Note that we use the "array bound" rules here; just like in that
         // case, we don't have any particular type for the vector size, but
         // we can provide one if necessary.
-        return DeduceNonTypeTemplateArgument(S, TemplateParams, NTTP, ArgSize,
-                                             S.Context.UnsignedIntTy, true,
-                                             Info, Deduced, HasDeducedAnyParam);
+        return DeduceNonTypeTemplateArgument(
+            S, TemplateParams, NTTP, ArgSize, S.Context.UnsignedIntTy, true,
+            Info, POK != PartialOrderingKind::None, Deduced,
+            HasDeducedAnyParam);
       }
 
       if (const auto *VA = A->getAs<DependentVectorType>()) {
         // Perform deduction on the element types.
         if (auto Result = DeduceTemplateArgumentsByTypeMatch(
                 S, TemplateParams, VP->getElementType(), VA->getElementType(),
-                Info, Deduced, TDF, /*PartialOrdering=*/false,
+                Info, Deduced, TDF, degradeCallPartialOrderingKind(POK),
                 /*DeducedFromArrayBound=*/false, HasDeducedAnyParam);
             Result != TemplateDeductionResult::Success)
           return Result;
@@ -2239,9 +2198,9 @@ static TemplateDeductionResult DeduceTemplateArgumentsByTypeMatch(
         if (!NTTP)
           return TemplateDeductionResult::Success;
 
-        return DeduceNonTypeTemplateArgument(S, TemplateParams, NTTP,
-                                             VA->getSizeExpr(), Info, Deduced,
-                                             HasDeducedAnyParam);
+        return DeduceNonTypeTemplateArgument(
+            S, TemplateParams, NTTP, VA->getSizeExpr(), Info,
+            POK != PartialOrderingKind::None, Deduced, HasDeducedAnyParam);
       }
 
       return TemplateDeductionResult::NonDeducedMismatch;
@@ -2257,7 +2216,7 @@ static TemplateDeductionResult DeduceTemplateArgumentsByTypeMatch(
         // Perform deduction on the element types.
         if (auto Result = DeduceTemplateArgumentsByTypeMatch(
                 S, TemplateParams, VP->getElementType(), VA->getElementType(),
-                Info, Deduced, TDF, /*PartialOrdering=*/false,
+                Info, Deduced, TDF, degradeCallPartialOrderingKind(POK),
                 /*DeducedFromArrayBound=*/false, HasDeducedAnyParam);
             Result != TemplateDeductionResult::Success)
           return Result;
@@ -2273,16 +2232,16 @@ static TemplateDeductionResult DeduceTemplateArgumentsByTypeMatch(
         // Note that we use the "array bound" rules here; just like in that
         // case, we don't have any particular type for the vector size, but
         // we can provide one if necessary.
-        return DeduceNonTypeTemplateArgument(S, TemplateParams, NTTP, ArgSize,
-                                             S.Context.IntTy, true, Info,
-                                             Deduced, HasDeducedAnyParam);
+        return DeduceNonTypeTemplateArgument(
+            S, TemplateParams, NTTP, ArgSize, S.Context.IntTy, true, Info,
+            POK != PartialOrderingKind::None, Deduced, HasDeducedAnyParam);
       }
 
       if (const auto *VA = A->getAs<DependentSizedExtVectorType>()) {
         // Perform deduction on the element types.
         if (auto Result = DeduceTemplateArgumentsByTypeMatch(
                 S, TemplateParams, VP->getElementType(), VA->getElementType(),
-                Info, Deduced, TDF, /*PartialOrdering=*/false,
+                Info, Deduced, TDF, degradeCallPartialOrderingKind(POK),
                 /*DeducedFromArrayBound=*/false, HasDeducedAnyParam);
             Result != TemplateDeductionResult::Success)
           return Result;
@@ -2293,9 +2252,9 @@ static TemplateDeductionResult DeduceTemplateArgumentsByTypeMatch(
         if (!NTTP)
           return TemplateDeductionResult::Success;
 
-        return DeduceNonTypeTemplateArgument(S, TemplateParams, NTTP,
-                                             VA->getSizeExpr(), Info, Deduced,
-                                             HasDeducedAnyParam);
+        return DeduceNonTypeTemplateArgument(
+            S, TemplateParams, NTTP, VA->getSizeExpr(), Info,
+            POK != PartialOrderingKind::None, Deduced, HasDeducedAnyParam);
       }
 
       return TemplateDeductionResult::NonDeducedMismatch;
@@ -2319,7 +2278,7 @@ static TemplateDeductionResult DeduceTemplateArgumentsByTypeMatch(
       // Perform deduction on element types.
       return DeduceTemplateArgumentsByTypeMatch(
           S, TemplateParams, MP->getElementType(), MA->getElementType(), Info,
-          Deduced, TDF, /*PartialOrdering=*/false,
+          Deduced, TDF, degradeCallPartialOrderingKind(POK),
           /*DeducedFromArrayBound=*/false, HasDeducedAnyParam);
     }
 
@@ -2332,14 +2291,14 @@ static TemplateDeductionResult DeduceTemplateArgumentsByTypeMatch(
       // Check the element type of the matrixes.
       if (auto Result = DeduceTemplateArgumentsByTypeMatch(
               S, TemplateParams, MP->getElementType(), MA->getElementType(),
-              Info, Deduced, TDF, /*PartialOrdering=*/false,
+              Info, Deduced, TDF, degradeCallPartialOrderingKind(POK),
               /*DeducedFromArrayBound=*/false, HasDeducedAnyParam);
           Result != TemplateDeductionResult::Success)
         return Result;
 
       // Try to deduce a matrix dimension.
       auto DeduceMatrixArg =
-          [&S, &Info, &Deduced, &TemplateParams, &HasDeducedAnyParam](
+          [&S, &Info, &Deduced, &TemplateParams, &HasDeducedAnyParam, POK](
               Expr *ParamExpr, const MatrixType *A,
               unsigned (ConstantMatrixType::*GetArgDimension)() const,
               Expr *(DependentSizedMatrixType::*GetArgDimensionExpr)() const) {
@@ -2376,12 +2335,13 @@ static TemplateDeductionResult DeduceTemplateArgumentsByTypeMatch(
               ArgConst = (ACM->*GetArgDimension)();
               return DeduceNonTypeTemplateArgument(
                   S, TemplateParams, NTTP, ArgConst, S.Context.getSizeType(),
-                  /*ArrayBound=*/true, Info, Deduced, HasDeducedAnyParam);
+                  /*ArrayBound=*/true, Info, POK != PartialOrderingKind::None,
+                  Deduced, HasDeducedAnyParam);
             }
 
             return DeduceNonTypeTemplateArgument(
                 S, TemplateParams, NTTP, (ADM->*GetArgDimensionExpr)(), Info,
-                Deduced, HasDeducedAnyParam);
+                POK != PartialOrderingKind::None, Deduced, HasDeducedAnyParam);
           };
 
       if (auto Result = DeduceMatrixArg(MP->getRowExpr(), MA,
@@ -2405,7 +2365,7 @@ static TemplateDeductionResult DeduceTemplateArgumentsByTypeMatch(
         // Perform deduction on the pointer type.
         if (auto Result = DeduceTemplateArgumentsByTypeMatch(
                 S, TemplateParams, ASP->getPointeeType(), ASA->getPointeeType(),
-                Info, Deduced, TDF, /*PartialOrdering=*/false,
+                Info, Deduced, TDF, degradeCallPartialOrderingKind(POK),
                 /*DeducedFromArrayBound=*/false, HasDeducedAnyParam);
             Result != TemplateDeductionResult::Success)
           return Result;
@@ -2416,9 +2376,9 @@ static TemplateDeductionResult DeduceTemplateArgumentsByTypeMatch(
         if (!NTTP)
           return TemplateDeductionResult::Success;
 
-        return DeduceNonTypeTemplateArgument(S, TemplateParams, NTTP,
-                                             ASA->getAddrSpaceExpr(), Info,
-                                             Deduced, HasDeducedAnyParam);
+        return DeduceNonTypeTemplateArgument(
+            S, TemplateParams, NTTP, ASA->getAddrSpaceExpr(), Info,
+            POK != PartialOrderingKind::None, Deduced, HasDeducedAnyParam);
       }
 
       if (isTargetAddressSpace(A.getAddressSpace())) {
@@ -2430,8 +2390,8 @@ static TemplateDeductionResult DeduceTemplateArgumentsByTypeMatch(
         if (auto Result = DeduceTemplateArgumentsByTypeMatch(
                 S, TemplateParams, ASP->getPointeeType(),
                 S.Context.removeAddrSpaceQualType(A), Info, Deduced, TDF,
-                /*PartialOrdering=*/false, /*DeducedFromArrayBound=*/false,
-                HasDeducedAnyParam);
+                degradeCallPartialOrderingKind(POK),
+                /*DeducedFromArrayBound=*/false, HasDeducedAnyParam);
             Result != TemplateDeductionResult::Success)
           return Result;
 
@@ -2443,7 +2403,8 @@ static TemplateDeductionResult DeduceTemplateArgumentsByTypeMatch(
 
         return DeduceNonTypeTemplateArgument(
             S, TemplateParams, NTTP, ArgAddressSpace, S.Context.IntTy, true,
-            Info, Deduced, HasDeducedAnyParam);
+            Info, POK != PartialOrderingKind::None, Deduced,
+            HasDeducedAnyParam);
       }
 
       return TemplateDeductionResult::NonDeducedMismatch;
@@ -2463,9 +2424,9 @@ static TemplateDeductionResult DeduceTemplateArgumentsByTypeMatch(
         llvm::APSInt ArgSize(S.Context.getTypeSize(S.Context.IntTy), false);
         ArgSize = IA->getNumBits();
 
-        return DeduceNonTypeTemplateArgument(S, TemplateParams, NTTP, ArgSize,
-                                             S.Context.IntTy, true, Info,
-                                             Deduced, HasDeducedAnyParam);
+        return DeduceNonTypeTemplateArgument(
+            S, TemplateParams, NTTP, ArgSize, S.Context.IntTy, true, Info,
+            POK != PartialOrderingKind::None, Deduced, HasDeducedAnyParam);
       }
 
       if (const auto *IA = A->getAs<DependentBitIntType>()) {
@@ -2496,8 +2457,8 @@ static TemplateDeductionResult DeduceTemplateArgumentsByTypeMatch(
       if (PIT->hasSelectedType()) {
         return DeduceTemplateArgumentsByTypeMatch(
             S, TemplateParams, PIT->getSelectedType(), A, Info, Deduced, TDF,
-            /*PartialOrdering=*/false, /*DeducedFromArrayBound=*/false,
-            HasDeducedAnyParam);
+            degradeCallPartialOrderingKind(POK),
+            /*DeducedFromArrayBound=*/false, HasDeducedAnyParam);
       }
       return TemplateDeductionResult::IncompletePack;
     }
@@ -2509,7 +2470,7 @@ static TemplateDeductionResult DeduceTemplateArgumentsByTypeMatch(
 static TemplateDeductionResult
 DeduceTemplateArguments(Sema &S, TemplateParameterList *TemplateParams,
                         const TemplateArgument &P, TemplateArgument A,
-                        TemplateDeductionInfo &Info,
+                        TemplateDeductionInfo &Info, bool PartialOrdering,
                         SmallVectorImpl<DeducedTemplateArgument> &Deduced,
                         bool *HasDeducedAnyParam) {
   // If the template argument is a pack expansion, perform template argument
@@ -2526,17 +2487,21 @@ DeduceTemplateArguments(Sema &S, TemplateParameterList *TemplateParams,
     if (A.getKind() == TemplateArgument::Type)
       return DeduceTemplateArgumentsByTypeMatch(
           S, TemplateParams, P.getAsType(), A.getAsType(), Info, Deduced, 0,
-          /*PartialOrdering=*/false, /*DeducedFromArrayBound=*/false,
-          HasDeducedAnyParam);
+          PartialOrdering ? PartialOrderingKind::NonCall
+                          : PartialOrderingKind::None,
+          /*DeducedFromArrayBound=*/false, HasDeducedAnyParam);
     Info.FirstArg = P;
     Info.SecondArg = A;
     return TemplateDeductionResult::NonDeducedMismatch;
 
   case TemplateArgument::Template:
+    // PartialOrdering does not matter here, since template specializations are
+    // not being deduced.
     if (A.getKind() == TemplateArgument::Template)
       return DeduceTemplateArguments(
           S, TemplateParams, P.getAsTemplate(), A.getAsTemplate(), Info,
-          /*DefaultArguments=*/{}, Deduced, HasDeducedAnyParam);
+          /*DefaultArguments=*/{}, /*PartialOrdering=*/false, Deduced,
+          HasDeducedAnyParam);
     Info.FirstArg = P;
     Info.SecondArg = A;
     return TemplateDeductionResult::NonDeducedMismatch;
@@ -2587,20 +2552,20 @@ DeduceTemplateArguments(Sema &S, TemplateParameterList *TemplateParams,
       case TemplateArgument::Integral:
       case TemplateArgument::Expression:
       case TemplateArgument::StructuralValue:
-        return DeduceNonTypeTemplateArgument(S, TemplateParams, NTTP,
-                                             DeducedTemplateArgument(A),
-                                             A.getNonTypeTemplateArgumentType(),
-                                             Info, Deduced, HasDeducedAnyParam);
+        return DeduceNonTypeTemplateArgument(
+            S, TemplateParams, NTTP, DeducedTemplateArgument(A),
+            A.getNonTypeTemplateArgumentType(), Info, PartialOrdering, Deduced,
+            HasDeducedAnyParam);
 
       case TemplateArgument::NullPtr:
-        return DeduceNullPtrTemplateArgument(S, TemplateParams, NTTP,
-                                             A.getNullPtrType(), Info, Deduced,
-                                             HasDeducedAnyParam);
+        return DeduceNullPtrTemplateArgument(
+            S, TemplateParams, NTTP, A.getNullPtrType(), Info, PartialOrdering,
+            Deduced, HasDeducedAnyParam);
 
       case TemplateArgument::Declaration:
         return DeduceNonTypeTemplateArgument(
             S, TemplateParams, NTTP, A.getAsDecl(), A.getParamTypeForDecl(),
-            Info, Deduced, HasDeducedAnyParam);
+            Info, PartialOrdering, Deduced, HasDeducedAnyParam);
 
       case TemplateArgument::Null:
       case TemplateArgument::Type:
@@ -2672,8 +2637,8 @@ DeduceTemplateArguments(Sema &S, TemplateParameterList *TemplateParams,
                         ArrayRef<TemplateArgument> As,
                         TemplateDeductionInfo &Info,
                         SmallVectorImpl<DeducedTemplateArgument> &Deduced,
-                        bool NumberOfArgumentsMustMatch, PackFold PackFold,
-                        bool *HasDeducedAnyParam) {
+                        bool NumberOfArgumentsMustMatch, bool PartialOrdering,
+                        PackFold PackFold, bool *HasDeducedAnyParam) {
   if (PackFold == PackFold::ArgumentToParameter)
     std::swap(Ps, As);
   // C++0x [temp.deduct.type]p9:
@@ -2710,7 +2675,8 @@ DeduceTemplateArguments(Sema &S, TemplateParameterList *TemplateParams,
       if (PackFold == PackFold::ArgumentToParameter)
         std::swap(Pi, Ai);
       if (auto Result = DeduceTemplateArguments(S, TemplateParams, Pi, Ai, Info,
-                                                Deduced, HasDeducedAnyParam);
+                                                PartialOrdering, Deduced,
+                                                HasDeducedAnyParam);
           Result != TemplateDeductionResult::Success)
         return Result;
 
@@ -2742,7 +2708,8 @@ DeduceTemplateArguments(Sema &S, TemplateParameterList *TemplateParams,
         std::swap(Pi, Ai);
       // Deduce template arguments from the pattern.
       if (auto Result = DeduceTemplateArguments(S, TemplateParams, Pi, Ai, Info,
-                                                Deduced, HasDeducedAnyParam);
+                                                PartialOrdering, Deduced,
+                                                HasDeducedAnyParam);
           Result != TemplateDeductionResult::Success)
         return Result;
 
@@ -2766,7 +2733,8 @@ TemplateDeductionResult Sema::DeduceTemplateArguments(
     bool NumberOfArgumentsMustMatch) {
   return ::DeduceTemplateArguments(
       *this, TemplateParams, Ps, As, Info, Deduced, NumberOfArgumentsMustMatch,
-      PackFold::ParameterToArgument, /*HasDeducedAnyParam=*/nullptr);
+      /*PartialOrdering=*/false, PackFold::ParameterToArgument,
+      /*HasDeducedAnyParam=*/nullptr);
 }
 
 /// Determine whether two template arguments are the same.
@@ -3291,7 +3259,7 @@ FinishTemplateArgumentDeduction(
   SmallVector<TemplateArgument, 4> SugaredConvertedInstArgs,
       CanonicalConvertedInstArgs;
   if (S.CheckTemplateArgumentList(
-          Template, Partial->getLocation(), InstArgs, false,
+          Template, Partial->getLocation(), InstArgs, /*DefaultArgs=*/{}, false,
           SugaredConvertedInstArgs, CanonicalConvertedInstArgs,
           /*UpdateArgsWithConversions=*/true, &ConstraintsNotSatisfied))
     return ConstraintsNotSatisfied
@@ -3443,7 +3411,8 @@ DeduceTemplateArguments(Sema &S, T *Partial,
   if (TemplateDeductionResult Result = ::DeduceTemplateArguments(
           S, Partial->getTemplateParameters(),
           Partial->getTemplateArgs().asArray(), TemplateArgs, Info, Deduced,
-          /*NumberOfArgumentsMustMatch=*/false, PackFold::ParameterToArgument,
+          /*NumberOfArgumentsMustMatch=*/false, /*PartialOrdering=*/true,
+          PackFold::ParameterToArgument,
           /*HasDeducedAnyParam=*/nullptr);
       Result != TemplateDeductionResult::Success)
     return Result;
@@ -3593,8 +3562,8 @@ TemplateDeductionResult Sema::SubstituteExplicitTemplateArguments(
     return TemplateDeductionResult::InstantiationDepth;
 
   if (CheckTemplateArgumentList(FunctionTemplate, SourceLocation(),
-                                ExplicitTemplateArgs, true, SugaredBuilder,
-                                CanonicalBuilder,
+                                ExplicitTemplateArgs, /*DefaultArgs=*/{}, true,
+                                SugaredBuilder, CanonicalBuilder,
                                 /*UpdateArgsWithConversions=*/false) ||
       Trap.hasErrorOccurred()) {
     unsigned Index = SugaredBuilder.size();
@@ -4247,7 +4216,7 @@ ResolveOverloadForDeduction(Sema &S, TemplateParameterList *TemplateParams,
     TemplateDeductionInfo Info(Ovl->getNameLoc());
     TemplateDeductionResult Result = DeduceTemplateArgumentsByTypeMatch(
         S, TemplateParams, ParamType, ArgType, Info, Deduced, TDF,
-        /*PartialOrdering=*/false, /*DeducedFromArrayBound=*/false,
+        PartialOrderingKind::None, /*DeducedFromArrayBound=*/false,
         /*HasDeducedAnyParam=*/nullptr);
     if (Result != TemplateDeductionResult::Success)
       continue;
@@ -4434,7 +4403,7 @@ static TemplateDeductionResult DeduceFromInitializerList(
       llvm::APInt Size(S.Context.getIntWidth(T), ILE->getNumInits());
       if (auto Result = DeduceNonTypeTemplateArgument(
               S, TemplateParams, NTTP, llvm::APSInt(Size), T,
-              /*ArrayBound=*/true, Info, Deduced,
+              /*ArrayBound=*/true, Info, /*PartialOrdering=*/false, Deduced,
               /*HasDeducedAnyParam=*/nullptr);
           Result != TemplateDeductionResult::Success)
         return Result;
@@ -4480,7 +4449,7 @@ static TemplateDeductionResult DeduceTemplateArgumentsFromCallArgument(
         Sema::OriginalCallArg(OrigParamType, DecomposedParam, ArgIdx, ArgType));
   return DeduceTemplateArgumentsByTypeMatch(
       S, TemplateParams, ParamType, ArgType, Info, Deduced, TDF,
-      /*PartialOrdering=*/false, /*DeducedFromArrayBound=*/false,
+      PartialOrderingKind::None, /*DeducedFromArrayBound=*/false,
       /*HasDeducedAnyParam=*/nullptr);
 }
 
@@ -4808,7 +4777,7 @@ TemplateDeductionResult Sema::DeduceTemplateArguments(
     // Deduce template arguments from the function type.
     if (TemplateDeductionResult Result = DeduceTemplateArgumentsByTypeMatch(
             *this, TemplateParams, FunctionType, ArgFunctionType, Info, Deduced,
-            TDF, /*PartialOrdering=*/false, /*DeducedFromArrayBound=*/false,
+            TDF, PartialOrderingKind::None, /*DeducedFromArrayBound=*/false,
             /*HasDeducedAnyParam=*/nullptr);
         Result != TemplateDeductionResult::Success)
       return Result;
@@ -4987,7 +4956,7 @@ TemplateDeductionResult Sema::DeduceTemplateArguments(
 
   if (TemplateDeductionResult Result = DeduceTemplateArgumentsByTypeMatch(
           *this, TemplateParams, P, A, Info, Deduced, TDF,
-          /*PartialOrdering=*/false, /*DeducedFromArrayBound=*/false,
+          PartialOrderingKind::None, /*DeducedFromArrayBound=*/false,
           /*HasDeducedAnyParam=*/nullptr);
       Result != TemplateDeductionResult::Success)
     return Result;
@@ -5125,9 +5094,9 @@ static bool CheckDeducedPlaceholderConstraints(Sema &S, const AutoType &Type,
     TemplateArgs.addArgument(TypeLoc.getArgLoc(I));
 
   llvm::SmallVector<TemplateArgument, 4> SugaredConverted, CanonicalConverted;
-  if (S.CheckTemplateArgumentList(Concept, SourceLocation(), TemplateArgs,
-                                  /*PartialTemplateArgs=*/false,
-                                  SugaredConverted, CanonicalConverted))
+  if (S.CheckTemplateArgumentList(
+          Concept, SourceLocation(), TemplateArgs, /*DefaultArgs=*/{},
+          /*PartialTemplateArgs=*/false, SugaredConverted, CanonicalConverted))
     return true;
   MultiLevelTemplateArgumentList MLTAL(Concept, CanonicalConverted,
                                        /*Final=*/false);
@@ -5278,7 +5247,8 @@ Sema::DeduceAutoType(TypeLoc Type, Expr *Init, QualType &Result,
         if (auto TDK = DeduceTemplateArgumentsFromCallArgument(
                 *this, TemplateParamsSt.get(), 0, TemplArg, Init->getType(),
                 Init->Classify(getASTContext()), Init, Info, Deduced,
-                OriginalCallArgs, /*Decomposed=*/true,
+                OriginalCallArgs,
+                /*Decomposed=*/true,
                 /*ArgIdx=*/0, /*TDF=*/0);
             TDK != TemplateDeductionResult::Success) {
           if (TDK == TemplateDeductionResult::Inconsistent) {
@@ -5306,8 +5276,8 @@ Sema::DeduceAutoType(TypeLoc Type, Expr *Init, QualType &Result,
       if (auto TDK = DeduceTemplateArgumentsFromCallArgument(
               *this, TemplateParamsSt.get(), 0, FuncParam, Init->getType(),
               Init->Classify(getASTContext()), Init, Info, Deduced,
-              OriginalCallArgs, /*Decomposed=*/false, /*ArgIdx=*/0, /*TDF=*/0,
-              FailedTSC);
+              OriginalCallArgs,
+              /*Decomposed=*/false, /*ArgIdx=*/0, /*TDF=*/0, FailedTSC);
           TDK != TemplateDeductionResult::Success)
         return DeductionFailed(TDK);
     }
@@ -5532,10 +5502,6 @@ static TemplateDeductionResult CheckDeductionConsistency(
     ArrayRef<TemplateArgument> DeducedArgs, bool CheckConsistency) {
   MultiLevelTemplateArgumentList MLTAL(FTD, DeducedArgs,
                                        /*Final=*/true);
-  if (ArgIdx != -1)
-    if (auto *MD = dyn_cast<CXXMethodDecl>(FTD->getTemplatedDecl());
-        MD && MD->isImplicitObjectMemberFunction())
-      ArgIdx -= 1;
   Sema::ArgumentPackSubstitutionIndexRAII PackIndex(
       S, ArgIdx != -1 ? ::getPackIndexForParam(S, FTD, MLTAL, ArgIdx) : -1);
   bool IsIncompleteSubstitution = false;
@@ -5606,12 +5572,10 @@ static TemplateDeductionResult FinishTemplateArgumentDeduction(
 
 /// Determine whether the function template \p FT1 is at least as
 /// specialized as \p FT2.
-static bool isAtLeastAsSpecializedAs(Sema &S, SourceLocation Loc,
-                                     FunctionTemplateDecl *FT1,
-                                     FunctionTemplateDecl *FT2,
-                                     TemplatePartialOrderingContext TPOC,
-                                     ArrayRef<QualType> Args1,
-                                     ArrayRef<QualType> Args2) {
+static bool isAtLeastAsSpecializedAs(
+    Sema &S, SourceLocation Loc, FunctionTemplateDecl *FT1,
+    FunctionTemplateDecl *FT2, TemplatePartialOrderingContext TPOC,
+    ArrayRef<QualType> Args1, ArrayRef<QualType> Args2, bool Args1Offset) {
   FunctionDecl *FD1 = FT1->getTemplatedDecl();
   FunctionDecl *FD2 = FT2->getTemplatedDecl();
   const FunctionProtoType *Proto1 = FD1->getType()->getAs<FunctionProtoType>();
@@ -5650,8 +5614,8 @@ static bool isAtLeastAsSpecializedAs(Sema &S, SourceLocation Loc,
   if (TPOC != TPOC_Call) {
     if (DeduceTemplateArgumentsByTypeMatch(
             S, TemplateParams, Proto2->getReturnType(), Proto1->getReturnType(),
-            Info, Deduced, TDF_None,
-            /*PartialOrdering=*/true, /*DeducedFromArrayBound=*/false,
+            Info, Deduced, TDF_None, PartialOrderingKind::Call,
+            /*DeducedFromArrayBound=*/false,
             &HasDeducedAnyParamFromReturnType) !=
         TemplateDeductionResult::Success)
       return false;
@@ -5661,7 +5625,7 @@ static bool isAtLeastAsSpecializedAs(Sema &S, SourceLocation Loc,
   if (TPOC != TPOC_Conversion) {
     HasDeducedParam.resize(Args2.size());
     if (DeduceTemplateArguments(S, TemplateParams, Args2, Args1, Info, Deduced,
-                                TDF_None, /*PartialOrdering=*/true,
+                                TDF_None, PartialOrderingKind::Call,
                                 /*HasDeducedAnyParam=*/nullptr,
                                 &HasDeducedParam) !=
         TemplateDeductionResult::Success)
@@ -5700,11 +5664,14 @@ static bool isAtLeastAsSpecializedAs(Sema &S, SourceLocation Loc,
 
               return ::DeduceForEachType(
                   S, TemplateParams, Args2, Args1, Info, Deduced,
-                  /*PartialOrdering=*/true, /*FinishingDeduction=*/true,
+                  PartialOrderingKind::Call, /*FinishingDeduction=*/true,
                   [&](Sema &S, TemplateParameterList *, int ParamIdx,
                       int ArgIdx, QualType P, QualType A,
                       TemplateDeductionInfo &Info,
-                      SmallVectorImpl<DeducedTemplateArgument> &Deduced, bool) {
+                      SmallVectorImpl<DeducedTemplateArgument> &Deduced,
+                      PartialOrderingKind) {
+                    if (ArgIdx != -1)
+                      ArgIdx -= Args1Offset;
                     return ::CheckDeductionConsistency(
                         S, FTD, ArgIdx, P, A, DeducedArgs,
                         /*CheckConsistency=*/HasDeducedParam[ParamIdx]);
@@ -5792,6 +5759,8 @@ FunctionTemplateDecl *Sema::getMoreSpecializedTemplate(
   const FunctionDecl *FD2 = FT2->getTemplatedDecl();
   bool ShouldConvert1 = false;
   bool ShouldConvert2 = false;
+  bool Args1Offset = false;
+  bool Args2Offset = false;
   QualType Obj1Ty;
   QualType Obj2Ty;
   if (TPOC == TPOC_Call) {
@@ -5840,6 +5809,7 @@ FunctionTemplateDecl *Sema::getMoreSpecializedTemplate(
         Obj1Ty = GetImplicitObjectParameterType(this->Context, Method1,
                                                 RawObj1Ty, IsRValRef2);
         Args1.push_back(Obj1Ty);
+        Args1Offset = true;
       }
       if (ShouldConvert2) {
         bool IsRValRef1 =
@@ -5850,6 +5820,7 @@ FunctionTemplateDecl *Sema::getMoreSpecializedTemplate(
         Obj2Ty = GetImplicitObjectParameterType(this->Context, Method2,
                                                 RawObj2Ty, IsRValRef1);
         Args2.push_back(Obj2Ty);
+        Args2Offset = true;
       }
     } else {
       if (NonStaticMethod1 && Method1->hasCXXExplicitFunctionObjectParameter())
@@ -5871,10 +5842,10 @@ FunctionTemplateDecl *Sema::getMoreSpecializedTemplate(
   } else {
     assert(!Reversed && "Only call context could have reversed arguments");
   }
-  bool Better1 =
-      isAtLeastAsSpecializedAs(*this, Loc, FT1, FT2, TPOC, Args1, Args2);
-  bool Better2 =
-      isAtLeastAsSpecializedAs(*this, Loc, FT2, FT1, TPOC, Args2, Args1);
+  bool Better1 = isAtLeastAsSpecializedAs(*this, Loc, FT1, FT2, TPOC, Args1,
+                                          Args2, Args2Offset);
+  bool Better2 = isAtLeastAsSpecializedAs(*this, Loc, FT2, FT1, TPOC, Args2,
+                                          Args1, Args1Offset);
   // C++ [temp.deduct.partial]p10:
   //   F is more specialized than G if F is at least as specialized as G and G
   //   is not at least as specialized as F.
@@ -6151,7 +6122,7 @@ static bool isAtLeastAsSpecializedAs(Sema &S, QualType T1, QualType T2,
   Deduced.resize(P2->getTemplateParameters()->size());
   if (DeduceTemplateArgumentsByTypeMatch(
           S, P2->getTemplateParameters(), T2, T1, Info, Deduced, TDF_None,
-          /*PartialOrdering=*/true, /*DeducedFromArrayBound=*/false,
+          PartialOrderingKind::Call, /*DeducedFromArrayBound=*/false,
           /*HasDeducedAnyParam=*/nullptr) != TemplateDeductionResult::Success)
     return false;
 
@@ -6408,8 +6379,8 @@ bool Sema::isMoreSpecializedThanPrimary(
 }
 
 bool Sema::isTemplateTemplateParameterAtLeastAsSpecializedAs(
-    TemplateParameterList *P, TemplateDecl *AArg, SourceLocation Loc,
-    bool IsDeduced) {
+    TemplateParameterList *P, TemplateDecl *AArg,
+    const DefaultArguments &DefaultArgs, SourceLocation Loc, bool IsDeduced) {
   // C++1z [temp.arg.template]p4: (DR 150)
   //   A template template-parameter P is at least as specialized as a
   //   template template-argument A if, given the following rewrite to two
@@ -6457,8 +6428,9 @@ bool Sema::isTemplateTemplateParameterAtLeastAsSpecializedAs(
     //   If the rewrite produces an invalid type, then P is not at least as
     //   specialized as A.
     SmallVector<TemplateArgument, 4> SugaredPArgs;
-    if (CheckTemplateArgumentList(AArg, Loc, PArgList, false, SugaredPArgs,
-                                  PArgs, /*UpdateArgsWithConversions=*/true,
+    if (CheckTemplateArgumentList(AArg, Loc, PArgList, DefaultArgs, false,
+                                  SugaredPArgs, PArgs,
+                                  /*UpdateArgsWithConversions=*/true,
                                   /*ConstraintsNotSatisfied=*/nullptr,
                                   /*PartialOrderTTP=*/true) ||
         Trap.hasErrorOccurred())
@@ -6483,6 +6455,7 @@ bool Sema::isTemplateTemplateParameterAtLeastAsSpecializedAs(
   //   currently implemented as a special case elsewhere.
   if (::DeduceTemplateArguments(*this, A, AArgs, PArgs, Info, Deduced,
                                 /*NumberOfArgumentsMustMatch=*/false,
+                                /*PartialOrdering=*/true,
                                 IsDeduced ? PackFold::ArgumentToParameter
                                           : PackFold::ParameterToArgument,
                                 /*HasDeducedAnyParam=*/nullptr) !=
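
For context, a minimal source-level sketch of the CWG2398 scenario that the
DefaultArguments/PartialOrdering plumbing above serves (an assumed example,
not taken from this patch's tests): a template template argument with more
template parameters than the parameter P can still be deduced, with its
trailing parameters treated as defaulted.

    template <class T, class U = int> struct Pair {};

    template <template <class> class TT, class T>
    void take(TT<T>); // TT declares fewer template parameters than Pair

    void use(Pair<float> p) {
      take(p); // Under the provisional CWG2398 resolution, TT is deduced from
               // Pair, with Pair's extra parameter U treated as defaulted.
    }
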
diff --git clang/lib/Sema/SemaTemplateInstantiateDecl.cpp clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
index 51109b092d75..bb311e384092 100644
--- clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -3920,10 +3920,10 @@ TemplateDeclInstantiator::VisitClassTemplateSpecializationDecl(
   // Check that the template argument list is well-formed for this
   // class template.
   SmallVector<TemplateArgument, 4> SugaredConverted, CanonicalConverted;
-  if (SemaRef.CheckTemplateArgumentList(InstClassTemplate, D->getLocation(),
-                                        InstTemplateArgs, false,
-                                        SugaredConverted, CanonicalConverted,
-                                        /*UpdateArgsWithConversions=*/true))
+  if (SemaRef.CheckTemplateArgumentList(
+          InstClassTemplate, D->getLocation(), InstTemplateArgs,
+          /*DefaultArgs=*/{}, false, SugaredConverted, CanonicalConverted,
+          /*UpdateArgsWithConversions=*/true))
     return nullptr;
 
   // Figure out where to insert this class template explicit specialization
@@ -4028,10 +4028,10 @@ Decl *TemplateDeclInstantiator::VisitVarTemplateSpecializationDecl(
 
   // Check that the template argument list is well-formed for this template.
   SmallVector<TemplateArgument, 4> SugaredConverted, CanonicalConverted;
-  if (SemaRef.CheckTemplateArgumentList(InstVarTemplate, D->getLocation(),
-                                        VarTemplateArgsInfo, false,
-                                        SugaredConverted, CanonicalConverted,
-                                        /*UpdateArgsWithConversions=*/true))
+  if (SemaRef.CheckTemplateArgumentList(
+          InstVarTemplate, D->getLocation(), VarTemplateArgsInfo,
+          /*DefaultArgs=*/{}, false, SugaredConverted, CanonicalConverted,
+          /*UpdateArgsWithConversions=*/true))
     return nullptr;
 
   // Check whether we've already seen a declaration of this specialization.
@@ -4296,6 +4296,7 @@ TemplateDeclInstantiator::InstantiateClassTemplatePartialSpecialization(
   SmallVector<TemplateArgument, 4> SugaredConverted, CanonicalConverted;
   if (SemaRef.CheckTemplateArgumentList(
           ClassTemplate, PartialSpec->getLocation(), InstTemplateArgs,
+          /*DefaultArgs=*/{},
           /*PartialTemplateArgs=*/false, SugaredConverted, CanonicalConverted))
     return nullptr;
 
@@ -4407,9 +4408,10 @@ TemplateDeclInstantiator::InstantiateVarTemplatePartialSpecialization(
   // Check that the template argument list is well-formed for this
   // class template.
   SmallVector<TemplateArgument, 4> SugaredConverted, CanonicalConverted;
-  if (SemaRef.CheckTemplateArgumentList(
-          VarTemplate, PartialSpec->getLocation(), InstTemplateArgs,
-          /*PartialTemplateArgs=*/false, SugaredConverted, CanonicalConverted))
+  if (SemaRef.CheckTemplateArgumentList(VarTemplate, PartialSpec->getLocation(),
+                                        InstTemplateArgs, /*DefaultArgs=*/{},
+                                        /*PartialTemplateArgs=*/false,
+                                        SugaredConverted, CanonicalConverted))
     return nullptr;
 
   // Check these arguments are valid for a template partial specialization.
@@ -5479,7 +5481,10 @@ void Sema::InstantiateVariableInitializer(
     EnterExpressionEvaluationContext Evaluated(
         *this, Sema::ExpressionEvaluationContext::PotentiallyEvaluated, Var);
 
-    keepInLifetimeExtendingContext();
+    currentEvaluationContext().InLifetimeExtendingContext =
+        parentEvaluationContext().InLifetimeExtendingContext;
+    currentEvaluationContext().RebuildDefaultArgOrDefaultInit =
+        parentEvaluationContext().RebuildDefaultArgOrDefaultInit;
     // Instantiate the initializer.
     ExprResult Init;
 
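The two hunks above replace keepInLifetimeExtendingContext() with explicit propagation of both InLifetimeExtendingContext and RebuildDefaultArgOrDefaultInit from the parent evaluation context. As a hypothetical reduction (not from this patch; Temp/Span/makeSpan are illustrative names), this is the C++23 behavior that state ultimately serves — P2718R0 extends every temporary in a for-range-initializer, including ones materialized for default arguments:

struct Temp { int data[3]{1, 2, 3}; };
struct Span {
  const int *first, *last;
  const int *begin() const { return first; }
  const int *end() const { return last; }
};
Span makeSpan(const Temp &t = Temp()) { return {t.data, t.data + 3}; }

void use() {
  for (int x : makeSpan())  // C++23: the Temp() default-argument temporary
    (void)x;                // lives for the whole loop instead of dangling
}
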
diff --git clang/lib/Sema/SemaTemplateVariadic.cpp clang/lib/Sema/SemaTemplateVariadic.cpp
index bcd31c98871e..40522a07f633 100644
--- clang/lib/Sema/SemaTemplateVariadic.cpp
+++ clang/lib/Sema/SemaTemplateVariadic.cpp
@@ -39,6 +39,10 @@ namespace {
     bool InLambda = false;
     unsigned DepthLimit = (unsigned)-1;
 
+#ifndef NDEBUG
+    bool ContainsFunctionParmPackExpr = false;
+#endif
+
     void addUnexpanded(NamedDecl *ND, SourceLocation Loc = SourceLocation()) {
       if (auto *VD = dyn_cast<VarDecl>(ND)) {
         // For now, the only problematic case is a generic lambda's templated
@@ -280,6 +284,17 @@ namespace {
 
       return inherited::TraverseLambdaCapture(Lambda, C, Init);
     }
+
+#ifndef NDEBUG
+    bool TraverseFunctionParmPackExpr(FunctionParmPackExpr *) {
+      ContainsFunctionParmPackExpr = true;
+      return true;
+    }
+
+    bool containsFunctionParmPackExpr() const {
+      return ContainsFunctionParmPackExpr;
+    }
+#endif
   };
 }
 
@@ -414,16 +429,21 @@ bool Sema::DiagnoseUnexpandedParameterPack(Expr *E,
   if (!E->containsUnexpandedParameterPack())
     return false;
 
-  // CollectUnexpandedParameterPacksVisitor does not expect to see a
-  // FunctionParmPackExpr, but diagnosing unexpected parameter packs may still
-  // see such an expression in a lambda body.
-  // We'll bail out early in this case to avoid triggering an assertion.
-  if (isa<FunctionParmPackExpr>(E) && getEnclosingLambda())
-    return false;
-
+  // FunctionParmPackExprs are special:
+  //
+  // 1) they model DeclRefExprs to packs that have already been expanded, but
+  // whose expansion was deferred during transformation.
+  //
+  // 2) they always carry the unexpanded-pack dependency bit, yet they do not
+  // introduce any new unexpanded packs.
+  //
+  // We might therefore encounter a FunctionParmPackExpr as a full expression
+  // that an enclosing CXXFoldExpr will expand later.
   SmallVector<UnexpandedParameterPack, 2> Unexpanded;
-  CollectUnexpandedParameterPacksVisitor(Unexpanded).TraverseStmt(E);
-  assert(!Unexpanded.empty() && "Unable to find unexpanded parameter packs");
+  CollectUnexpandedParameterPacksVisitor Visitor(Unexpanded);
+  Visitor.TraverseStmt(E);
+  assert((!Unexpanded.empty() || Visitor.containsFunctionParmPackExpr()) &&
+         "Unable to find unexpanded parameter packs");
   return DiagnoseUnexpandedParameterPacks(E->getBeginLoc(), UPPC, Unexpanded);
 }
 
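A hypothetical reduction (illustrative names) of the situation the relaxed assertion above tolerates: inside a lambda instantiated from a variadic function template, the reference to the pack can be modeled as a FunctionParmPackExpr whose expansion is left to the enclosing fold expression, so the visitor may legitimately find no ordinary unexpanded packs:

template <typename... Ts>
void dispatch(Ts... args) {
  // During instantiation, `args` inside the lambda may be represented as a
  // FunctionParmPackExpr; the enclosing fold expression expands it later.
  [&] { ((void)args, ...); }();
}

void test() { dispatch(1, 2, 3); }
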
diff --git clang/lib/Sema/SemaType.cpp clang/lib/Sema/SemaType.cpp
index 7df8f663da26..e627fee51b66 100644
--- clang/lib/Sema/SemaType.cpp
+++ clang/lib/Sema/SemaType.cpp
@@ -6490,6 +6490,15 @@ static void HandleBTFTypeTagAttribute(QualType &Type, const ParsedAttr &Attr,
                                       TypeProcessingState &State) {
   Sema &S = State.getSema();
 
+  // This attribute is only supported in C.
+  // FIXME: we should implement checkCommonAttributeFeatures() in SemaAttr.cpp
+  // such that it handles type attributes, and then call that from
+  // processTypeAttrs() instead of one-off checks like this.
+  if (!Attr.diagnoseLangOpts(S)) {
+    Attr.setInvalid();
+    return;
+  }
+
   // Check the number of attribute arguments.
   if (Attr.getNumArgs() != 1) {
     S.Diag(Attr.getLoc(), diag::err_attribute_wrong_number_arguments)
@@ -8835,7 +8844,11 @@ static void processTypeAttrs(TypeProcessingState &state, QualType &type,
     }
     case ParsedAttr::AT_HLSLResourceClass:
     case ParsedAttr::AT_HLSLROV: {
-      if (state.getSema().HLSL().handleResourceTypeAttr(attr))
+      // Only collect HLSL resource type attributes that appear in the
+      // decl-specifier-seq; do not collect attributes on declarations or those
+      // that are allowed to slide after the declaration name.
+      if (TAL == TAL_DeclSpec &&
+          state.getSema().HLSL().handleResourceTypeAttr(attr))
         attr.setUsedAsTypeAttr();
       break;
     }
diff --git clang/lib/Sema/SemaX86.cpp clang/lib/Sema/SemaX86.cpp
index 233a068c8574..6a4d78f0ca90 100644
--- clang/lib/Sema/SemaX86.cpp
+++ clang/lib/Sema/SemaX86.cpp
@@ -46,6 +46,14 @@ bool SemaX86::CheckBuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
   case X86::BI__builtin_ia32_vcvttsh2si64:
   case X86::BI__builtin_ia32_vcvttsh2usi32:
   case X86::BI__builtin_ia32_vcvttsh2usi64:
+  case X86::BI__builtin_ia32_vcvttsd2sis32:
+  case X86::BI__builtin_ia32_vcvttsd2usis32:
+  case X86::BI__builtin_ia32_vcvttss2sis32:
+  case X86::BI__builtin_ia32_vcvttss2usis32:
+  case X86::BI__builtin_ia32_vcvttsd2sis64:
+  case X86::BI__builtin_ia32_vcvttsd2usis64:
+  case X86::BI__builtin_ia32_vcvttss2sis64:
+  case X86::BI__builtin_ia32_vcvttss2usis64:
     ArgNum = 1;
     break;
   case X86::BI__builtin_ia32_maxpd512:
@@ -435,6 +443,24 @@ bool SemaX86::CheckBuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
     ArgNum = 4;
     HasRC = true;
     break;
+  case X86::BI__builtin_ia32_vcvttpd2dqs256_round_mask:
+  case X86::BI__builtin_ia32_vcvttpd2dqs512_round_mask:
+  case X86::BI__builtin_ia32_vcvttpd2udqs256_round_mask:
+  case X86::BI__builtin_ia32_vcvttpd2udqs512_round_mask:
+  case X86::BI__builtin_ia32_vcvttpd2qqs256_round_mask:
+  case X86::BI__builtin_ia32_vcvttpd2qqs512_round_mask:
+  case X86::BI__builtin_ia32_vcvttpd2uqqs256_round_mask:
+  case X86::BI__builtin_ia32_vcvttpd2uqqs512_round_mask:
+  case X86::BI__builtin_ia32_vcvttps2dqs256_round_mask:
+  case X86::BI__builtin_ia32_vcvttps2dqs512_round_mask:
+  case X86::BI__builtin_ia32_vcvttps2udqs256_round_mask:
+  case X86::BI__builtin_ia32_vcvttps2udqs512_round_mask:
+  case X86::BI__builtin_ia32_vcvttps2qqs256_round_mask:
+  case X86::BI__builtin_ia32_vcvttps2qqs512_round_mask:
+  case X86::BI__builtin_ia32_vcvttps2uqqs256_round_mask:
+  case X86::BI__builtin_ia32_vcvttps2uqqs512_round_mask:
+    ArgNum = 3;
+    break;
   }
 
   llvm::APSInt Result;
diff --git clang/lib/Sema/TreeTransform.h clang/lib/Sema/TreeTransform.h
index 66e3f27fed9d..ff745b3385fc 100644
--- clang/lib/Sema/TreeTransform.h
+++ clang/lib/Sema/TreeTransform.h
@@ -113,9 +113,13 @@ class TreeTransform {
   class ForgetPartiallySubstitutedPackRAII {
     Derived &Self;
     TemplateArgument Old;
+    // Set the pack expansion index to -1 to avoid pack substitution and
+    // indicate that parameter packs should be instantiated as themselves.
+    Sema::ArgumentPackSubstitutionIndexRAII ResetPackSubstIndex;
 
   public:
-    ForgetPartiallySubstitutedPackRAII(Derived &Self) : Self(Self) {
+    ForgetPartiallySubstitutedPackRAII(Derived &Self)
+        : Self(Self), ResetPackSubstIndex(Self.getSema(), -1) {
       Old = Self.ForgetPartiallySubstitutedPack();
     }
 
@@ -4254,7 +4258,10 @@ ExprResult TreeTransform<Derived>::TransformInitializer(Expr *Init,
       getSema(), EnterExpressionEvaluationContext::InitList,
       Construct->isListInitialization());
 
-  getSema().keepInLifetimeExtendingContext();
+  getSema().currentEvaluationContext().InLifetimeExtendingContext =
+      getSema().parentEvaluationContext().InLifetimeExtendingContext;
+  getSema().currentEvaluationContext().RebuildDefaultArgOrDefaultInit =
+      getSema().parentEvaluationContext().RebuildDefaultArgOrDefaultInit;
   SmallVector<Expr*, 8> NewArgs;
   bool ArgChanged = false;
   if (getDerived().TransformExprs(Construct->getArgs(), Construct->getNumArgs(),
@@ -6687,10 +6694,10 @@ TreeTransform<Derived>::TransformPackIndexingType(TypeLocBuilder &TLB,
   bool NotYetExpanded = Types.empty();
   bool FullySubstituted = true;
 
-  if (Types.empty())
+  if (Types.empty() && !PIT->expandsToEmptyPack())
     Types = llvm::ArrayRef<QualType>(&Pattern, 1);
 
-  for (const QualType &T : Types) {
+  for (QualType T : Types) {
     if (!T->containsUnexpandedParameterPack()) {
       QualType Transformed = getDerived().TransformType(T);
       if (Transformed.isNull())
@@ -7462,8 +7469,26 @@ QualType TreeTransform<Derived>::TransformBTFTagAttributedType(
 template <typename Derived>
 QualType TreeTransform<Derived>::TransformHLSLAttributedResourceType(
     TypeLocBuilder &TLB, HLSLAttributedResourceTypeLoc TL) {
-  llvm_unreachable(
-      "Unexpected TreeTransform for HLSLAttributedResourceTypeLoc");
+
+  const HLSLAttributedResourceType *oldType = TL.getTypePtr();
+
+  QualType WrappedTy = getDerived().TransformType(TLB, TL.getWrappedLoc());
+  if (WrappedTy.isNull())
+    return QualType();
+
+  QualType ContainedTy = QualType();
+  if (!oldType->getContainedType().isNull())
+    ContainedTy = getDerived().TransformType(TLB, TL.getContainedLoc());
+
+  QualType Result = TL.getType();
+  if (getDerived().AlwaysRebuild() || WrappedTy != oldType->getWrappedType() ||
+      ContainedTy != oldType->getContainedType()) {
+    Result = SemaRef.Context.getHLSLAttributedResourceType(
+        WrappedTy, ContainedTy, oldType->getAttrs());
+  }
+
+  TLB.push<HLSLAttributedResourceTypeLoc>(Result);
+  return Result;
 }
 
 template<typename Derived>
@@ -8906,8 +8931,9 @@ TreeTransform<Derived>::TransformCXXForRangeStmt(CXXForRangeStmt *S) {
 
   // P2718R0 - Lifetime extension in range-based for loops.
   if (getSema().getLangOpts().CPlusPlus23) {
-    auto &LastRecord = getSema().ExprEvalContexts.back();
+    auto &LastRecord = getSema().currentEvaluationContext();
     LastRecord.InLifetimeExtendingContext = true;
+    LastRecord.RebuildDefaultArgOrDefaultInit = true;
   }
   StmtResult Init =
       S->getInit() ? getDerived().TransformStmt(S->getInit()) : StmtResult();
@@ -14425,6 +14451,13 @@ TreeTransform<Derived>::TransformCXXTemporaryObjectExpr(
     if (TransformExprs(E->getArgs(), E->getNumArgs(), true, Args,
                        &ArgumentChanged))
       return ExprError();
+
+    if (E->isListInitialization() && !E->isStdInitListInitialization()) {
+      ExprResult Res = RebuildInitList(E->getBeginLoc(), Args, E->getEndLoc());
+      if (Res.isInvalid())
+        return ExprError();
+      Args = {Res.get()};
+    }
   }
 
   if (!getDerived().AlwaysRebuild() &&
@@ -14436,12 +14469,9 @@ TreeTransform<Derived>::TransformCXXTemporaryObjectExpr(
     return SemaRef.MaybeBindToTemporary(E);
   }
 
-  // FIXME: We should just pass E->isListInitialization(), but we're not
-  // prepared to handle list-initialization without a child InitListExpr.
   SourceLocation LParenLoc = T->getTypeLoc().getEndLoc();
   return getDerived().RebuildCXXTemporaryObjectExpr(
-      T, LParenLoc, Args, E->getEndLoc(),
-      /*ListInitialization=*/LParenLoc.isInvalid());
+      T, LParenLoc, Args, E->getEndLoc(), E->isListInitialization());
 }
 
 template<typename Derived>
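The CXXTemporaryObjectExpr hunks above rebuild a braced temporary with an InitListExpr child and forward E->isListInitialization() instead of guessing from the paren location. A small sketch (Widget/make are illustrative names) of why preserving the list-initialization flag matters at the source level:

struct Widget {
  Widget(int x, int y) : x(x), y(y) {}
  int x, y;
};

template <typename T, typename U>
T make(U v) { return T{v, v}; }  // braced temporary: must stay list-initialization

Widget ok = make<Widget>(1);
// Widget bad = make<Widget>(1.5);  // list-initialization rejects the narrowing
                                    // conversion; paren-initialization would not
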
diff --git clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
index 22061373c4b3..8bb7880a3cc2 100644
--- clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
+++ clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
@@ -1129,7 +1129,7 @@ tryToInvalidateFReadBufferByElements(ProgramStateRef State, CheckerContext &C,
   if (!ElemTy.isNull() && CountVal && Size && StartIndexVal) {
     int64_t NumBytesRead = Size.value() * CountVal.value();
     int64_t ElemSizeInChars = Ctx.getTypeSizeInChars(ElemTy).getQuantity();
-    if (ElemSizeInChars == 0)
+    if (ElemSizeInChars == 0 || NumBytesRead < 0)
       return nullptr;
 
     bool IncompleteLastElement = (NumBytesRead % ElemSizeInChars) != 0;
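A hypothetical reduction of the case the new `NumBytesRead < 0` guard appears to target: when size * count does not fit in int64_t, the checker's product wraps negative and the element-wise invalidation has to bail out rather than reason about a bogus byte count.

#include <cstdio>

void read_huge(std::FILE *f, char *buf) {
  // 2 * 0x7fffffffffffffff overflows the checker's int64_t product (wraps to -2).
  std::fread(buf, 0x7fffffffffffffffULL, 2, f);
}
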
diff --git clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp
index 49bbff194216..f48b2fd9dca7 100644
--- clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp
+++ clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp
@@ -143,6 +143,16 @@ bool isReturnValueRefCounted(const clang::FunctionDecl *F) {
   return false;
 }
 
+std::optional<bool> isUncounted(const QualType T) {
+  if (auto *Subst = dyn_cast<SubstTemplateTypeParmType>(T)) {
+    if (auto *Decl = Subst->getAssociatedDecl()) {
+      if (isRefType(safeGetName(Decl)))
+        return false;
+    }
+  }
+  return isUncounted(T->getAsCXXRecordDecl());
+}
+
 std::optional<bool> isUncounted(const CXXRecordDecl* Class)
 {
   // Keep isRefCounted first as it's cheaper.
@@ -231,11 +241,9 @@ bool isSingleton(const FunctionDecl *F) {
     if (!MethodDecl->isStatic())
       return false;
   }
-  const auto &Name = safeGetName(F);
-  std::string SingletonStr = "singleton";
-  auto index = Name.find(SingletonStr);
-  return index != std::string::npos &&
-         index == Name.size() - SingletonStr.size();
+  const auto &NameStr = safeGetName(F);
+  StringRef Name = NameStr; // FIXME: Make safeGetName return StringRef.
+  return Name == "singleton" || Name.ends_with("Singleton");
 }
 
 // We only care about statements so let's use the simple
@@ -397,6 +405,7 @@ public:
       return true;
 
     if (Name == "WTFCrashWithInfo" || Name == "WTFBreakpointTrap" ||
+        Name == "WTFReportBacktrace" ||
         Name == "WTFCrashWithSecurityImplication" || Name == "WTFCrash" ||
         Name == "WTFReportAssertionFailure" || Name == "isMainThread" ||
         Name == "isMainThreadOrGCThread" || Name == "isMainRunLoop" ||
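The singleton heuristic above is reworked from a suffix search for lowercase "singleton" to an exact name or a "Singleton" suffix. A standalone sketch of the new predicate (assumes llvm/ADT/StringRef.h; the helper name is illustrative) with a few names and how they fare:

#include "llvm/ADT/StringRef.h"

static bool looksLikeSingletonAccessor(llvm::StringRef Name) {
  return Name == "singleton" || Name.ends_with("Singleton");
}

// looksLikeSingletonAccessor("singleton")       -> true
// looksLikeSingletonAccessor("sharedSingleton") -> true
// looksLikeSingletonAccessor("singletons")      -> false
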
diff --git clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h
index ec1db1cc3358..2932e62ad06e 100644
--- clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h
+++ clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h
@@ -20,6 +20,7 @@ class CXXMethodDecl;
 class CXXRecordDecl;
 class Decl;
 class FunctionDecl;
+class QualType;
 class Stmt;
 class Type;
 
@@ -42,6 +43,10 @@ std::optional<bool> isRefCountable(const clang::CXXRecordDecl* Class);
 /// \returns true if \p Class is ref-counted, false if not.
 bool isRefCounted(const clang::CXXRecordDecl *Class);
 
+/// \returns true if the type \p T is ref-countable AND not ref-counted, false
+/// if not, std::nullopt if inconclusive.
+std::optional<bool> isUncounted(const clang::QualType T);
+
 /// \returns true if \p Class is ref-countable AND not ref-counted, false if
 /// not, std::nullopt if inconclusive.
 std::optional<bool> isUncounted(const clang::CXXRecordDecl* Class);
diff --git clang/lib/StaticAnalyzer/Checkers/WebKit/RefCntblBaseVirtualDtorChecker.cpp clang/lib/StaticAnalyzer/Checkers/WebKit/RefCntblBaseVirtualDtorChecker.cpp
index 9df108e28ecd..ecba5f9aa23e 100644
--- clang/lib/StaticAnalyzer/Checkers/WebKit/RefCntblBaseVirtualDtorChecker.cpp
+++ clang/lib/StaticAnalyzer/Checkers/WebKit/RefCntblBaseVirtualDtorChecker.cpp
@@ -67,6 +67,32 @@ public:
     const Decl *D = CE->getCalleeDecl();
     if (D && D->hasBody())
       return VisitBody(D->getBody());
+    else {
+      auto name = safeGetName(D);
+      if (name == "ensureOnMainThread" || name == "ensureOnMainRunLoop") {
+        for (unsigned i = 0; i < CE->getNumArgs(); ++i) {
+          auto *Arg = CE->getArg(i);
+          if (VisitLambdaArgument(Arg))
+            return true;
+        }
+      }
+    }
+    return false;
+  }
+
+  bool VisitLambdaArgument(const Expr *E) {
+    E = E->IgnoreParenCasts();
+    if (auto *TempE = dyn_cast<CXXBindTemporaryExpr>(E))
+      E = TempE->getSubExpr();
+    if (auto *ConstructE = dyn_cast<CXXConstructExpr>(E)) {
+      for (unsigned i = 0; i < ConstructE->getNumArgs(); ++i) {
+        auto *Arg = ConstructE->getArg(i);
+        if (auto *Lambda = dyn_cast<LambdaExpr>(Arg)) {
+          if (VisitBody(Lambda->getBody()))
+            return true;
+        }
+      }
+    }
     return false;
   }
 
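A hypothetical reduction (the template form of ensureOnMainThread is a stand-in for the real WTF declaration) of the pattern the new VisitLambdaArgument hook follows into: the callee has no body available in this translation unit, but its lambda argument does, so the checker keeps visiting inside the lambda.

template <typename F>
void ensureOnMainThread(F &&);  // no body in this translation unit

void scheduleWork() {
  ensureOnMainThread([] {
    // ...the checker continues to visit this lambda body...
  });
}
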
diff --git clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp
index 704c082a4d1d..81c2434ce647 100644
--- clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp
+++ clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp
@@ -87,8 +87,7 @@ public:
         }
         auto *E = MemberCallExpr->getImplicitObjectArgument();
         QualType ArgType = MemberCallExpr->getObjectType();
-        std::optional<bool> IsUncounted =
-            isUncounted(ArgType->getAsCXXRecordDecl());
+        std::optional<bool> IsUncounted = isUncounted(ArgType);
         if (IsUncounted && *IsUncounted && !isPtrOriginSafe(E))
           reportBugOnThis(E);
       }
diff --git clang/lib/StaticAnalyzer/Core/CheckerManager.cpp clang/lib/StaticAnalyzer/Core/CheckerManager.cpp
index 6fc16223ea82..524a4c43abf2 100644
--- clang/lib/StaticAnalyzer/Core/CheckerManager.cpp
+++ clang/lib/StaticAnalyzer/Core/CheckerManager.cpp
@@ -48,16 +48,6 @@ bool CheckerManager::hasPathSensitiveCheckers() const {
       EvalCallCheckers, EndOfTranslationUnitCheckers);
 }
 
-void CheckerManager::finishedCheckerRegistration() {
-#ifndef NDEBUG
-  // Make sure that for every event that has listeners, there is at least
-  // one dispatcher registered for it.
-  for (const auto &Event : Events)
-    assert(Event.second.HasDispatcher &&
-           "No dispatcher registered for an event");
-#endif
-}
-
 void CheckerManager::reportInvalidCheckerOptionValue(
     const CheckerBase *C, StringRef OptionName,
     StringRef ExpectedValueDesc) const {
diff --git clang/lib/StaticAnalyzer/Core/ExprEngine.cpp clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index dfb7111b5125..fdabba46992b 100644
--- clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -1207,9 +1207,14 @@ void ExprEngine::ProcessInitializer(const CFGInitializer CFGInit,
           Init = ASE->getBase()->IgnoreImplicit();
 
         SVal LValue = State->getSVal(Init, stackFrame);
-        if (!Field->getType()->isReferenceType())
-          if (std::optional<Loc> LValueLoc = LValue.getAs<Loc>())
+        if (!Field->getType()->isReferenceType()) {
+          if (std::optional<Loc> LValueLoc = LValue.getAs<Loc>()) {
             InitVal = State->getSVal(*LValueLoc);
+          } else if (auto CV = LValue.getAs<nonloc::CompoundVal>()) {
+            // Initializer list for an array.
+            InitVal = *CV;
+          }
+        }
 
         // If we fail to get the value for some reason, use a symbolic value.
         if (InitVal.isUnknownOrUndef()) {
@@ -1933,6 +1938,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
     case Stmt::CXXRewrittenBinaryOperatorClass:
     case Stmt::RequiresExprClass:
     case Expr::CXXParenListInitExprClass:
+    case Stmt::EmbedExprClass:
       // Fall through.
 
     // Cases we intentionally don't evaluate, since they don't need
@@ -2435,10 +2441,6 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
       Bldr.addNodes(Dst);
       break;
     }
-
-    case Stmt::EmbedExprClass:
-      llvm::report_fatal_error("Support for EmbedExpr is not implemented.");
-      break;
   }
 }
 
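A sketch of a constructor that reaches the new CompoundVal branch above: a braced member initializer for an array member evaluates to a nonloc::CompoundVal rather than a memory-region Loc, which previously left InitVal to fall back to a conjured symbol.

struct Packet {
  int header[3];
  Packet() : header{1, 2, 3} {}  // the initializer list becomes a CompoundVal
};

Packet makePacket() { return Packet(); }
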
diff --git clang/lib/StaticAnalyzer/Core/RegionStore.cpp clang/lib/StaticAnalyzer/Core/RegionStore.cpp
index ba29c1231390..c257a87dff38 100644
--- clang/lib/StaticAnalyzer/Core/RegionStore.cpp
+++ clang/lib/StaticAnalyzer/Core/RegionStore.cpp
@@ -2380,8 +2380,12 @@ RegionStoreManager::bind(RegionBindingsConstRef B, Loc L, SVal V) {
 
   // Binding directly to a symbolic region should be treated as binding
   // to element 0.
-  if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(R))
-    R = GetElementZeroRegion(SR, SR->getPointeeStaticType());
+  if (const auto *SymReg = dyn_cast<SymbolicRegion>(R)) {
+    QualType Ty = SymReg->getPointeeStaticType();
+    if (Ty->isVoidType())
+      Ty = StateMgr.getContext().CharTy;
+    R = GetElementZeroRegion(SymReg, Ty);
+  }
 
   assert((!isa<CXXThisRegion>(R) || !B.lookup(R)) &&
          "'this' pointer is not an l-value and is not assignable");
diff --git clang/lib/StaticAnalyzer/Frontend/CreateCheckerManager.cpp clang/lib/StaticAnalyzer/Frontend/CreateCheckerManager.cpp
index 21a60785eb52..f60221ad7587 100644
--- clang/lib/StaticAnalyzer/Frontend/CreateCheckerManager.cpp
+++ clang/lib/StaticAnalyzer/Frontend/CreateCheckerManager.cpp
@@ -28,7 +28,6 @@ CheckerManager::CheckerManager(
                            AOptions, checkerRegistrationFns);
   Registry.initializeRegistry(*this);
   Registry.initializeManager(*this);
-  finishedCheckerRegistration();
 }
 
 CheckerManager::CheckerManager(AnalyzerOptions &AOptions,
diff --git clang/lib/Tooling/DependencyScanning/DependencyScanningService.cpp clang/lib/Tooling/DependencyScanning/DependencyScanningService.cpp
index 7458ef484b16..4fb597758049 100644
--- clang/lib/Tooling/DependencyScanning/DependencyScanningService.cpp
+++ clang/lib/Tooling/DependencyScanning/DependencyScanningService.cpp
@@ -15,9 +15,9 @@ using namespace dependencies;
 
 DependencyScanningService::DependencyScanningService(
     ScanningMode Mode, ScanningOutputFormat Format,
-    ScanningOptimizations OptimizeArgs, bool EagerLoadModules)
+    ScanningOptimizations OptimizeArgs, bool EagerLoadModules, bool TraceVFS)
     : Mode(Mode), Format(Format), OptimizeArgs(OptimizeArgs),
-      EagerLoadModules(EagerLoadModules) {
+      EagerLoadModules(EagerLoadModules), TraceVFS(TraceVFS) {
   // Initialize targets for object file support.
   llvm::InitializeAllTargets();
   llvm::InitializeAllTargetMCs();
diff --git clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
index 1a21a4f5e30f..d77187bfb1f2 100644
--- clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
+++ clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
@@ -430,7 +430,9 @@ public:
 
     std::unique_ptr<FrontendAction> Action;
 
-    if (ModuleName)
+    if (Format == ScanningOutputFormat::P1689)
+      Action = std::make_unique<PreprocessOnlyAction>();
+    else if (ModuleName)
       Action = std::make_unique<GetDependenciesByModuleNameAction>(*ModuleName);
     else
       Action = std::make_unique<ReadPCHAndPreprocessAction>();
@@ -499,6 +501,9 @@ DependencyScanningWorker::DependencyScanningWorker(
   // The scanner itself writes only raw ast files.
   PCHContainerOps->registerWriter(std::make_unique<RawPCHContainerWriter>());
 
+  if (Service.shouldTraceVFS())
+    FS = llvm::makeIntrusiveRefCnt<llvm::vfs::TracingFileSystem>(std::move(FS));
+
   switch (Service.getMode()) {
   case ScanningMode::DependencyDirectivesScan:
     DepFS =
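A standalone sketch of the wrapping done above, assuming llvm/Support/VirtualFileSystem.h provides the TracingFileSystem the hunk uses; the helper name is illustrative:

#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/Support/VirtualFileSystem.h"

static llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem>
maybeTraceVFS(llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS, bool Trace) {
  if (Trace)  // mirrors Service.shouldTraceVFS() in the worker
    FS = llvm::makeIntrusiveRefCnt<llvm::vfs::TracingFileSystem>(std::move(FS));
  return FS;
}
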
diff --git clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
index 370d83484685..c775adc0ddd7 100644
--- clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
+++ clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
@@ -569,12 +569,11 @@ ModuleDepCollectorPP::handleTopLevelModule(const Module *M) {
     return {};
 
   // If this module has been handled already, just return its ID.
-  auto ModI = MDC.ModularDeps.insert({M, nullptr});
-  if (!ModI.second)
-    return ModI.first->second->ID;
+  if (auto ModI = MDC.ModularDeps.find(M); ModI != MDC.ModularDeps.end())
+    return ModI->second->ID;
 
-  ModI.first->second = std::make_unique<ModuleDeps>();
-  ModuleDeps &MD = *ModI.first->second;
+  auto OwnedMD = std::make_unique<ModuleDeps>();
+  ModuleDeps &MD = *OwnedMD;
 
   MD.ID.ModuleName = M->getFullModuleName();
   MD.IsSystem = M->IsSystem;
@@ -650,6 +649,8 @@ ModuleDepCollectorPP::handleTopLevelModule(const Module *M) {
 
   MD.BuildInfo = std::move(CI);
 
+  MDC.ModularDeps.insert({M, std::move(OwnedMD)});
+
   return MD.ID;
 }
 
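The handleTopLevelModule change above switches from inserting a placeholder into ModularDeps up front to inserting only once the ModuleDeps is fully populated. A generic sketch of that pattern with standard containers (names illustrative), useful when the population step may consult the same cache before the entry is complete:

#include <map>
#include <memory>
#include <string>

struct Deps { std::string ID; };
static std::map<int, std::unique_ptr<Deps>> Cache;

const std::string &getOrCompute(int Key) {
  if (auto It = Cache.find(Key); It != Cache.end())
    return It->second->ID;                    // already handled
  auto Owned = std::make_unique<Deps>();
  Owned->ID = "deps-" + std::to_string(Key);  // ...possibly recursive work...
  Deps &Ref = *Owned;
  Cache.emplace(Key, std::move(Owned));       // publish only once fully built
  return Ref.ID;
}
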
diff --git clang/lib/Tooling/Transformer/Stencil.cpp clang/lib/Tooling/Transformer/Stencil.cpp
index bc4fa6e36057..223fb5a76897 100644
--- clang/lib/Tooling/Transformer/Stencil.cpp
+++ clang/lib/Tooling/Transformer/Stencil.cpp
@@ -50,7 +50,13 @@ static Error printNode(StringRef Id, const MatchFinder::MatchResult &Match,
   auto NodeOrErr = getNode(Match.Nodes, Id);
   if (auto Err = NodeOrErr.takeError())
     return Err;
-  NodeOrErr->print(Os, PrintingPolicy(Match.Context->getLangOpts()));
+  const PrintingPolicy PP(Match.Context->getLangOpts());
+  if (const auto *ND = NodeOrErr->get<NamedDecl>()) {
+    // For NamedDecls, we can do a better job than printing the whole thing.
+    ND->getNameForDiagnostic(Os, PP, false);
+  } else {
+    NodeOrErr->print(Os, PP);
+  }
   *Result += Output;
   return Error::success();
 }
diff --git clang/test/AST/ByteCode/builtin-functions.cpp clang/test/AST/ByteCode/builtin-functions.cpp
index 9c9ca23e0a6a..9fd5eae67a21 100644
--- clang/test/AST/ByteCode/builtin-functions.cpp
+++ clang/test/AST/ByteCode/builtin-functions.cpp
@@ -968,3 +968,10 @@ namespace FunctionStart {
   static_assert(__builtin_function_start(a) == a, ""); // both-error {{not an integral constant expression}} \
                                                        // both-note {{comparison of addresses of literals has unspecified value}}
 }
+
+namespace BuiltinInImplicitCtor {
+  constexpr struct {
+    int a = __builtin_isnan(1.0);
+  } Foo;
+  static_assert(Foo.a == 0, "");
+}
diff --git clang/test/AST/ByteCode/codegen.cpp clang/test/AST/ByteCode/codegen.cpp
index 9fac28a52d31..12d8b5a5c548 100644
--- clang/test/AST/ByteCode/codegen.cpp
+++ clang/test/AST/ByteCode/codegen.cpp
@@ -73,3 +73,21 @@ namespace Null {
   // CHECK: call {{.*}} @_ZN4Null4nullEv(
   int S::*q = null();
 }
+
+struct A {
+  A();
+  ~A();
+  enum E { Foo };
+};
+
+A *g();
+
+void f(A *a) {
+  A::E e1 = a->Foo;
+
+  // CHECK: call noundef ptr @_Z1gv()
+  A::E e2 = g()->Foo;
+  // CHECK: call void @_ZN1AC1Ev(
+  // CHECK: call void @_ZN1AD1Ev(
+  A::E e3 = A().Foo;
+}
diff --git clang/test/AST/ByteCode/const-base-cast.cpp clang/test/AST/ByteCode/const-base-cast.cpp
new file mode 100644
index 000000000000..80226b973bf9
--- /dev/null
+++ clang/test/AST/ByteCode/const-base-cast.cpp
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm %s -o - -fexperimental-new-constant-interpreter | FileCheck %s
+
+
+/// Slightly adapted from the version in test/CodeGenCXX/.
+
+struct X { int x[12];};
+struct A : X { char x, y, z; };
+struct B { char y; };
+struct C : A,B {};
+unsigned char x = ((char*)(X*)(C*)0x1000) - (char*)0x1000;
+// CHECK: @x = {{(dso_local )?}}global i8 0
+
+unsigned char y = ((char*)(B*)(C*)0x1000) - (char*)0x1000;
+// CHECK: @y = {{(dso_local )?}}global i8 51
+
+unsigned char z = ((char*)(A*)(C*)0x1000) - (char*)0x1000;
+// CHECK: @z = {{(dso_local )?}}global i8 0
+
diff --git clang/test/AST/ByteCode/constexpr-vectors.cpp clang/test/AST/ByteCode/constexpr-vectors.cpp
index a738cfe617a0..7a65b2637845 100644
--- clang/test/AST/ByteCode/constexpr-vectors.cpp
+++ clang/test/AST/ByteCode/constexpr-vectors.cpp
@@ -15,8 +15,64 @@ using FourI128ExtVec __attribute__((ext_vector_type(4))) = __int128;
 // Only int vs float makes a difference here, so we only need to test 1 of each.
 // Test Char to make sure the mixed-nature of shifts around char is evident.
 void CharUsage() {
-  constexpr auto H = FourCharsVecSize{-1, -1, 0, -1};
-  constexpr auto InvH = -H;
+  constexpr auto w = FourCharsVecSize{1, 2, 3, 4} <
+                     FourCharsVecSize{4, 3, 2, 1};
+  static_assert(w[0] == -1 && w[1] == -1 && w[2] == 0 && w[3] == 0, "");
+
+  constexpr auto x = FourCharsVecSize{1, 2, 3, 4} >
+                     FourCharsVecSize{4, 3, 2, 1};
+  static_assert(x[0] == 0 && x[1] == 0 && x[2] == -1 && x[3] == -1, "");
+
+  constexpr auto y = FourCharsVecSize{1, 2, 3, 4} <=
+                     FourCharsVecSize{4, 3, 3, 1};
+  static_assert(y[0] == -1 && y[1] == -1 && y[2] == -1 && y[3] == 0, "");
+
+  constexpr auto z = FourCharsVecSize{1, 2, 3, 4} >=
+                     FourCharsVecSize{4, 3, 3, 1};
+  static_assert(z[0] == 0 && z[1] == 0 && z[2] == -1 && z[3] == -1, "");
+
+  constexpr auto A = FourCharsVecSize{1, 2, 3, 4} ==
+                     FourCharsVecSize{4, 3, 3, 1};
+  static_assert(A[0] == 0 && A[1] == 0 && A[2] == -1 && A[3] == 0, "");
+
+  constexpr auto B = FourCharsVecSize{1, 2, 3, 4} !=
+                     FourCharsVecSize{4, 3, 3, 1};
+  static_assert(B[0] == -1 && B[1] == -1 && B[2] == 0 && B[3] == -1, "");
+
+  constexpr auto C = FourCharsVecSize{1, 2, 3, 4} < 3;
+  static_assert(C[0] == -1 && C[1] == -1 && C[2] == 0 && C[3] == 0, "");
+
+  constexpr auto D = FourCharsVecSize{1, 2, 3, 4} > 3;
+  static_assert(D[0] == 0 && D[1] == 0 && D[2] == 0 && D[3] == -1, "");
+
+  constexpr auto E = FourCharsVecSize{1, 2, 3, 4} <= 3;
+  static_assert(E[0] == -1 && E[1] == -1 && E[2] == -1 && E[3] == 0, "");
+
+  constexpr auto F = FourCharsVecSize{1, 2, 3, 4} >= 3;
+  static_assert(F[0] == 0 && F[1] == 0 && F[2] == -1 && F[3] == -1, "");
+
+  constexpr auto G = FourCharsVecSize{1, 2, 3, 4} == 3;
+  static_assert(G[0] == 0 && G[1] == 0 && G[2] == -1 && G[3] == 0, "");
+
+  constexpr auto H = FourCharsVecSize{1, 2, 3, 4} != 3;
+  static_assert(H[0] == -1 && H[1] == -1 && H[2] == 0 && H[3] == -1, "");
+
+  constexpr auto O = FourCharsVecSize{5, 0, 6, 0} &&
+                     FourCharsVecSize{5, 5, 0, 0};
+  static_assert(O[0] == 1 && O[1] == 0 && O[2] == 0 && O[3] == 0, "");
+
+  constexpr auto P = FourCharsVecSize{5, 0, 6, 0} ||
+                     FourCharsVecSize{5, 5, 0, 0};
+  static_assert(P[0] == 1 && P[1] == 1 && P[2] == 1 && P[3] == 0, "");
+
+  constexpr auto Q = FourCharsVecSize{5, 0, 6, 0} && 3;
+  static_assert(Q[0] == 1 && Q[1] == 0 && Q[2] == 1 && Q[3] == 0, "");
+
+  constexpr auto R = FourCharsVecSize{5, 0, 6, 0} || 3;
+  static_assert(R[0] == 1 && R[1] == 1 && R[2] == 1 && R[3] == 1, "");
+
+  constexpr auto H1 = FourCharsVecSize{-1, -1, 0, -1};
+  constexpr auto InvH = -H1;
   static_assert(InvH[0] == 1 && InvH[1] == 1 && InvH[2] == 0 && InvH[3] == 1, "");
 
   constexpr auto ae = ~FourCharsVecSize{1, 2, 10, 20};
@@ -27,8 +83,65 @@ void CharUsage() {
 }
 
 void CharExtVecUsage() {
-  constexpr auto H = FourCharsExtVec{-1, -1, 0, -1};
-  constexpr auto InvH = -H;
+  constexpr auto w = FourCharsExtVec{1, 2, 3, 4} <
+                     FourCharsExtVec{4, 3, 2, 1};
+  static_assert(w[0] == -1 && w[1] == -1 && w[2] == 0 && w[3] == 0, "");
+
+  constexpr auto x = FourCharsExtVec{1, 2, 3, 4} >
+                     FourCharsExtVec{4, 3, 2, 1};
+  static_assert(x[0] == 0 && x[1] == 0 && x[2] == -1 && x[3] == -1, "");
+
+  constexpr auto y = FourCharsExtVec{1, 2, 3, 4} <=
+                     FourCharsExtVec{4, 3, 3, 1};
+  static_assert(y[0] == -1 && y[1] == -1 && y[2] == -1 && y[3] == 0, "");
+
+  constexpr auto z = FourCharsExtVec{1, 2, 3, 4} >=
+                     FourCharsExtVec{4, 3, 3, 1};
+  static_assert(z[0] == 0 && z[1] == 0 && z[2] == -1 && z[3] == -1, "");
+
+  constexpr auto A = FourCharsExtVec{1, 2, 3, 4} ==
+                     FourCharsExtVec{4, 3, 3, 1};
+  static_assert(A[0] == 0 && A[1] == 0 && A[2] == -1 && A[3] == 0, "");
+
+  constexpr auto B = FourCharsExtVec{1, 2, 3, 4} !=
+                     FourCharsExtVec{4, 3, 3, 1};
+  static_assert(B[0] == -1 && B[1] == -1 && B[2] == 0 && B[3] == -1, "");
+
+  constexpr auto C = FourCharsExtVec{1, 2, 3, 4} < 3;
+  static_assert(C[0] == -1 && C[1] == -1 && C[2] == 0 && C[3] == 0, "");
+
+  constexpr auto D = FourCharsExtVec{1, 2, 3, 4} > 3;
+  static_assert(D[0] == 0 && D[1] == 0 && D[2] == 0 && D[3] == -1, "");
+
+  constexpr auto E = FourCharsExtVec{1, 2, 3, 4} <= 3;
+  static_assert(E[0] == -1 && E[1] == -1 && E[2] == -1 && E[3] == 0, "");
+
+  constexpr auto F = FourCharsExtVec{1, 2, 3, 4} >= 3;
+  static_assert(F[0] == 0 && F[1] == 0 && F[2] == -1 && F[3] == -1, "");
+
+  constexpr auto G = FourCharsExtVec{1, 2, 3, 4} == 3;
+  static_assert(G[0] == 0 && G[1] == 0 && G[2] == -1 && G[3] == 0, "");
+
+  constexpr auto H = FourCharsExtVec{1, 2, 3, 4} != 3;
+  static_assert(H[0] == -1 && H[1] == -1 && H[2] == 0 && H[3] == -1, "");
+
+  constexpr auto O = FourCharsExtVec{5, 0, 6, 0} &&
+                     FourCharsExtVec{5, 5, 0, 0};
+  static_assert(O[0] == 1 && O[1] == 0 && O[2] == 0 && O[3] == 0, "");
+
+  constexpr auto P = FourCharsExtVec{5, 0, 6, 0} ||
+                     FourCharsExtVec{5, 5, 0, 0};
+  static_assert(P[0] == 1 && P[1] == 1 && P[2] == 1 && P[3] == 0, "");
+
+  constexpr auto Q = FourCharsExtVec{5, 0, 6, 0} && 3;
+  static_assert(Q[0] == 1 && Q[1] == 0 && Q[2] == 1 && Q[3] == 0, "");
+
+  constexpr auto R = FourCharsExtVec{5, 0, 6, 0} || 3;
+  static_assert(R[0] == 1 && R[1] == 1 && R[2] == 1 && R[3] == 1, "");
+
+
+  constexpr auto H1 = FourCharsExtVec{-1, -1, 0, -1};
+  constexpr auto InvH = -H1;
   static_assert(InvH[0] == 1 && InvH[1] == 1 && InvH[2] == 0 && InvH[3] == 1, "");
 
   constexpr auto ae = ~FourCharsExtVec{1, 2, 10, 20};
@@ -39,10 +152,75 @@ void CharExtVecUsage() {
 }
 
 void FloatUsage() {
+  constexpr auto w = FourFloatsVecSize{1, 2, 3, 4} <
+                     FourFloatsVecSize{4, 3, 2, 1};
+  static_assert(w[0] == -1 && w[1] == -1 && w[2] == 0 && w[3] == 0, "");
+
+  constexpr auto x = FourFloatsVecSize{1, 2, 3, 4} >
+                     FourFloatsVecSize{4, 3, 2, 1};
+  static_assert(x[0] == 0 && x[1] == 0 && x[2] == -1 && x[3] == -1, "");
+
+  constexpr auto y = FourFloatsVecSize{1, 2, 3, 4} <=
+                     FourFloatsVecSize{4, 3, 3, 1};
+  static_assert(y[0] == -1 && y[1] == -1 && y[2] == -1 && y[3] == 0, "");
+
+  constexpr auto z = FourFloatsVecSize{1, 2, 3, 4} >=
+                     FourFloatsVecSize{4, 3, 3, 1};
+  static_assert(z[0] == 0 && z[1] == 0 && z[2] == -1 && z[3] == -1, "");
+
+  constexpr auto A = FourFloatsVecSize{1, 2, 3, 4} ==
+                     FourFloatsVecSize{4, 3, 3, 1};
+  static_assert(A[0] == 0 && A[1] == 0 && A[2] == -1 && A[3] == 0, "");
+
+  constexpr auto B = FourFloatsVecSize{1, 2, 3, 4} !=
+                     FourFloatsVecSize{4, 3, 3, 1};
+  static_assert(B[0] == -1 && B[1] == -1 && B[2] == 0 && B[3] == -1, "");
+
+  constexpr auto C = FourFloatsVecSize{1, 2, 3, 4} < 3;
+  static_assert(C[0] == -1 && C[1] == -1 && C[2] == 0 && C[3] == 0, "");
+
+  constexpr auto D = FourFloatsVecSize{1, 2, 3, 4} > 3;
+  static_assert(D[0] == 0 && D[1] == 0 && D[2] == 0 && D[3] == -1, "");
+
+  constexpr auto E = FourFloatsVecSize{1, 2, 3, 4} <= 3;
+  static_assert(E[0] == -1 && E[1] == -1 && E[2] == -1 && E[3] == 0, "");
+
+  constexpr auto F = FourFloatsVecSize{1, 2, 3, 4} >= 3;
+  static_assert(F[0] == 0 && F[1] == 0 && F[2] == -1 && F[3] == -1, "");
+
+  constexpr auto G = FourFloatsVecSize{1, 2, 3, 4} == 3;
+  static_assert(G[0] == 0 && G[1] == 0 && G[2] == -1 && G[3] == 0, "");
+
+  constexpr auto H = FourFloatsVecSize{1, 2, 3, 4} != 3;
+  static_assert(H[0] == -1 && H[1] == -1 && H[2] == 0 && H[3] == -1, "");
+
+  constexpr auto O1 = FourFloatsVecSize{5, 0, 6, 0} &&
+                     FourFloatsVecSize{5, 5, 0, 0};
+  static_assert(O1[0] == 1 && O1[1] == 0 && O1[2] == 0 && O1[3] == 0, "");
+
+  constexpr auto P1 = FourFloatsVecSize{5, 0, 6, 0} ||
+                     FourFloatsVecSize{5, 5, 0, 0};
+  static_assert(P1[0] == 1 && P1[1] == 1 && P1[2] == 1 && P1[3] == 0, "");
+
+  constexpr auto Q = FourFloatsVecSize{5, 0, 6, 0} && 3;
+  static_assert(Q[0] == 1 && Q[1] == 0 && Q[2] == 1 && Q[3] == 0, "");
+
+  constexpr auto R = FourFloatsVecSize{5, 0, 6, 0} || 3;
+  static_assert(R[0] == 1 && R[1] == 1 && R[2] == 1 && R[3] == 1, "");
+
+
   constexpr auto Y = FourFloatsVecSize{1.200000e+01, 1.700000e+01, -1.000000e+00, -1.000000e+00};
   constexpr auto Z = -Y;
   static_assert(Z[0] == -1.200000e+01 && Z[1] == -1.700000e+01 && Z[2] == 1.000000e+00 && Z[3] == 1.000000e+00, "");
 
+  constexpr auto O = FourFloatsVecSize{5, 0, 6, 0} &&
+                     FourFloatsVecSize{5, 5, 0, 0};
+  static_assert(O[0] == 1 && O[1] == 0 && O[2] == 0 && O[3] == 0, "");
+
+  constexpr auto P = FourFloatsVecSize{5, 0, 6, 0} ||
+                     FourFloatsVecSize{5, 5, 0, 0};
+  static_assert(P[0] == 1 && P[1] == 1 && P[2] == 1 && P[3] == 0, "");
+
   // Operator ~ is illegal on floats.
   constexpr auto ae = ~FourFloatsVecSize{0, 1, 8, -1}; // expected-error {{invalid argument type}}
 
@@ -51,6 +229,62 @@ void FloatUsage() {
 }
 
 void FloatVecUsage() {
+  constexpr auto w = FourFloatsVecSize{1, 2, 3, 4} <
+                     FourFloatsVecSize{4, 3, 2, 1};
+  static_assert(w[0] == -1 && w[1] == -1 && w[2] == 0 && w[3] == 0, "");
+
+  constexpr auto x = FourFloatsVecSize{1, 2, 3, 4} >
+                     FourFloatsVecSize{4, 3, 2, 1};
+  static_assert(x[0] == 0 && x[1] == 0 && x[2] == -1 && x[3] == -1, "");
+
+  constexpr auto y = FourFloatsVecSize{1, 2, 3, 4} <=
+                     FourFloatsVecSize{4, 3, 3, 1};
+  static_assert(y[0] == -1 && y[1] == -1 && y[2] == -1 && y[3] == 0, "");
+
+  constexpr auto z = FourFloatsVecSize{1, 2, 3, 4} >=
+                     FourFloatsVecSize{4, 3, 3, 1};
+  static_assert(z[0] == 0 && z[1] == 0 && z[2] == -1 && z[3] == -1, "");
+
+  constexpr auto A = FourFloatsVecSize{1, 2, 3, 4} ==
+                     FourFloatsVecSize{4, 3, 3, 1};
+  static_assert(A[0] == 0 && A[1] == 0 && A[2] == -1 && A[3] == 0, "");
+
+  constexpr auto B = FourFloatsVecSize{1, 2, 3, 4} !=
+                     FourFloatsVecSize{4, 3, 3, 1};
+  static_assert(B[0] == -1 && B[1] == -1 && B[2] == 0 && B[3] == -1, "");
+
+  constexpr auto C = FourFloatsVecSize{1, 2, 3, 4} < 3;
+  static_assert(C[0] == -1 && C[1] == -1 && C[2] == 0 && C[3] == 0, "");
+
+  constexpr auto D = FourFloatsVecSize{1, 2, 3, 4} > 3;
+  static_assert(D[0] == 0 && D[1] == 0 && D[2] == 0 && D[3] == -1, "");
+
+  constexpr auto E = FourFloatsVecSize{1, 2, 3, 4} <= 3;
+  static_assert(E[0] == -1 && E[1] == -1 && E[2] == -1 && E[3] == 0, "");
+
+  constexpr auto F = FourFloatsVecSize{1, 2, 3, 4} >= 3;
+  static_assert(F[0] == 0 && F[1] == 0 && F[2] == -1 && F[3] == -1, "");
+
+  constexpr auto G = FourFloatsVecSize{1, 2, 3, 4} == 3;
+  static_assert(G[0] == 0 && G[1] == 0 && G[2] == -1 && G[3] == 0, "");
+
+  constexpr auto H = FourFloatsVecSize{1, 2, 3, 4} != 3;
+  static_assert(H[0] == -1 && H[1] == -1 && H[2] == 0 && H[3] == -1, "");
+
+  constexpr auto O = FourFloatsVecSize{5, 0, 6, 0} &&
+                     FourFloatsVecSize{5, 5, 0, 0};
+  static_assert(O[0] == 1 && O[1] == 0 && O[2] == 0 && O[3] == 0, "");
+
+  constexpr auto P = FourFloatsVecSize{5, 0, 6, 0} ||
+                     FourFloatsVecSize{5, 5, 0, 0};
+  static_assert(P[0] == 1 && P[1] == 1 && P[2] == 1 && P[3] == 0, "");
+
+  constexpr auto Q = FourFloatsVecSize{5, 0, 6, 0} && 3;
+  static_assert(Q[0] == 1 && Q[1] == 0 && Q[2] == 1 && Q[3] == 0, "");
+
+  constexpr auto R = FourFloatsVecSize{5, 0, 6, 0} || 3;
+  static_assert(R[0] == 1 && R[1] == 1 && R[2] == 1 && R[3] == 1, "");
+
   constexpr auto Y = FourFloatsVecSize{1.200000e+01, 1.700000e+01, -1.000000e+00, -1.000000e+00};
   constexpr auto Z = -Y;
   static_assert(Z[0] == -1.200000e+01 && Z[1] == -1.700000e+01 && Z[2] == 1.000000e+00 && Z[3] == 1.000000e+00, "");
@@ -63,6 +297,24 @@ void FloatVecUsage() {
 }
 
 void I128Usage() {
+  constexpr auto a = FourI128VecSize{1, 2, 3, 4};
+  static_assert(a[0] == 1 && a[1] == 2 && a[2] == 3 && a[3] == 4, "");
+
+  constexpr auto a1 = FourI128VecSize{5, 0, 6, 0} && FourI128VecSize{5, 5, 0, 0};
+  static_assert(a1[0] == 1 && a1[1] == 0 && a1[2] == 0 && a1[3] == 0, "");
+
+  constexpr auto a2 = FourI128VecSize{5, 0, 6, 0} || FourI128VecSize{5, 5, 0, 0};
+  static_assert(a2[0] == 1 && a2[1] == 1 && a2[2] == 1 && a2[3] == 0, "");
+
+  constexpr auto Q = FourI128VecSize{5, 0, 6, 0} && 3;
+  static_assert(Q[0] == 1 && Q[1] == 0 && Q[2] == 1 && Q[3] == 0, "");
+
+  constexpr auto R = FourI128VecSize{5, 0, 6, 0} || 3;
+  static_assert(R[0] == 1 && R[1] == 1 && R[2] == 1 && R[3] == 1, "");
+
+  constexpr auto b = a < 3;
+  static_assert(b[0] == -1 && b[1] == -1 && b[2] == 0 && b[3] == 0, "");
+
   // Operator ~ is illegal on floats, so no test for that.
   constexpr auto c = ~FourI128VecSize{1, 2, 10, 20};
    static_assert(c[0] == -2 && c[1] == -3 && c[2] == -11 && c[3] == -21, "");
@@ -72,6 +324,24 @@ void I128Usage() {
 }
 
 void I128VecUsage() {
+  constexpr auto a = FourI128ExtVec{1, 2, 3, 4};
+  static_assert(a[0] == 1 && a[1] == 2 && a[2] == 3 && a[3] == 4, "");
+
+  constexpr auto a1 = FourI128ExtVec{5, 0, 6, 0} && FourI128ExtVec{5, 5, 0, 0};
+  static_assert(a1[0] == 1 && a1[1] == 0 && a1[2] == 0 && a1[3] == 0, "");
+
+  constexpr auto a2 = FourI128ExtVec{5, 0, 6, 0} || FourI128ExtVec{5, 5, 0, 0};
+  static_assert(a2[0] == 1 && a2[1] == 1 && a2[2] == 1 && a2[3] == 0, "");
+
+  constexpr auto Q = FourI128ExtVec{5, 0, 6, 0} && 3;
+  static_assert(Q[0] == 1 && Q[1] == 0 && Q[2] == 1 && Q[3] == 0, "");
+
+  constexpr auto R = FourI128ExtVec{5, 0, 6, 0} || 3;
+  static_assert(R[0] == 1 && R[1] == 1 && R[2] == 1 && R[3] == 1, "");
+
+  constexpr auto b = a < 3;
+  static_assert(b[0] == -1 && b[1] == -1 && b[2] == 0 && b[3] == 0, "");
+
   // Operator ~ is illegal on floats, so no test for that.
   constexpr auto c = ~FourI128ExtVec{1, 2, 10, 20};
   static_assert(c[0] == -2 && c[1] == -3 && c[2] == -11 && c[3] == -21, "");
@@ -82,6 +352,30 @@ void I128VecUsage() {
 
 using FourBoolsExtVec __attribute__((ext_vector_type(4))) = bool;
 void BoolVecUsage() {
+  constexpr auto a = FourBoolsExtVec{true, false, true, false} <
+                     FourBoolsExtVec{false, false, true, true};
+  static_assert(a[0] == false && a[1] == false && a[2] == false && a[3] == true, "");
+
+  constexpr auto b = FourBoolsExtVec{true, false, true, false} <=
+                     FourBoolsExtVec{false, false, true, true};
+  static_assert(b[0] == false && b[1] == true && b[2] == true && b[3] == true, "");
+
+  constexpr auto c = FourBoolsExtVec{true, false, true, false} ==
+                     FourBoolsExtVec{false, false, true, true};
+  static_assert(c[0] == false && c[1] == true && c[2] == true && c[3] == false, "");
+
+  constexpr auto d = FourBoolsExtVec{true, false, true, false} !=
+                     FourBoolsExtVec{false, false, true, true};
+  static_assert(d[0] == true && d[1] == false && d[2] == false && d[3] == true, "");
+
+  constexpr auto e = FourBoolsExtVec{true, false, true, false} >=
+                     FourBoolsExtVec{false, false, true, true};
+  static_assert(e[0] == true && e[1] == true && e[2] == true && e[3] == false, "");
+
+  constexpr auto f = FourBoolsExtVec{true, false, true, false} >
+                     FourBoolsExtVec{false, false, true, true};
+  static_assert(f[0] == true && f[1] == false && f[2] == false && f[3] == false, "");
+
   constexpr auto j = !FourBoolsExtVec{true, false, true, false};
   static_assert(j[0] == false && j[1] == true && j[2] == false && j[3] == true, "");
 
diff --git clang/test/AST/ByteCode/cxx11.cpp clang/test/AST/ByteCode/cxx11.cpp
index 481e3da9289e..86b58283023b 100644
--- clang/test/AST/ByteCode/cxx11.cpp
+++ clang/test/AST/ByteCode/cxx11.cpp
@@ -169,3 +169,8 @@ namespace FinalLtorDiags {
   A<q> c; // both-error {{non-type template argument of type 'int *' is not a constant expression}} \
           // both-note {{read of non-constexpr variable 'q' is not allowed in a constant expression}}
 }
+
+void lambdas() {
+  int d;
+  int a9[1] = {[d = 0] = 1}; // both-error {{not an integral constant expression}}
+}
diff --git clang/test/AST/ByteCode/cxx20.cpp clang/test/AST/ByteCode/cxx20.cpp
index 77a967d42c4e..9bbc3dbe0073 100644
--- clang/test/AST/ByteCode/cxx20.cpp
+++ clang/test/AST/ByteCode/cxx20.cpp
@@ -641,7 +641,7 @@ namespace ThreeWayCmp {
   constexpr const int *pa2 = &a[2];
   constexpr const int *pb1 = &b[1];
   static_assert(pa1 <=> pb1 != 0, ""); // both-error {{not an integral constant expression}} \
-                                       // both-note {{has unspecified value}} \
+                                       // both-note {{has unspecified value}}
   static_assert(pa1 <=> pa1 == 0, "");
   static_assert(pa1 <=> pa2 == -1, "");
   static_assert(pa2 <=> pa1 == 1, "");
diff --git clang/test/AST/ByteCode/cxx2a.cpp clang/test/AST/ByteCode/cxx2a.cpp
index ad021b30cfd3..eaae978e0118 100644
--- clang/test/AST/ByteCode/cxx2a.cpp
+++ clang/test/AST/ByteCode/cxx2a.cpp
@@ -34,3 +34,79 @@ namespace Covariant {
   constexpr const Covariant1 *cb1 = &cb;
   static_assert(cb1->f()->a == 'Z');
 }
+
+namespace DtorOrder {
+  struct Buf {
+    char buf[64];
+    int n = 0;
+    constexpr void operator+=(char c) { buf[n++] = c; }
+    constexpr bool operator==(const char *str) const {
+      if (str[n] != 0)
+        return false;
+
+      for (int i = 0; i < n; ++i) {
+        if (buf[i] != str[i])
+          return false;
+      }
+      return true;
+
+      return __builtin_memcmp(str, buf, n) == 0;
+    }
+    constexpr bool operator!=(const char *str) const { return !operator==(str); }
+  };
+
+  struct A {
+    constexpr A(Buf &buf, char c) : buf(buf), c(c) { buf += c; }
+    constexpr ~A() { buf += (c - 32);}
+    constexpr operator bool() const { return true; }
+    Buf &buf;
+    char c;
+  };
+
+  constexpr void abnormal_termination(Buf &buf) {
+    struct Indestructible {
+      constexpr ~Indestructible(); // not defined
+    };
+    A a(buf, 'a');
+    A(buf, 'b');
+    int n = 0;
+
+    for (A &&c = A(buf, 'c'); A d = A(buf, 'd'); A(buf, 'e')) {
+      switch (A f(buf, 'f'); A g = A(buf, 'g')) { // both-warning {{boolean}}
+      case false: {
+        A x(buf, 'x');
+      }
+
+      case true: {
+        A h(buf, 'h');
+        switch (n++) {
+        case 0:
+          break;
+        case 1:
+          continue;
+        case 2:
+          return;
+        }
+        break;
+      }
+
+      default:
+        Indestructible indest;
+      }
+
+      A j = (A(buf, 'i'), A(buf, 'j'));
+    }
+  }
+
+  constexpr bool check_abnormal_termination() {
+    Buf buf = {};
+    abnormal_termination(buf);
+    return buf ==
+      "abBc"
+        "dfgh" /*break*/ "HGFijIJeED"
+        "dfgh" /*continue*/ "HGFeED"
+        "dfgh" /*return*/ "HGFD"
+      "CA";
+  }
+  static_assert(check_abnormal_termination());
+}
diff --git clang/test/AST/ByteCode/initializer_list.cpp clang/test/AST/ByteCode/initializer_list.cpp
new file mode 100644
index 000000000000..f882e4ff1b12
--- /dev/null
+++ clang/test/AST/ByteCode/initializer_list.cpp
@@ -0,0 +1,71 @@
+// RUN: %clang_cc1 -fexperimental-new-constant-interpreter -fms-extensions -std=c++20 -verify=expected,both %s
+// RUN: %clang_cc1 -std=c++20 -fms-extensions -verify=ref,both %s
+
+namespace std {
+  typedef decltype(sizeof(int)) size_t;
+  template <class _E>
+  class initializer_list
+  {
+    const _E* __begin_;
+    size_t    __size_;
+
+    initializer_list(const _E* __b, size_t __s)
+      : __begin_(__b),
+        __size_(__s)
+    {}
+
+  public:
+    typedef _E        value_type;
+    typedef const _E& reference;
+    typedef const _E& const_reference;
+    typedef size_t    size_type;
+
+    typedef const _E* iterator;
+    typedef const _E* const_iterator;
+
+    constexpr initializer_list() : __begin_(nullptr), __size_(0) {}
+
+    constexpr size_t    size()  const {return __size_;}
+    constexpr const _E* begin() const {return __begin_;}
+    constexpr const _E* end()   const {return __begin_ + __size_;}
+  };
+}
+
+class Thing {
+public:
+  int m = 12;
+  constexpr Thing(int m) : m(m) {}
+  constexpr bool operator==(const Thing& that) const {
+    return this->m == that.m;
+  }
+};
+
+constexpr bool is_contained(std::initializer_list<Thing> Set, const Thing &Element) {
+   return (*Set.begin() == Element);
+}
+
+constexpr int foo() {
+  const Thing a{12};
+  const Thing b{14};
+  return is_contained({a}, b);
+}
+
+static_assert(foo() == 0);
+
+
+namespace rdar13395022 {
+  struct MoveOnly { // both-note {{candidate}}
+    MoveOnly(MoveOnly&&); // both-note 2{{copy constructor is implicitly deleted because}} both-note {{candidate}}
+  };
+
+  void test(MoveOnly mo) {
+    auto &&list1 = {mo}; // both-error {{call to implicitly-deleted copy constructor}} both-note {{in initialization of temporary of type 'std::initializer_list}}
+    MoveOnly (&&list2)[1] = {mo}; // both-error {{call to implicitly-deleted copy constructor}} both-note {{in initialization of temporary of type 'MoveOnly[1]'}}
+    std::initializer_list<MoveOnly> &&list3 = {};
+    MoveOnly (&&list4)[1] = {}; // both-error {{no matching constructor}}
+    // both-note@-1 {{in implicit initialization of array element 0 with omitted initializer}}
+    // both-note@-2 {{in initialization of temporary of type 'MoveOnly[1]' created to list-initialize this reference}}
+  }
+}
+
+
diff --git clang/test/AST/ByteCode/literals.cpp clang/test/AST/ByteCode/literals.cpp
index 2329d4d973f0..13d6c4feb350 100644
--- clang/test/AST/ByteCode/literals.cpp
+++ clang/test/AST/ByteCode/literals.cpp
@@ -199,12 +199,8 @@ namespace PointerComparison {
   constexpr bool v3 = null == pv; // ok
   constexpr bool v4 = qv == pv; // ok
 
-  /// FIXME: These two are rejected by the current interpreter, but
-  ///   accepted by GCC.
-  constexpr bool v5 = qv >= pv; // ref-error {{constant expression}} \
-                                // ref-note {{unequal pointers to void}}
-  constexpr bool v8 = qv > (void*)&s.a; // ref-error {{constant expression}} \
-                                        // ref-note {{unequal pointers to void}}
+  constexpr bool v5 = qv >= pv;
+  constexpr bool v8 = qv > (void*)&s.a;
   constexpr bool v6 = qv > null; // both-error {{must be initialized by a constant expression}} \
                                  // both-note {{comparison between '&s.b' and 'nullptr' has unspecified value}}
 
diff --git clang/test/AST/ByteCode/new-delete.cpp clang/test/AST/ByteCode/new-delete.cpp
index 145bb366710f..76858aa94bb3 100644
--- clang/test/AST/ByteCode/new-delete.cpp
+++ clang/test/AST/ByteCode/new-delete.cpp
@@ -586,6 +586,138 @@ constexpr void use_after_free_2() { // both-error {{never produces a constant ex
   p->f(); // both-note {{member call on heap allocated object that has been deleted}}
 }
 
+
+/// std::allocator definition
+namespace std {
+  using size_t = decltype(sizeof(0));
+  template<typename T> struct allocator {
+    constexpr T *allocate(size_t N) {
+      return (T*)__builtin_operator_new(sizeof(T) * N); // both-note 2{{allocation performed here}}
+    }
+    constexpr void deallocate(void *p) {
+      __builtin_operator_delete(p); // both-note 2{{std::allocator<...>::deallocate' used to delete pointer to object allocated with 'new'}} \
+                                    // both-note {{used to delete a null pointer}}
+    }
+  };
+}
+
+/// Specialization for float, using operator new/delete.
+namespace std {
+  using size_t = decltype(sizeof(0));
+  template<> struct allocator<float> {
+    constexpr float *allocate(size_t N) {
+      return (float*)operator new (sizeof(float) * N);
+    }
+    constexpr void deallocate(void *p) {
+      operator delete(p);
+    }
+  };
+}
+
+namespace OperatorNewDelete {
+
+  constexpr bool mismatched(int alloc_kind, int dealloc_kind) {
+    int *p;
+    switch (alloc_kind) {
+    case 0:
+      p = new int; // both-note {{heap allocation performed here}}
+      break;
+    case 1:
+      p = new int[1]; // both-note {{heap allocation performed here}}
+      break;
+    case 2:
+      p = std::allocator<int>().allocate(1);
+      break;
+    }
+    switch (dealloc_kind) {
+    case 0:
+      delete p; // both-note {{'delete' used to delete pointer to object allocated with 'std::allocator<...>::allocate'}}
+      break;
+    case 1:
+      delete[] p; // both-note {{'delete' used to delete pointer to object allocated with 'std::allocator<...>::allocate'}}
+      break;
+    case 2:
+      std::allocator<int>().deallocate(p); // both-note 2{{in call}}
+      break;
+    }
+    return true;
+  }
+  static_assert(mismatched(0, 2)); // both-error {{constant expression}} \
+                                   // both-note {{in call to}}
+  static_assert(mismatched(1, 2)); // both-error {{constant expression}} \
+                                   // both-note {{in call to}}
+  static_assert(mismatched(2, 0)); // both-error {{constant expression}} \
+                                   // both-note {{in call}}
+  static_assert(mismatched(2, 1)); // both-error {{constant expression}} \
+                                   // both-note {{in call}}
+  static_assert(mismatched(2, 2));
+
+  constexpr bool zeroAlloc() {
+    int *F = std::allocator<int>().allocate(0);
+    std::allocator<int>().deallocate(F);
+    return true;
+  }
+  static_assert(zeroAlloc());
+
+  /// FIXME: This is broken in the current interpreter.
+  constexpr int arrayAlloc() {
+    int *F = std::allocator<int>().allocate(2);
+    F[0] = 10; // ref-note {{assignment to object outside its lifetime is not allowed in a constant expression}}
+    F[1] = 13;
+    int Res = F[1] + F[0];
+    std::allocator<int>().deallocate(F);
+    return Res;
+  }
+  static_assert(arrayAlloc() == 23); // ref-error {{not an integral constant expression}} \
+                                     // ref-note {{in call to}}
+
+  struct S {
+    int i;
+    constexpr S(int i) : i(i) {}
+    constexpr ~S() { }
+  };
+
+  /// FIXME: This is broken in the current interpreter.
+  constexpr bool structAlloc() {
+    S *s = std::allocator<S>().allocate(1);
+
+    s->i = 12; // ref-note {{assignment to object outside its lifetime is not allowed in a constant expression}}
+
+    bool Res = (s->i == 12);
+    std::allocator<S>().deallocate(s);
+
+    return Res;
+  }
+  static_assert(structAlloc()); // ref-error {{not an integral constant expression}} \
+                                // ref-note {{in call to}}
+
+  constexpr bool structAllocArray() {
+    S *s = std::allocator<S>().allocate(9);
+
+    s[2].i = 12; // ref-note {{assignment to object outside its lifetime is not allowed in a constant expression}}
+    bool Res = (s[2].i == 12);
+    std::allocator<S>().deallocate(s);
+
+    return Res;
+  }
+  static_assert(structAllocArray()); // ref-error {{not an integral constant expression}} \
+                                     // ref-note {{in call to}}
+
+  constexpr bool alloc_from_user_code() {
+    void *p = __builtin_operator_new(sizeof(int)); // both-note {{cannot allocate untyped memory in a constant expression; use 'std::allocator<T>::allocate'}}
+    __builtin_operator_delete(p);
+    return true;
+  }
+  static_assert(alloc_from_user_code()); // both-error {{constant expression}} \
+                                         // both-note {{in call to}}
+
+
+  constexpr int no_deallocate_nullptr = (std::allocator<int>().deallocate(nullptr), 1); // both-error {{constant expression}} \
+                                                                                        // both-note {{in call}}
+
+  static_assert((std::allocator<float>().deallocate(std::allocator<float>().allocate(10)), 1) == 1);
+}
+
 #else
 /// Make sure we reject this prior to C++20
 constexpr int a() { // both-error {{never produces a constant expression}}
diff --git clang/test/AST/HLSL/RWBuffer-AST.hlsl clang/test/AST/HLSL/RWBuffer-AST.hlsl
index 1f6ef60e121e..0e7803ce50a8 100644
--- clang/test/AST/HLSL/RWBuffer-AST.hlsl
+++ clang/test/AST/HLSL/RWBuffer-AST.hlsl
@@ -30,8 +30,7 @@ RWBuffer<float> Buffer;
 // CHECK-NEXT: CXXRecordDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> implicit class RWBuffer definition
 
 // CHECK: FinalAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit final
-// CHECK-NEXT: FieldDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> implicit h 'element_type *'
-// CHECK-NEXT: HLSLResourceClassAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit UAV
+// CHECK-NEXT: implicit h 'element_type * {{\[\[}}hlsl::resource_class(UAV)]]':'element_type *'
 // CHECK-NEXT: HLSLResourceAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit TypedBuffer
 
 // CHECK: CXXMethodDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> operator[] 'element_type &const (unsigned int) const'
@@ -39,7 +38,7 @@ RWBuffer<float> Buffer;
 // CHECK-NEXT: CompoundStmt 0x{{[0-9A-Fa-f]+}} <<invalid sloc>>
 // CHECK-NEXT: ReturnStmt 0x{{[0-9A-Fa-f]+}} <<invalid sloc>>
 // CHECK-NEXT: ArraySubscriptExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'element_type' lvalue
-// CHECK-NEXT: MemberExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'element_type *' lvalue .h 0x{{[0-9A-Fa-f]+}}
+// CHECK-NEXT: MemberExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'element_type * {{\[\[}}hlsl::resource_class(UAV)]]':'element_type *' lvalue .h 0x{{[0-9A-Fa-f]+}}
 // CHECK-NEXT: CXXThisExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'const RWBuffer<element_type>' lvalue implicit this
 // CHECK-NEXT: DeclRefExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'unsigned int' ParmVar 0x{{[0-9A-Fa-f]+}} 'Idx' 'unsigned int'
 // CHECK-NEXT: AlwaysInlineAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit always_inline
@@ -49,7 +48,7 @@ RWBuffer<float> Buffer;
 // CHECK-NEXT: CompoundStmt 0x{{[0-9A-Fa-f]+}} <<invalid sloc>>
 // CHECK-NEXT: ReturnStmt 0x{{[0-9A-Fa-f]+}} <<invalid sloc>>
 // CHECK-NEXT: ArraySubscriptExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'element_type' lvalue
-// CHECK-NEXT: MemberExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'element_type *' lvalue .h 0x{{[0-9A-Fa-f]+}}
+// CHECK-NEXT: MemberExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'element_type * {{\[\[}}hlsl::resource_class(UAV)]]':'element_type *' lvalue .h 0x{{[0-9A-Fa-f]+}}
 // CHECK-NEXT: CXXThisExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'RWBuffer<element_type>' lvalue implicit this
 // CHECK-NEXT: DeclRefExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'unsigned int' ParmVar 0x{{[0-9A-Fa-f]+}} 'Idx' 'unsigned int'
 // CHECK-NEXT: AlwaysInlineAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit always_inline
@@ -59,6 +58,5 @@ RWBuffer<float> Buffer;
 // CHECK: TemplateArgument type 'float'
 // CHECK-NEXT: BuiltinType 0x{{[0-9A-Fa-f]+}} 'float'
 // CHECK-NEXT: FinalAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit final
-// CHECK-NEXT: FieldDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> implicit referenced h 'float *'
-// CHECK-NEXT: HLSLResourceClassAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit UAV
+// CHECK-NEXT: FieldDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> implicit referenced h 'float * {{\[\[}}hlsl::resource_class(UAV)]]':'float *'
 // CHECK-NEXT: HLSLResourceAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit TypedBuffer
diff --git clang/test/AST/HLSL/StructuredBuffer-AST.hlsl clang/test/AST/HLSL/StructuredBuffer-AST.hlsl
new file mode 100644
index 000000000000..11d84ac7b85d
--- /dev/null
+++ clang/test/AST/HLSL/StructuredBuffer-AST.hlsl
@@ -0,0 +1,62 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library -x hlsl -ast-dump -DEMPTY %s | FileCheck -check-prefix=EMPTY %s
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library -x hlsl -ast-dump %s | FileCheck %s
+
+
+// This test covers two different AST generations. The "EMPTY" test mode verifies
+// the AST generated by forward declaration of the HLSL types which happens on
+// initializing the HLSL external AST with an AST Context.
+
+// The non-empty mode has a use that requires the StructuredBuffer type be complete,
+// which results in the AST being populated by the external AST source. That
+// case covers the full implementation of the template declaration and the
+// instantiated specialization.
+
+// EMPTY: ClassTemplateDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> implicit StructuredBuffer
+// EMPTY-NEXT: TemplateTypeParmDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> class depth 0 index 0 element_type
+// EMPTY-NEXT: CXXRecordDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> implicit <undeserialized declarations> class StructuredBuffer
+// EMPTY-NEXT: FinalAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit final
+
+// There should be no more occurrences of StructuredBuffer
+// EMPTY-NOT: StructuredBuffer
+
+#ifndef EMPTY
+
+StructuredBuffer<float> Buffer;
+
+#endif
+
+// CHECK: ClassTemplateDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> implicit StructuredBuffer
+// CHECK-NEXT: TemplateTypeParmDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> class depth 0 index 0 element_type
+// CHECK-NEXT: CXXRecordDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> implicit class StructuredBuffer definition
+
+// CHECK: FinalAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit final
+// CHECK-NEXT: FieldDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> implicit h 'element_type * {{\[\[}}hlsl::resource_class(UAV)]]':'element_type *'
+// CHECK-NEXT: HLSLResourceAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit TypedBuffer
+
+// CHECK: CXXMethodDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> operator[] 'element_type &const (unsigned int) const'
+// CHECK-NEXT: ParmVarDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> Idx 'unsigned int'
+// CHECK-NEXT: CompoundStmt 0x{{[0-9A-Fa-f]+}} <<invalid sloc>>
+// CHECK-NEXT: ReturnStmt 0x{{[0-9A-Fa-f]+}} <<invalid sloc>>
+// CHECK-NEXT: ArraySubscriptExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'element_type' lvalue
+// CHECK-NEXT: MemberExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'element_type * {{\[\[}}hlsl::resource_class(UAV)]]':'element_type *' lvalue .h 0x{{[0-9A-Fa-f]+}}
+// CHECK-NEXT: CXXThisExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'const StructuredBuffer<element_type>' lvalue implicit this
+// CHECK-NEXT: DeclRefExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'unsigned int' ParmVar 0x{{[0-9A-Fa-f]+}} 'Idx' 'unsigned int'
+// CHECK-NEXT: AlwaysInlineAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit always_inline
+
+// CHECK-NEXT: CXXMethodDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> operator[] 'element_type &(unsigned int)'
+// CHECK-NEXT: ParmVarDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> Idx 'unsigned int'
+// CHECK-NEXT: CompoundStmt 0x{{[0-9A-Fa-f]+}} <<invalid sloc>>
+// CHECK-NEXT: ReturnStmt 0x{{[0-9A-Fa-f]+}} <<invalid sloc>>
+// CHECK-NEXT: ArraySubscriptExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'element_type' lvalue
+// CHECK-NEXT: MemberExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'element_type * {{\[\[}}hlsl::resource_class(UAV)]]':'element_type *' lvalue .h 0x{{[0-9A-Fa-f]+}}
+// CHECK-NEXT: CXXThisExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'StructuredBuffer<element_type>' lvalue implicit this
+// CHECK-NEXT: DeclRefExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'unsigned int' ParmVar 0x{{[0-9A-Fa-f]+}} 'Idx' 'unsigned int'
+// CHECK-NEXT: AlwaysInlineAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit always_inline
+
+// CHECK: ClassTemplateSpecializationDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> class StructuredBuffer definition
+
+// CHECK: TemplateArgument type 'float'
+// CHECK-NEXT: BuiltinType 0x{{[0-9A-Fa-f]+}} 'float'
+// CHECK-NEXT: FinalAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit final
+// CHECK-NEXT: FieldDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> implicit referenced h 'float * {{\[\[}}hlsl::resource_class(UAV)]]':'float *'
+// CHECK-NEXT: HLSLResourceAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit TypedBuffer
diff --git clang/test/AST/ast-dump-default-init-json.cpp clang/test/AST/ast-dump-default-init-json.cpp
index 1058b4e3ea4d..f4949a9c9eed 100644
--- clang/test/AST/ast-dump-default-init-json.cpp
+++ clang/test/AST/ast-dump-default-init-json.cpp
@@ -789,10 +789,10 @@ void test() {
 // CHECK-NEXT:                  "valueCategory": "lvalue",
 // CHECK-NEXT:                  "extendingDecl": {
 // CHECK-NEXT:                   "id": "0x{{.*}}",
-// CHECK-NEXT:                   "kind": "FieldDecl",
-// CHECK-NEXT:                   "name": "a",
+// CHECK-NEXT:                   "kind": "VarDecl",
+// CHECK-NEXT:                   "name": "b",
 // CHECK-NEXT:                   "type": {
-// CHECK-NEXT:                    "qualType": "const A &"
+// CHECK-NEXT:                    "qualType": "B"
 // CHECK-NEXT:                   }
 // CHECK-NEXT:                  },
 // CHECK-NEXT:                  "storageDuration": "automatic",
diff --git clang/test/AST/ast-dump-default-init.cpp clang/test/AST/ast-dump-default-init.cpp
index 15b29f04bf21..26864fbf1542 100644
--- clang/test/AST/ast-dump-default-init.cpp
+++ clang/test/AST/ast-dump-default-init.cpp
@@ -13,7 +13,7 @@ void test() {
 }
 // CHECK: -CXXDefaultInitExpr 0x{{[^ ]*}} <{{.*}}> 'const A' lvalue has rewritten init
 // CHECK-NEXT:  `-ExprWithCleanups 0x{{[^ ]*}} <{{.*}}> 'const A' lvalue
-// CHECK-NEXT:    `-MaterializeTemporaryExpr 0x{{[^ ]*}} <{{.*}}> 'const A' lvalue extended by Field 0x{{[^ ]*}} 'a' 'const A &'
+// CHECK-NEXT:    `-MaterializeTemporaryExpr 0x{{[^ ]*}} <{{.*}}> 'const A' lvalue extended by Var 0x{{[^ ]*}} 'b' 'B'
 // CHECK-NEXT:      `-ImplicitCastExpr 0x{{[^ ]*}} <{{.*}}> 'const A' <NoOp>
 // CHECK-NEXT:        `-CXXFunctionalCastExpr 0x{{[^ ]*}} <{{.*}}> 'A' functional cast to A <NoOp>
 // CHECK-NEXT:          `-InitListExpr 0x{{[^ ]*}} <{{.*}}> 'A'
diff --git clang/test/Analysis/Checkers/WebKit/ref-cntbl-crtp-base-no-virtual-dtor.cpp clang/test/Analysis/Checkers/WebKit/ref-cntbl-crtp-base-no-virtual-dtor.cpp
new file mode 100644
index 000000000000..01527addb529
--- /dev/null
+++ clang/test/Analysis/Checkers/WebKit/ref-cntbl-crtp-base-no-virtual-dtor.cpp
@@ -0,0 +1,232 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=webkit.RefCntblBaseVirtualDtor -verify %s
+
+#include "mock-types.h"
+
+namespace Detail {
+
+template<typename Out, typename... In>
+class CallableWrapperBase {
+public:
+    virtual ~CallableWrapperBase() { }
+    virtual Out call(In...) = 0;
+};
+
+template<typename, typename, typename...> class CallableWrapper;
+
+template<typename CallableType, typename Out, typename... In>
+class CallableWrapper : public CallableWrapperBase<Out, In...> {
+public:
+    explicit CallableWrapper(CallableType&& callable)
+        : m_callable(WTFMove(callable)) { }
+    CallableWrapper(const CallableWrapper&) = delete;
+    CallableWrapper& operator=(const CallableWrapper&) = delete;
+    Out call(In... in) final;
+private:
+    CallableType m_callable;
+};
+
+} // namespace Detail
+
+template<typename> class Function;
+
+template<typename Out, typename... In> Function<Out(In...)> adopt(Detail::CallableWrapperBase<Out, In...>*);
+
+template <typename Out, typename... In>
+class Function<Out(In...)> {
+public:
+    using Impl = Detail::CallableWrapperBase<Out, In...>;
+
+    Function() = default;
+
+    template<typename FunctionType>
+    Function(FunctionType f);
+
+    Out operator()(In... in) const;
+    explicit operator bool() const { return !!m_callableWrapper; }
+
+private:
+    enum AdoptTag { Adopt };
+    Function(Impl* impl, AdoptTag)
+        : m_callableWrapper(impl)
+    {
+    }
+
+    friend Function adopt<Out, In...>(Impl*);
+
+    Impl* m_callableWrapper;
+};
+
+template<typename Out, typename... In> Function<Out(In...)> adopt(Detail::CallableWrapperBase<Out, In...>* impl)
+{
+    return Function<Out(In...)>(impl, Function<Out(In...)>::Adopt);
+}
+
+template<typename T, typename PtrTraits = RawPtrTraits<T>, typename RefDerefTraits = DefaultRefDerefTraits<T>> Ref<T, PtrTraits, RefDerefTraits> adoptRef(T&);
+
+template<typename T, typename _PtrTraits, typename RefDerefTraits>
+inline Ref<T, _PtrTraits, RefDerefTraits> adoptRef(T& reference)
+{
+    return Ref<T, _PtrTraits, RefDerefTraits>(reference);
+}
+
+enum class DestructionThread : unsigned char { Any, Main, MainRunLoop };
+void ensureOnMainThread(Function<void()>&&); // Sync if called on main thread, async otherwise.
+void ensureOnMainRunLoop(Function<void()>&&); // Sync if called on main run loop, async otherwise.
+
+class ThreadSafeRefCountedBase {
+public:
+    ThreadSafeRefCountedBase() = default;
+
+    void ref() const
+    {
+        ++m_refCount;
+    }
+
+    bool hasOneRef() const
+    {
+        return refCount() == 1;
+    }
+
+    unsigned refCount() const
+    {
+        return m_refCount;
+    }
+
+protected:
+    bool derefBase() const
+    {
+      if (!--m_refCount) {
+          m_refCount = 1;
+          return true;
+      }
+      return false;
+    }
+
+private:
+    mutable unsigned m_refCount { 1 };
+};
+
+template<class T, DestructionThread destructionThread = DestructionThread::Any> class ThreadSafeRefCounted : public ThreadSafeRefCountedBase {
+public:
+    void deref() const
+    {
+        if (!derefBase())
+            return;
+
+        if constexpr (destructionThread == DestructionThread::Any) {
+            delete static_cast<const T*>(this);
+        } else if constexpr (destructionThread == DestructionThread::Main) {
+            ensureOnMainThread([this] {
+                delete static_cast<const T*>(this);
+            });
+        }
+    }
+
+protected:
+    ThreadSafeRefCounted() = default;
+};
+
+class FancyRefCountedClass final : public ThreadSafeRefCounted<FancyRefCountedClass, DestructionThread::Main> {
+public:
+    static Ref<FancyRefCountedClass> create()
+    {
+        return adoptRef(*new FancyRefCountedClass());
+    }
+
+    virtual ~FancyRefCountedClass();
+
+private:
+    FancyRefCountedClass();
+};
+
+template<class T, DestructionThread destructionThread = DestructionThread::Any> class BadThreadSafeRefCounted : public ThreadSafeRefCountedBase {
+public:
+    void deref() const
+    {
+        if (!derefBase())
+            return;
+
+        [this] {
+          delete static_cast<const T*>(this);
+        };
+    }
+
+protected:
+    BadThreadSafeRefCounted() = default;
+};
+
+class FancyRefCountedClass2 final : public ThreadSafeRefCounted<FancyRefCountedClass, DestructionThread::Main> {
+// expected-warning@-1{{Class 'ThreadSafeRefCounted<FancyRefCountedClass, DestructionThread::Main>' is used as a base of class 'FancyRefCountedClass2' but doesn't have virtual destructor}}
+public:
+    static Ref<FancyRefCountedClass2> create()
+    {
+        return adoptRef(*new FancyRefCountedClass2());
+    }
+
+    virtual ~FancyRefCountedClass2();
+
+private:
+    FancyRefCountedClass2();
+};
+
+template<class T, DestructionThread destructionThread = DestructionThread::Any> class NestedThreadSafeRefCounted : public ThreadSafeRefCountedBase {
+public:
+    void deref() const
+    {
+        if (!derefBase())
+            return;
+        ensureOnMainRunLoop([&] {
+          auto destroyThis = [&] {
+            delete static_cast<const T*>(this);
+          };
+          destroyThis();
+        });
+    }
+
+protected:
+    NestedThreadSafeRefCounted() = default;
+};
+
+class FancyRefCountedClass3 final : public NestedThreadSafeRefCounted<FancyRefCountedClass3, DestructionThread::Main> {
+public:
+    static Ref<FancyRefCountedClass3> create()
+    {
+        return adoptRef(*new FancyRefCountedClass3());
+    }
+
+    virtual ~FancyRefCountedClass3();
+
+private:
+    FancyRefCountedClass3();
+};
+
+template<class T, DestructionThread destructionThread = DestructionThread::Any> class BadNestedThreadSafeRefCounted : public ThreadSafeRefCountedBase {
+public:
+    void deref() const
+    {
+        if (!derefBase())
+            return;
+        ensureOnMainThread([&] {
+          auto destroyThis = [&] {
+            delete static_cast<const T*>(this);
+          };
+        });
+    }
+
+protected:
+    BadNestedThreadSafeRefCounted() = default;
+};
+
+class FancyRefCountedClass4 final : public BadNestedThreadSafeRefCounted<FancyRefCountedClass4, DestructionThread::Main> {
+// expected-warning@-1{{Class 'BadNestedThreadSafeRefCounted<FancyRefCountedClass4, DestructionThread::Main>' is used as a base of class 'FancyRefCountedClass4' but doesn't have virtual destructor}}
+public:
+    static Ref<FancyRefCountedClass4> create()
+    {
+        return adoptRef(*new FancyRefCountedClass4());
+    }
+
+    virtual ~FancyRefCountedClass4();
+
+private:
+    FancyRefCountedClass4();
+};
diff --git clang/test/Analysis/Checkers/WebKit/uncounted-obj-arg.cpp clang/test/Analysis/Checkers/WebKit/uncounted-obj-arg.cpp
index a98c6eb9c84d..97efb354f037 100644
--- clang/test/Analysis/Checkers/WebKit/uncounted-obj-arg.cpp
+++ clang/test/Analysis/Checkers/WebKit/uncounted-obj-arg.cpp
@@ -6,6 +6,7 @@
 void WTFBreakpointTrap();
 void WTFCrashWithInfo(int, const char*, const char*, int);
 void WTFReportAssertionFailure(const char* file, int line, const char* function, const char* assertion);
+void WTFReportBacktrace(void);
 
 void WTFCrash(void);
 void WTFCrashWithSecurityImplication(void);
@@ -334,6 +335,7 @@ public:
   }
   unsigned trivial60() { return ObjectWithNonTrivialDestructor { 5 }.value(); }
   unsigned trivial61() { return DerivedNumber('7').value(); }
+  void trivial62() { WTFReportBacktrace(); }
 
   static RefCounted& singleton() {
     static RefCounted s_RefCounted;
@@ -341,6 +343,12 @@ public:
     return s_RefCounted;
   }
 
+  static RefCounted& otherSingleton() {
+    static RefCounted s_RefCounted;
+    s_RefCounted.ref();
+    return s_RefCounted;
+  }
+
   Number nonTrivial1() { return Number(3) + Number(4); }
   Number nonTrivial2() { return Number { 0.3 }; }
   int nonTrivial3() { return v ? otherFunction() : 0; }
@@ -506,9 +514,12 @@ public:
     getFieldTrivial().trivial59(); // no-warning
     getFieldTrivial().trivial60(); // no-warning
     getFieldTrivial().trivial61(); // no-warning
+    getFieldTrivial().trivial62(); // no-warning
 
     RefCounted::singleton().trivial18(); // no-warning
     RefCounted::singleton().someFunction(); // no-warning
+    RefCounted::otherSingleton().trivial18(); // no-warning
+    RefCounted::otherSingleton().someFunction(); // no-warning
 
     getFieldTrivial().recursiveTrivialFunction(7); // no-warning
     getFieldTrivial().recursiveComplexFunction(9);
diff --git clang/test/Analysis/Checkers/WebKit/uncounted-obj-arg.mm clang/test/Analysis/Checkers/WebKit/uncounted-obj-arg.mm
new file mode 100644
index 000000000000..db0c5b19eec5
--- /dev/null
+++ clang/test/Analysis/Checkers/WebKit/uncounted-obj-arg.mm
@@ -0,0 +1,26 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=alpha.webkit.UncountedCallArgsChecker -verify %s
+// expected-no-diagnostics
+
+#import "mock-types.h"
+#import "mock-system-header.h"
+#import "../../Inputs/system-header-simulator-for-objc-dealloc.h"
+
+@interface Foo : NSObject
+
+@property (nonatomic, readonly) RefPtr<RefCountable> countable;
+
+- (void)execute;
+- (RefPtr<RefCountable>)_protectedRefCountable;
+@end
+
+@implementation Foo
+
+- (void)execute {
+  self._protectedRefCountable->method();
+}
+
+- (RefPtr<RefCountable>)_protectedRefCountable {
+  return _countable;
+}
+
+@end
diff --git clang/test/Analysis/asm.cpp clang/test/Analysis/asm.cpp
index b17ab04994d2..e0691dc4d794 100644
--- clang/test/Analysis/asm.cpp
+++ clang/test/Analysis/asm.cpp
@@ -2,6 +2,8 @@
 // RUN:      -analyzer-checker debug.ExprInspection,core -Wno-error=invalid-gnu-asm-cast -w %s -verify
 
 int clang_analyzer_eval(int);
+void clang_analyzer_dump(int);
+void clang_analyzer_dump_ptr(void *);
 
 int global;
 void testRValueOutput() {
@@ -40,3 +42,13 @@ void testInlineAsmMemcpyUninit(void)
     MyMemcpy(&a[1], &b[1], sizeof(b) - sizeof(b[1]));
     c = a[0]; // expected-warning{{Assigned value is garbage or undefined}}
 }
+
+void testAsmWithVoidPtrArgument()
+{
+  extern void *globalVoidPtr;
+  clang_analyzer_dump(*(int *)globalVoidPtr); // expected-warning-re {{reg_${{[0-9]+}}<int Element{SymRegion{reg_${{[0-9]+}}<void * globalVoidPtr>},0 S64b,int}>}}
+  clang_analyzer_dump_ptr(globalVoidPtr); // expected-warning-re {{&SymRegion{reg_${{[0-9]+}}<void * globalVoidPtr>}}}
+  asm ("" : : "a"(globalVoidPtr)); // no crash
+  clang_analyzer_dump(*(int *)globalVoidPtr); // expected-warning {{Unknown}}
+  clang_analyzer_dump_ptr(globalVoidPtr); // expected-warning-re {{&SymRegion{reg_${{[0-9]+}}<void * globalVoidPtr>}}}
+}
diff --git clang/test/Analysis/ctor-array.cpp clang/test/Analysis/ctor-array.cpp
index 49412ee5a68c..52600b314b01 100644
--- clang/test/Analysis/ctor-array.cpp
+++ clang/test/Analysis/ctor-array.cpp
@@ -234,16 +234,58 @@ struct Parent {
 void member() {
   Parent arr[2];
 
-  // FIXME: Ideally these are TRUE, but at the moment InitListExpr has no
-  // knowledge about where the initializer list is used, so we can't bind
-  // the initializer list to the required region.
-  clang_analyzer_eval(arr[0].arr[0].x == 1); // expected-warning{{UNKNOWN}}
-  clang_analyzer_eval(arr[0].arr[0].y == 2); // expected-warning{{UNKNOWN}}
-  clang_analyzer_eval(arr[0].arr[1].x == 3); // expected-warning{{UNKNOWN}}
-  clang_analyzer_eval(arr[0].arr[1].y == 4); // expected-warning{{UNKNOWN}}
-
-  clang_analyzer_eval(arr[1].arr[0].x == 1); // expected-warning{{UNKNOWN}}
-  clang_analyzer_eval(arr[1].arr[0].y == 2); // expected-warning{{UNKNOWN}}
-  clang_analyzer_eval(arr[1].arr[1].x == 3); // expected-warning{{UNKNOWN}}
-  clang_analyzer_eval(arr[1].arr[1].y == 4); // expected-warning{{UNKNOWN}}
+  clang_analyzer_eval(arr[0].arr[0].x == 1); // expected-warning{{TRUE}}
+  clang_analyzer_eval(arr[0].arr[0].y == 2); // expected-warning{{TRUE}}
+  clang_analyzer_eval(arr[0].arr[1].x == 3); // expected-warning{{TRUE}}
+  clang_analyzer_eval(arr[0].arr[1].y == 4); // expected-warning{{TRUE}}
+
+  clang_analyzer_eval(arr[1].arr[0].x == 1); // expected-warning{{TRUE}}
+  clang_analyzer_eval(arr[1].arr[0].y == 2); // expected-warning{{TRUE}}
+  clang_analyzer_eval(arr[1].arr[1].x == 3); // expected-warning{{TRUE}}
+  clang_analyzer_eval(arr[1].arr[1].y == 4); // expected-warning{{TRUE}}
+}
+
+struct HasArr {
+  int arrDefault[2] = {1, 2};
+  int arr[2];
+  HasArr(int x, int y) : arr{x, y} {}
+};
+
+struct ArrCombination : public HasArr {
+    HasArr membDefault = {5, 6};
+    HasArr memb;
+    ArrCombination(int x) : HasArr(3, 4), memb{7, x} {}
+};
+
+void derived_and_member() {
+  ArrCombination a{8};
+  // FIXME: Default initializers for array members are not modeled.
+  clang_analyzer_eval(a.arrDefault[0] == 1); // expected-warning{{UNKNOWN}}
+  clang_analyzer_eval(a.arrDefault[1] == 2); // expected-warning{{UNKNOWN}}
+  clang_analyzer_eval(a.arr[0] == 3); // expected-warning{{TRUE}}
+  clang_analyzer_eval(a.arr[1] == 4); // expected-warning{{TRUE}}
+  clang_analyzer_eval(a.membDefault.arrDefault[0] == 1); // expected-warning{{UNKNOWN}}
+  clang_analyzer_eval(a.membDefault.arrDefault[1] == 2); // expected-warning{{UNKNOWN}}
+  clang_analyzer_eval(a.membDefault.arr[0] == 5); // expected-warning{{UNKNOWN}}
+  clang_analyzer_eval(a.membDefault.arr[1] == 6); // expected-warning{{UNKNOWN}}
+  clang_analyzer_eval(a.memb.arrDefault[0] == 1); // expected-warning{{UNKNOWN}}
+  clang_analyzer_eval(a.memb.arrDefault[1] == 2); // expected-warning{{UNKNOWN}}
+  clang_analyzer_eval(a.memb.arr[0] == 7); // expected-warning{{TRUE}}
+  clang_analyzer_eval(a.memb.arr[1] == 8); // expected-warning{{TRUE}}
+
+}
+
+struct IncompleteArrInit {
+  int arr[2];
+  int arrDefault[3] = {1, 2, 3};
+  IncompleteArrInit() : arr{1}, arrDefault{2, 3} {}
+};
+
+void incomplete_array_init() {
+  IncompleteArrInit a;
+  clang_analyzer_eval(a.arr[0] == 1); // expected-warning{{TRUE}}
+  clang_analyzer_eval(a.arr[1] == 0); // expected-warning{{TRUE}}
+  clang_analyzer_eval(a.arrDefault[0] == 2); // expected-warning{{TRUE}}
+  clang_analyzer_eval(a.arrDefault[1] == 3); // expected-warning{{TRUE}}
+  clang_analyzer_eval(a.arrDefault[2] == 0); // expected-warning{{TRUE}}
 }
diff --git clang/test/Analysis/embed.c clang/test/Analysis/embed.c
new file mode 100644
index 000000000000..32f6c1303257
--- /dev/null
+++ clang/test/Analysis/embed.c
@@ -0,0 +1,12 @@
+// RUN: %clang_analyze_cc1 -std=c23 -analyzer-checker=core,debug.ExprInspection -verify %s
+
+void clang_analyzer_dump_ptr(const unsigned char *ptr);
+void clang_analyzer_dump(unsigned char val);
+
+int main() {
+    const unsigned char SelfBytes[] = {
+        #embed "embed.c"
+    };
+    clang_analyzer_dump_ptr(SelfBytes); // expected-warning {{&Element{SelfBytes,0 S64b,unsigned char}}}
+    clang_analyzer_dump(SelfBytes[0]); // expected-warning {{Unknown}} FIXME: This should be the `/` character.
+}
diff --git clang/test/Analysis/fread.c clang/test/Analysis/fread.c
index 3f286421fd7a..5dc6c0c74409 100644
--- clang/test/Analysis/fread.c
+++ clang/test/Analysis/fread.c
@@ -443,3 +443,33 @@ void test_unaligned_start_read(void) {
     fclose(fp);
   }
 }
+
+void no_crash_if_count_is_negative(long l, long r, unsigned char *buffer) {
+  FILE *fp = fopen("path", "r");
+  if (fp) {
+    if (l * r == -1) {
+      fread(buffer, 1, l * r, fp); // no-crash
+    }
+    fclose(fp);
+  }
+}
+
+void no_crash_if_size_is_negative(long l, long r, unsigned char *buffer) {
+  FILE *fp = fopen("path", "r");
+  if (fp) {
+    if (l * r == -1) {
+      fread(buffer, l * r, 1, fp); // no-crash
+    }
+    fclose(fp);
+  }
+}
+
+void no_crash_if_size_and_count_are_negative(long l, long r, unsigned char *buffer) {
+  FILE *fp = fopen("path", "r");
+  if (fp) {
+    if (l * r == -1) {
+      fread(buffer, l * r, l * r, fp); // no-crash
+    }
+    fclose(fp);
+  }
+}
diff --git clang/test/Analysis/lifetime-extended-regions.cpp clang/test/Analysis/lifetime-extended-regions.cpp
index 4e98bd4b0403..4458ad294af7 100644
--- clang/test/Analysis/lifetime-extended-regions.cpp
+++ clang/test/Analysis/lifetime-extended-regions.cpp
@@ -120,10 +120,11 @@ void aggregateWithReferences() {
   clang_analyzer_dump(viaReference);    // expected-warning-re {{&lifetime_extended_object{RefAggregate, viaReference, S{{[0-9]+}}} }}
   clang_analyzer_dump(viaReference.rx); // expected-warning-re {{&lifetime_extended_object{int, viaReference, S{{[0-9]+}}} }}
   clang_analyzer_dump(viaReference.ry); // expected-warning-re {{&lifetime_extended_object{Composite, viaReference, S{{[0-9]+}}} }}
-
-  // clang does not currently implement extending lifetime of object bound to reference members of aggregates,
-  // that are created from default member initializer (see `warn_unsupported_lifetime_extension` from `-Wdangling`)
-  RefAggregate defaultInitExtended{i}; // clang-bug does not extend `Composite`
+  
+  // FIXME: clang now supports extending the lifetime of objects bound to reference members of aggregates
+  // that are created from a default member initializer, but the CFG and ExprEngine still need to be updated to reflect this change.
+  // The following is expected to warn: {{&lifetime_extended_object{Composite, defaultInitExtended, S{{[0-9]+}}} }}
+  RefAggregate defaultInitExtended{i};
   clang_analyzer_dump(defaultInitExtended.ry); // expected-warning {{Unknown }}
 }
 
diff --git clang/test/Analysis/nullability-nocrash.c clang/test/Analysis/nullability-nocrash.c
new file mode 100644
index 000000000000..209b77082506
--- /dev/null
+++ clang/test/Analysis/nullability-nocrash.c
@@ -0,0 +1,13 @@
+// RUN: %clang_analyze_cc1 -w -analyzer-checker=nullability \
+// RUN:                       -analyzer-output=text -verify %s
+//
+// expected-no-diagnostics
+//
+// Previously there was an assertion requiring that if an Event is handled by
+// some enabled checker, then there must be at least one enabled checker which
+// can emit that kind of Event.
+// This assertion failed when NullabilityChecker (which is a subclass of
+// check::Event<ImplicitNullDerefEvent>) was enabled, but the checkers
+// inheriting from EventDispatcher<ImplicitNullDerefEvent> were all disabled.
+// This test file validates that enabling the nullability checkers (without any
+// other checkers) no longer causes a crash.
diff --git clang/test/Analysis/nullptr.cpp clang/test/Analysis/nullptr.cpp
index 825f6570af59..73f10a08d96c 100644
--- clang/test/Analysis/nullptr.cpp
+++ clang/test/Analysis/nullptr.cpp
@@ -173,3 +173,19 @@ void test_address_space_bind() {
   AS1 AS_ATTRIBUTE &r = *pa;
   r.x = 0; // no-warning
 }
+
+namespace ArrMemWithCtorInitializer {
+struct ArrayMem {
+  int* ptrArr[1];
+  int* memPtr;
+  ArrayMem() : ptrArr{nullptr}, memPtr{nullptr} {}
+  // expected-note@-1{{Storing null pointer value}}
+};
+
+void tp() {
+  ArrayMem obj; // expected-note{{Calling default constructor for 'ArrayMem'}}
+                // expected-note@-1{{Returning from default constructor for 'ArrayMem'}}
+  *obj.ptrArr[0] = 0; // expected-warning{{Dereference of null pointer}}
+                       // expected-note@-1{{Dereference of null pointer}}
+}
+} // namespace ArrMemWithCtorInitializer
diff --git clang/test/CXX/drs/cwg16xx.cpp clang/test/CXX/drs/cwg16xx.cpp
index cf6b45ceabf2..95e241f0d03e 100644
--- clang/test/CXX/drs/cwg16xx.cpp
+++ clang/test/CXX/drs/cwg16xx.cpp
@@ -449,6 +449,27 @@ namespace cwg1696 { // cwg1696: 7
       //   since-cxx14-note@-2 {{default member initializer declared here}}
     };
     A a{a, a};
+
+    struct A1 {
+      A1() : v(42) {}
+      // since-cxx14-error@-1 {{reference member 'v' binds to a temporary object whose lifetime would be shorter than the lifetime of the constructed object}}
+      // since-cxx14-note@#cwg1696-A1 {{reference member declared here}}
+      const int &v; // #cwg1696-A1
+    };
+
+    struct A2 {
+      A2() = default;
+      // since-cxx14-error@-1 {{reference member 'v' binds to a temporary object whose lifetime would be shorter than the lifetime of the constructed object}}
+      // since-cxx14-note-re@#cwg1696-A2-b {{in defaulted default constructor for {{.*}} first required here}}
+      // since-cxx14-note@#cwg1696-A2-a {{initializing field 'v' with default member initializer}}
+      A2(int v) : v(v) {}
+      // since-cxx14-warning@-1 {{binding reference member 'v' to stack allocated parameter 'v'}}
+      // since-cxx14-note@#cwg1696-A2-a {{reference member declared here}}
+      const int &v = 42;  // #cwg1696-A2-a
+    };
+    A2 a1;    // #cwg1696-A2-b
+    
+    A2 a2(1); // OK, unfortunately
 #endif
   }
 
@@ -483,8 +504,6 @@ namespace cwg1696 { // cwg1696: 7
     const A &a = A(); // #cwg1696-D1-a
   };
   D1 d1 = {}; // #cwg1696-d1
-  // since-cxx14-warning@-1 {{lifetime extension of temporary created by aggregate initialization using a default member initializer is not yet supported; lifetime of temporary will end at the end of the full-expression}}
-  //   since-cxx14-note@#cwg1696-D1-a {{initializing field 'a' with default member initializer}}
 
   struct D2 {
     const A &a = A(); // #cwg1696-D2-a
diff --git clang/test/CXX/drs/cwg18xx.cpp clang/test/CXX/drs/cwg18xx.cpp
index 61b7faa96a9f..7f0fb8cf589d 100644
--- clang/test/CXX/drs/cwg18xx.cpp
+++ clang/test/CXX/drs/cwg18xx.cpp
@@ -206,19 +206,28 @@ namespace cwg1814 { // cwg1814: yes
 #endif
 }
 
-namespace cwg1815 { // cwg1815: no
+namespace cwg1815 { // cwg1815: 20
 #if __cplusplus >= 201402L
-  // FIXME: needs codegen test
-  struct A { int &&r = 0; }; // #cwg1815-A
+  struct A { int &&r = 0; };
   A a = {};
-  // since-cxx14-warning@-1 {{lifetime extension of temporary created by aggregate initialization using a default member initializer is not yet supported; lifetime of temporary will end at the end of the full-expression}} FIXME
-  //   since-cxx14-note@#cwg1815-A {{initializing field 'r' with default member initializer}}
 
   struct B { int &&r = 0; }; // #cwg1815-B
   // since-cxx14-error@-1 {{reference member 'r' binds to a temporary object whose lifetime would be shorter than the lifetime of the constructed object}}
   //   since-cxx14-note@#cwg1815-B {{initializing field 'r' with default member initializer}}
   //   since-cxx14-note@#cwg1815-b {{in implicit default constructor for 'cwg1815::B' first required here}}
   B b; // #cwg1815-b
+
+#if __cplusplus >= 201703L
+  struct C { const int &r = 0; };
+  constexpr C c = {}; // OK, since cwg1815
+  static_assert(c.r == 0);
+
+  constexpr int f() {
+    A a = {}; // OK, since cwg1815
+    return a.r;
+  }
+  static_assert(f() == 0);
+#endif
 #endif
 }
 
diff --git clang/test/CXX/drs/cwg27xx.cpp clang/test/CXX/drs/cwg27xx.cpp
index 406c8ea41f3b..2b57dbc60aed 100644
--- clang/test/CXX/drs/cwg27xx.cpp
+++ clang/test/CXX/drs/cwg27xx.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++98 -pedantic-errors -verify=expected %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++98 -pedantic-errors -verify=expected,cxx98 %s
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++11 -pedantic-errors -verify=expected %s
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++14 -pedantic-errors -verify=expected %s
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++17 -pedantic-errors -verify=expected %s
@@ -6,6 +6,29 @@
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++23 -pedantic-errors -verify=expected,since-cxx23 %s
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++2c -pedantic-errors -verify=expected,since-cxx23,since-cxx26 %s
 
+#if __cplusplus == 199711L
+#define static_assert(...) __extension__ _Static_assert(__VA_ARGS__)
+// cxx98-error@-1 {{variadic macros are a C99 feature}}
+#endif
+
+#if __cplusplus == 199711L
+#define __enable_constant_folding(x) (__builtin_constant_p(x) ? (x) : (x))
+#else
+#define __enable_constant_folding
+#endif
+
+namespace std {
+#if __cplusplus >= 202002L
+  struct strong_ordering {
+    int n;
+    constexpr operator int() const { return n; }
+    static const strong_ordering less, equal, greater;
+  };
+  constexpr strong_ordering strong_ordering::less{-1},
+      strong_ordering::equal{0}, strong_ordering::greater{1};
+#endif
+} // namespace std
+
 namespace cwg2718 { // cwg2718: 2.7
 struct B {};
 struct D;
@@ -18,6 +41,27 @@ void f(B b) {
 struct D : B {};
 } // namespace cwg2718
 
+namespace cwg2749 { // cwg2749: 20
+
+extern int x[2];
+struct Y {
+  int i;
+  int j;
+};
+extern Y y[2];
+
+static_assert(__enable_constant_folding(static_cast<void*>(&x[0]) < static_cast<void*>(&x[1])), "");
+static_assert(__enable_constant_folding(static_cast<void*>(&y[0].i) < static_cast<void*>(&y[0].j)), "");
+static_assert(__enable_constant_folding(static_cast<void*>(&y[0].j) < static_cast<void*>(&y[1].i)), "");
+
+#if __cplusplus >= 202002L
+static_assert((static_cast<void*>(&x[0]) <=> static_cast<void*>(&x[1])) == std::strong_ordering::less);
+static_assert((static_cast<void*>(&y[0].i) <=> static_cast<void*>(&y[0].j)) == std::strong_ordering::less);
+static_assert((static_cast<void*>(&y[0].j) <=> static_cast<void*>(&y[1].i)) == std::strong_ordering::less);
+#endif
+
+} // namespace cwg2749
+
 namespace cwg2759 { // cwg2759: 19
 #if __cplusplus >= 201103L
 
@@ -134,7 +178,7 @@ void test() {
 }
 
 namespace cwg2798 { // cwg2798: 17
-#if __cpp_static_assert >= 202306
+#if __cplusplus > 202302L
 struct string {
   constexpr string() {
     data_ = new char[6]();
diff --git clang/test/CXX/expr/expr.const/p2-0x.cpp clang/test/CXX/expr/expr.const/p2-0x.cpp
index e3cd057baba7..767eee1c74f0 100644
--- clang/test/CXX/expr/expr.const/p2-0x.cpp
+++ clang/test/CXX/expr/expr.const/p2-0x.cpp
@@ -571,18 +571,19 @@ namespace UnspecifiedRelations {
   // [expr.rel]p3: Pointers to void can be compared [...] if both pointers
   // represent the same address or are both the null pointer [...]; otherwise
   // the result is unspecified.
+  // Same address restriction removed by CWG2749
   struct S { int a, b; } s;
   constexpr void *null = 0;
   constexpr void *pv = (void*)&s.a;
   constexpr void *qv = (void*)&s.b;
   constexpr bool v1 = null < (int*)0;
   constexpr bool v2 = null < pv; // expected-error {{constant expression}} expected-note {{comparison between 'nullptr' and '&s.a' has unspecified value}}
-  constexpr bool v3 = null == pv; // ok
-  constexpr bool v4 = qv == pv; // ok
-  constexpr bool v5 = qv >= pv; // expected-error {{constant expression}} expected-note {{unequal pointers to void}}
+  constexpr bool v3 = null == pv;
+  constexpr bool v4 = qv == pv;
+  constexpr bool v5 = qv >= pv;
   constexpr bool v6 = qv > null; // expected-error {{constant expression}} expected-note {{comparison between '&s.b' and 'nullptr' has unspecified value}}
-  constexpr bool v7 = qv <= (void*)&s.b; // ok
-  constexpr bool v8 = qv > (void*)&s.a; // expected-error {{constant expression}} expected-note {{unequal pointers to void}}
+  constexpr bool v7 = qv <= (void*)&s.b;
+  constexpr bool v8 = qv > (void*)&s.a;
 }
 
 // - an assignment or a compound assignment (5.17); or
diff --git clang/test/CXX/special/class.temporary/p6.cpp clang/test/CXX/special/class.temporary/p6.cpp
index 5554363cc69a..a6d2adfd1fd2 100644
--- clang/test/CXX/special/class.temporary/p6.cpp
+++ clang/test/CXX/special/class.temporary/p6.cpp
@@ -269,6 +269,40 @@ void init_capture_init_list() {
   // CHECK: }
 }
 
+void check_dr1815() { // dr1815: yes
+#if __cplusplus >= 201402L
+
+  struct A {
+    int &&r = 0;
+    ~A() {}
+  };
+
+  struct B {
+    A &&a = A{};
+    ~B() {}
+  };
+  B a = {};
+  
+  // CHECK: call {{.*}}block_scope_begin_function
+  extern void block_scope_begin_function();
+  extern void block_scope_end_function();
+  block_scope_begin_function();
+  {
+    // CHECK: call void @_ZZ12check_dr1815vEN1BD1Ev
+    // CHECK: call void @_ZZ12check_dr1815vEN1AD1Ev
+    B b = {};
+  }
+  // CHECK: call {{.*}}block_scope_end_function
+  block_scope_end_function();
+
+  // CHECK: call {{.*}}some_other_function
+  extern void some_other_function();
+  some_other_function();
+  // CHECK: call void @_ZZ12check_dr1815vEN1BD1Ev
+  // CHECK: call void @_ZZ12check_dr1815vEN1AD1Ev
+#endif
+}
+
 namespace P2718R0 {
 namespace basic {
 template <typename E> using T2 = std::list<E>;
diff --git clang/test/ClangScanDeps/verbose.test clang/test/ClangScanDeps/verbose.test
new file mode 100644
index 000000000000..99c5214c7620
--- /dev/null
+++ clang/test/ClangScanDeps/verbose.test
@@ -0,0 +1,28 @@
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: sed -e "s|DIR|%/t|g" %t/cdb.json.in > %t/cdb.json
+
+// RUN: clang-scan-deps -compilation-database %t/cdb.json -v -o %t/result.json 2>&1 | FileCheck %s
+// CHECK:      *** Virtual File System Stats:
+// CHECK-NEXT: {{[[:digit:]]+}} status() calls
+// CHECK-NEXT: {{[[:digit:]]+}} openFileForRead() calls
+// CHECK-NEXT: {{[[:digit:]]+}} dir_begin() calls
+// CHECK-NEXT: {{[[:digit:]]+}} getRealPath() calls
+// CHECK-NEXT: {{[[:digit:]]+}} exists() calls
+// CHECK-NEXT: {{[[:digit:]]+}} isLocal() calls
+
+//--- tu.c
+
+//--- cdb.json.in
+[
+  {
+    "file": "DIR/tu.c"
+    "directory": "DIR",
+    "command": "clang -c DIR/tu.c -o DIR/tu.o"
+  },
+  {
+    "file": "DIR/tu.c"
+    "directory": "DIR",
+    "command": "clang -c DIR/tu.c -o DIR/tu.o"
+  }
+]
diff --git clang/test/CodeGen/2005-01-02-ConstantInits.c clang/test/CodeGen/2005-01-02-ConstantInits.c
index 7772a64331ff..d90c2ea42da6 100644
--- clang/test/CodeGen/2005-01-02-ConstantInits.c
+++ clang/test/CodeGen/2005-01-02-ConstantInits.c
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-globals --global-value-regex "@.+"
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-globals --global-value-regex "[A-Za-z].*"
 // RUN: %clang_cc1 -triple=x86_64-unknown-linux %s -emit-llvm -o - | FileCheck %s
 
 // This tests all kinds of hard cases with initializers and
@@ -51,7 +51,7 @@ int foo(int i) { return bar(&Arr[49])+bar(&Arr[i]); }
 // CHECK-NEXT:    store i32 [[I]], ptr [[I_ADDR]], align 4
 // CHECK-NEXT:    store ptr @Arr, ptr [[P]], align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[P]], align 8
-// CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1
+// CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP0]], i32 1
 // CHECK-NEXT:    store ptr [[INCDEC_PTR]], ptr [[P]], align 8
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I_ADDR]], align 4
 // CHECK-NEXT:    [[IDX_EXT:%.*]] = sext i32 [[TMP1]] to i64
diff --git clang/test/CodeGen/PowerPC/ppc-emmintrin.c clang/test/CodeGen/PowerPC/ppc-emmintrin.c
index a3650beec625..4c4d0dfce05e 100644
--- clang/test/CodeGen/PowerPC/ppc-emmintrin.c
+++ clang/test/CodeGen/PowerPC/ppc-emmintrin.c
@@ -1012,14 +1012,14 @@ test_shuffle() {
 // CHECK: %[[SHR:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 6
 // CHECK: %[[AND4:[0-9a-zA-Z_.]+]] = and i32 %[[SHR]], 3
 // CHECK: sext i32 %[[AND4]] to i64
-// CHECK: getelementptr inbounds [4 x i32], ptr @_mm_shuffle_epi32.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
+// CHECK: getelementptr inbounds nuw [4 x i32], ptr @_mm_shuffle_epi32.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
 // CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %{{[0-9a-zA-Z_.]+}}, i32 0
-// CHECK: getelementptr inbounds [4 x i32], ptr @_mm_shuffle_epi32.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
+// CHECK: getelementptr inbounds nuw [4 x i32], ptr @_mm_shuffle_epi32.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
 // CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %{{[0-9a-zA-Z_.]+}}, i32 1
-// CHECK: getelementptr inbounds [4 x i32], ptr @_mm_shuffle_epi32.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
+// CHECK: getelementptr inbounds nuw [4 x i32], ptr @_mm_shuffle_epi32.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
 // CHECK: %[[ADD:[0-9a-zA-Z_.]+]] = add i32 %{{[0-9a-zA-Z_.]+}}, 269488144
 // CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %[[ADD]], i32 2
-// CHECK: getelementptr inbounds [4 x i32], ptr @_mm_shuffle_epi32.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
+// CHECK: getelementptr inbounds nuw [4 x i32], ptr @_mm_shuffle_epi32.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
 // CHECK: add i32 %{{[0-9a-zA-Z_.]+}}, 269488144
 // CHECK: call <4 x i32> @vec_perm(int vector[4], int vector[4], unsigned char vector[16])
 
@@ -1050,7 +1050,7 @@ test_shuffle() {
 // CHECK: sext i32 %[[AND4]] to i64
 // CHECK-LE: store <2 x i64> <i64 1663540288323457296, i64 0>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
 // CHECK-BE: store <2 x i64> <i64 1157726452361532951, i64 0>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
-// CHECK-COUNT-4: getelementptr inbounds [4 x i16], ptr @_mm_shufflehi_epi16.__permute_selectors, i64 0, i64 {{[0-9a-zA-Z_%.]+}}
+// CHECK-COUNT-4: getelementptr inbounds nuw [4 x i16], ptr @_mm_shufflehi_epi16.__permute_selectors, i64 0, i64 {{[0-9a-zA-Z_%.]+}}
 // CHECK: call <2 x i64> @vec_perm(unsigned long long vector[2], unsigned long long vector[2], unsigned char vector[16])
 
 // CHECK-LABEL: define available_externally <2 x i64> @_mm_shufflelo_epi16
@@ -1067,7 +1067,7 @@ test_shuffle() {
 // CHECK: sext i32 %[[AND4]] to i64
 // CHECK-LE: store <2 x i64> <i64 0, i64 2242261671028070680>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
 // CHECK-BE: store <2 x i64> <i64 0, i64 1736447835066146335>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
-// CHECK-COUNT-4: getelementptr inbounds [4 x i16], ptr @_mm_shufflelo_epi16.__permute_selectors, i64 0, i64 {{[0-9a-zA-Z_%.]+}}
+// CHECK-COUNT-4: getelementptr inbounds nuw [4 x i16], ptr @_mm_shufflelo_epi16.__permute_selectors, i64 0, i64 {{[0-9a-zA-Z_%.]+}}
 // CHECK: call <2 x i64> @vec_perm(unsigned long long vector[2], unsigned long long vector[2], unsigned char vector[16])
 
 void __attribute__((noinline))
diff --git clang/test/CodeGen/PowerPC/ppc-xmmintrin.c clang/test/CodeGen/PowerPC/ppc-xmmintrin.c
index 95dfd1202f15..4a15fa9f76ce 100644
--- clang/test/CodeGen/PowerPC/ppc-xmmintrin.c
+++ clang/test/CodeGen/PowerPC/ppc-xmmintrin.c
@@ -894,16 +894,16 @@ test_shuffle() {
 // CHECK: %[[SHR3:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 6
 // CHECK: %[[AND4:[0-9a-zA-Z_.]+]] = and i32 %[[SHR3]], 3
 // CHECK: sext i32 %[[AND4]] to i64
-// CHECK: getelementptr inbounds [4 x i16], ptr @_mm_shuffle_pi16.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
+// CHECK: getelementptr inbounds nuw [4 x i16], ptr @_mm_shuffle_pi16.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
 // CHECK-LE: getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 0
 // CHECK-BE: getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 3
-// CHECK: getelementptr inbounds [4 x i16], ptr @_mm_shuffle_pi16.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
+// CHECK: getelementptr inbounds nuw [4 x i16], ptr @_mm_shuffle_pi16.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
 // CHECK-LE: getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 1
 // CHECK-BE: getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 2
-// CHECK: getelementptr inbounds [4 x i16], ptr @_mm_shuffle_pi16.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
+// CHECK: getelementptr inbounds nuw [4 x i16], ptr @_mm_shuffle_pi16.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
 // CHECK-LE: getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 2
 // CHECK-BE: getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 1
-// CHECK: getelementptr inbounds [4 x i16], ptr @_mm_shuffle_pi16.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
+// CHECK: getelementptr inbounds nuw [4 x i16], ptr @_mm_shuffle_pi16.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
 // CHECK-LE: getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 3
 // CHECK-BE: getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 0
 // CHECK: call <2 x i64> @vec_splats(unsigned long long)
@@ -923,14 +923,14 @@ test_shuffle() {
 // CHECK: %[[SHR3:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 6
 // CHECK: %[[AND4:[0-9a-zA-Z_.]+]] = and i32 %[[SHR3]], 3
 // CHECK: sext i32 %[[AND4]] to i64
-// CHECK: getelementptr inbounds [4 x i32], ptr @_mm_shuffle_ps.__permute_selectors, i64 0, i64
+// CHECK: getelementptr inbounds nuw [4 x i32], ptr @_mm_shuffle_ps.__permute_selectors, i64 0, i64
 // CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %{{[0-9a-zA-Z_.]+}}, i32 0
-// CHECK: getelementptr inbounds [4 x i32], ptr @_mm_shuffle_ps.__permute_selectors, i64 0, i64
+// CHECK: getelementptr inbounds nuw [4 x i32], ptr @_mm_shuffle_ps.__permute_selectors, i64 0, i64
 // CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %{{[0-9a-zA-Z_.]+}}, i32 1
-// CHECK: getelementptr inbounds [4 x i32], ptr @_mm_shuffle_ps.__permute_selectors, i64 0, i64
+// CHECK: getelementptr inbounds nuw [4 x i32], ptr @_mm_shuffle_ps.__permute_selectors, i64 0, i64
 // CHECK: %[[ADD:[0-9a-zA-Z_.]+]] = add i32 %{{[0-9a-zA-Z_.]+}}, 269488144
 // CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %[[ADD]], i32 2
-// CHECK: getelementptr inbounds [4 x i32], ptr @_mm_shuffle_ps.__permute_selectors, i64 0, i64
+// CHECK: getelementptr inbounds nuw [4 x i32], ptr @_mm_shuffle_ps.__permute_selectors, i64 0, i64
 // CHECK: %[[ADD2:[0-9a-zA-Z_.]+]] = add i32 %{{[0-9a-zA-Z_.]+}}, 269488144
 // CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %[[ADD2]], i32 3
 // CHECK: call <4 x float> @vec_perm(float vector[4], float vector[4], unsigned char vector[16])
diff --git clang/test/CodeGen/PowerPC/transparent_union.c clang/test/CodeGen/PowerPC/transparent_union.c
deleted file mode 100644
index 968a385c0ee4..000000000000
--- clang/test/CodeGen/PowerPC/transparent_union.c
+++ /dev/null
@@ -1,67 +0,0 @@
-// RUN: %clang_cc1 -triple powerpc64le-unknown-linux -O2 -target-cpu pwr7 \
-// RUN:   -emit-llvm -fshort-enums %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-64
-// RUN: %clang_cc1 -triple powerpc64-unknown-linux -O2 -target-cpu pwr7 \
-// RUN:   -emit-llvm -fshort-enums %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-64
-// RUN: %clang_cc1 -triple powerpc-unknown-linux -O2 -target-cpu pwr7 \
-// RUN:   -emit-llvm -fshort-enums %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-32
-// RUN: %clang_cc1 -triple powerpc64-unknown-aix -O2 -target-cpu pwr7 \
-// RUN:   -emit-llvm -fshort-enums %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-64
-// RUN: %clang_cc1 -triple powerpc-unknown-aix -O2 -target-cpu pwr7 \
-// RUN:   -emit-llvm -fshort-enums %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-32
-
-typedef union tu_c {
-  signed char a;
-  signed char b;
-} tu_c_t __attribute__((transparent_union));
-
-typedef union tu_s {
-  short a;
-} tu_s_t __attribute__((transparent_union));
-
-typedef union tu_us {
-  unsigned short a;
-} tu_us_t __attribute__((transparent_union));
-
-typedef union tu_l {
-  long a;
-} tu_l_t __attribute__((transparent_union));
-
-// CHECK-LABEL: define{{.*}} void @ftest0(
-// CHECK-SAME: i8 noundef signext [[UC_COERCE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    ret void
-void ftest0(tu_c_t uc) { }
-
-// CHECK-LABEL: define{{.*}} void @ftest1(
-// CHECK-SAME: i16 noundef signext [[UC_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    ret void
-void ftest1(tu_s_t uc) { }
-
-// CHECK-LABEL: define{{.*}} void @ftest2(
-// CHECK-SAME: i16 noundef zeroext [[UC_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    ret void
-void ftest2(tu_us_t uc) { }
-
-// CHECK-64-LABEL: define{{.*}} void @ftest3(
-// CHECK-64-SAME: i64 [[UC_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-64-NEXT:  [[ENTRY:.*:]]
-// CHECK-64-NEXT:    ret void
-//
-// CHECK-32-LABEL: define{{.*}} void @ftest3(
-// CHECK-32-SAME: i32 [[UC_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-32-NEXT:  [[ENTRY:.*:]]
-// CHECK-32-NEXT:    ret void
-void ftest3(tu_l_t uc) { }
-
-typedef union etest {
-  enum flag {red, yellow, blue} fl;
-  enum weekend {sun, sat} b;
-} etest_t __attribute__((transparent_union));
-
-// CHECK-LABEL: define{{.*}} void @ftest4(
-// CHECK-SAME: i8 noundef zeroext [[A_COERCE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    ret void
-void ftest4(etest_t a) {}
diff --git clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-errors.c clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-errors.c
new file mode 100644
index 000000000000..c2e891217fbb
--- /dev/null
+++ clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-errors.c
@@ -0,0 +1,52 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386-unknown-unknown -target-feature +avx10.2-512 -emit-llvm -Wall -Werror -verify
+
+#include <immintrin.h>
+#include <stddef.h>
+
+__m256i test_mm512_cvtts_roundpd_epi32(__m512d A) {
+  return _mm512_cvtts_roundpd_epi32(A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m256i test_mm512_mask_cvtts_roundpd_epi32(__m256i W, __mmask8 U, __m512d A) {
+  return _mm512_mask_cvtts_roundpd_epi32(W, U, A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m256i test_mm512_maskz_cvtts_roundpd_epi32(__mmask8 U, __m512d A) {
+  return _mm512_maskz_cvtts_roundpd_epi32(U, A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m256i test_mm512_cvtts_roundpd_epu32(__m512d A) {
+  return _mm512_cvtts_roundpd_epu32(A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m256i test_mm512_mask_cvtts_roundpd_epu32(__m256i W, __mmask8 U, __m512d A) {
+  return _mm512_mask_cvtts_roundpd_epu32(W, U, A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m256i test_mm512_maskz_cvtts_roundpd_epu32(__mmask8 U, __m512d A) {
+  return _mm512_maskz_cvtts_roundpd_epu32(U, A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m512i test_mm512_cvtts_roundps_epi32(__m512 A) {
+  return _mm512_cvtts_roundps_epi32(A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m512i test_mm512_mask_cvtts_roundps_epi32(__m512i W, __mmask8 U, __m512 A) {
+  return _mm512_mask_cvtts_roundps_epi32(W, U, A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m512i test_mm512_maskz_cvtts_roundps_epi32(__mmask8 U, __m512 A) {
+  return _mm512_maskz_cvtts_roundps_epi32(U, A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m512i test_mm512_cvtts_roundps_epu32(__m512 A) {
+  return _mm512_cvtts_roundps_epu32(A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m512i test_mm512_mask_cvtts_roundps_epu32(__m512i W, __mmask8 U, __m512 A) {
+  return _mm512_mask_cvtts_roundps_epu32(W, U, A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m512i test_mm512_maskz_cvtts_roundps_epu32(__mmask8 U, __m512 A) {
+  return _mm512_maskz_cvtts_roundps_epu32(U, A, 22); // expected-error {{invalid rounding argument}}
+}
\ No newline at end of file
diff --git clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64-error.c clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64-error.c
new file mode 100755
index 000000000000..290025691457
--- /dev/null
+++ clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64-error.c
@@ -0,0 +1,76 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +avx10.2-512 -emit-llvm -Wall -Werror -verify
+
+#include <immintrin.h>
+#include <stddef.h>
+
+long long test_mm_cvttssd_si64(__m128d __A) {
+  return _mm_cvtts_roundsd_si64(__A, 22); // expected-error {{invalid rounding argument}}
+}
+
+long long test_mm_cvttssd_i64(__m128d __A) {
+  return _mm_cvtts_roundsd_i64(__A, 22); // expected-error {{invalid rounding argument}}
+}
+
+unsigned long long test_mm_cvttssd_u64(__m128d __A) {
+  return _mm_cvtts_roundsd_u64(__A, 22); // expected-error {{invalid rounding argument}}
+}
+
+float test_mm_cvttsss_i64(__m128 __A) {
+  return _mm_cvtts_roundss_i64(__A, 22); // expected-error {{invalid rounding argument}}
+}
+
+long long test_mm_cvttsss_si64(__m128 __A) {
+  return _mm_cvtts_roundss_si64(__A, 22); // expected-error {{invalid rounding argument}}
+}
+
+unsigned long long test_mm_cvttsss_u64(__m128 __A) {
+  return _mm_cvtts_roundss_u64(__A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m512i test_mm512_cvtts_roundpd_epi64(__m512d A) {
+  return _mm512_cvtts_roundpd_epi64( A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m512i test_mm512_mask_cvtts_roundpd_epi64(__m512i W, __mmask8 U, __m512d A) {
+  return _mm512_mask_cvtts_roundpd_epi64( W, U, A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m512i test_mm512_maskz_cvtts_roundpd_epi64(__mmask8 U, __m512d A) {
+  return _mm512_maskz_cvtts_roundpd_epi64( U, A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m512i test_mm512_cvtts_roundpd_epu64(__m512d A) {
+  return _mm512_cvtts_roundpd_epu64( A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m512i test_mm512_mask_cvtts_roundpd_epu64(__m512i W, __mmask8 U, __m512d A) {
+  return _mm512_mask_cvtts_roundpd_epu64( W, U, A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m512i test_mm512_maskz_cvtts_roundpd_epu64(__mmask8 U, __m512d A) {
+  return _mm512_maskz_cvtts_roundpd_epu64( U, A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m512i test_mm512_cvtts_roundps_epi64(__m256 A) {
+  return _mm512_cvtts_roundps_epi64( A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m512i test_mm512_mask_cvtts_roundps_epi64(__m512i W, __mmask8 U, __m256 A) {
+  return _mm512_mask_cvtts_roundps_epi64( W, U, A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m512i test_mm512_maskz_cvtts_roundps_epi64(__mmask8 U, __m256 A) {
+  return _mm512_maskz_cvtts_roundps_epi64( U, A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m512i test_mm512_cvtts_roundps_epu64(__m256 A) {
+  return _mm512_cvtts_roundps_epu64( A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m512i test_mm512_mask_cvtts_roundps_epu64(__m512i W, __mmask8 U, __m256 A) {
+  return _mm512_mask_cvtts_roundps_epu64( W, U, A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m512i test_mm512_maskz_cvtts_roundps_epu64(__mmask8 U, __m256 A) {
+  return _mm512_maskz_cvtts_roundps_epu64( U, A, 22); // expected-error {{invalid rounding argument}}
+}
diff --git clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64.c clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64.c
new file mode 100644
index 000000000000..8c8959a03d7b
--- /dev/null
+++ clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64.c
@@ -0,0 +1,184 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +avx10.2-512 -emit-llvm -o - | FileCheck %s
+
+#include <immintrin.h>
+#include <stddef.h>
+
+long long test_mm_cvttssd_si64(__m128d __A) {
+  // CHECK-LABEL: @test_mm_cvttssd_si64(
+  // CHECK: @llvm.x86.avx10.vcvttsd2sis64(<2 x double>
+  return _mm_cvtts_roundsd_si64(__A, _MM_FROUND_NO_EXC);
+}
+
+long long test_mm_cvttssd_i64(__m128d __A) {
+  // CHECK-LABEL: @test_mm_cvttssd_i64(
+  // CHECK: @llvm.x86.avx10.vcvttsd2sis64(<2 x double>
+  return _mm_cvtts_roundsd_i64(__A, _MM_FROUND_NO_EXC);
+}
+
+unsigned long long test_mm_cvttssd_u64(__m128d __A) {
+  // CHECK-LABEL: @test_mm_cvttssd_u64(
+  // CHECK: @llvm.x86.avx10.vcvttsd2usis64(<2 x double>
+  return _mm_cvtts_roundsd_u64(__A, _MM_FROUND_NO_EXC);
+}
+
+float test_mm_cvttsss_i64(__m128 __A) {
+  // CHECK-LABEL: @test_mm_cvttsss_i64(
+  // CHECK: @llvm.x86.avx10.vcvttss2sis64(<4 x float>
+  return _mm_cvtts_roundss_i64(__A, _MM_FROUND_NO_EXC);
+}
+
+long long test_mm_cvttsss_si64(__m128 __A) {
+  // CHECK-LABEL: @test_mm_cvttsss_si64(
+  // CHECK: @llvm.x86.avx10.vcvttss2sis64(<4 x float>
+  return _mm_cvtts_roundss_si64(__A, _MM_FROUND_NO_EXC);
+}
+
+unsigned long long test_mm_cvttsss_u64(__m128 __A) {
+  // CHECK-LABEL: @test_mm_cvttsss_u64(
+  // CHECK: @llvm.x86.avx10.vcvttss2usis64(<4 x float>
+  return _mm_cvtts_roundss_u64(__A, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_cvttspd_epi64(__m512d A) {
+  // CHECK-LABEL: test_mm512_cvttspd_epi64
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.round.512(<8 x double>
+  return _mm512_cvttspd_epi64(A);
+}
+
+__m512i test_mm512_mask_cvttspd_epi64(__m512i W, __mmask8 U, __m512d A) {
+  // CHECK-LABEL: test_mm512_mask_cvttspd_epi64
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.round.512(<8 x double>
+  return _mm512_mask_cvttspd_epi64(W, U, A);
+}
+
+__m512i test_mm512_maskz_cvttspd_epi64(__mmask8 U, __m512d A) {
+  // CHECK-LABEL: test_mm512_maskz_cvttspd_epi64
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.round.512(<8 x double>
+  return _mm512_maskz_cvttspd_epi64(U, A);
+}
+
+__m512i test_mm512_cvtts_roundpd_epi64(__m512d A) {
+  // CHECK-LABEL: test_mm512_cvtts_roundpd_epi64
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.round.512(<8 x double>
+  return _mm512_cvtts_roundpd_epi64(A, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_mask_cvtts_roundpd_epi64(__m512i W, __mmask8 U, __m512d A) {
+  // CHECK-LABEL: test_mm512_mask_cvtts_roundpd_epi64
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.round.512(<8 x double>
+  return _mm512_mask_cvtts_roundpd_epi64(W, U, A, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_maskz_cvtts_roundpd_epi64(__mmask8 U, __m512d A) {
+  // CHECK-LABEL: test_mm512_maskz_cvtts_roundpd_epi64
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.round.512(<8 x double>
+  return _mm512_maskz_cvtts_roundpd_epi64(U, A, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_cvttspd_epu64(__m512d A) {
+  // CHECK-LABEL: test_mm512_cvttspd_epu64
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.round.512(<8 x double>
+  return _mm512_cvttspd_epu64(A);
+}
+
+__m512i test_mm512_mask_cvttspd_epu64(__m512i W, __mmask8 U, __m512d A) {
+  // CHECK-LABEL: test_mm512_mask_cvttspd_epu64
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.round.512(<8 x double>
+  return _mm512_mask_cvttspd_epu64(W, U, A);
+}
+
+__m512i test_mm512_maskz_cvttspd_epu64(__mmask8 U, __m512d A) {
+  // CHECK-LABEL: test_mm512_maskz_cvttspd_epu64
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.round.512(<8 x double>
+  return _mm512_maskz_cvttspd_epu64(U, A);
+}
+
+__m512i test_mm512_cvtts_roundpd_epu64(__m512d A) {
+  // CHECK-LABEL: test_mm512_cvtts_roundpd_epu64
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.round.512(<8 x double>
+  return _mm512_cvtts_roundpd_epu64(A, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_mask_cvtts_roundpd_epu64(__m512i W, __mmask8 U, __m512d A) {
+  // CHECK-LABEL: test_mm512_mask_cvtts_roundpd_epu64
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.round.512(<8 x double>
+  return _mm512_mask_cvtts_roundpd_epu64(W, U, A, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_maskz_cvtts_roundpd_epu64(__mmask8 U, __m512d A) {
+  // CHECK-LABEL: test_mm512_maskz_cvtts_roundpd_epu64
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.round.512(<8 x double>
+  return _mm512_maskz_cvtts_roundpd_epu64(U, A, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_cvttsps_epi64(__m256 A) {
+  // CHECK-LABEL: test_mm512_cvttsps_epi64
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.round.512(<8 x float>
+  return _mm512_cvttsps_epi64(A);
+}
+
+__m512i test_mm512_mask_cvttsps_epi64(__m512i W, __mmask8 U, __m256 A) {
+  // CHECK-LABEL: test_mm512_mask_cvttsps_epi64
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.round.512(<8 x float>
+  return _mm512_mask_cvttsps_epi64(W, U, A);
+}
+
+__m512i test_mm512_maskz_cvttsps_epi64(__mmask8 U, __m256 A) {
+  // CHECK-LABEL: test_mm512_maskz_cvttsps_epi64
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.round.512(<8 x float>
+  return _mm512_maskz_cvttsps_epi64(U, A);
+}
+
+__m512i test_mm512_cvtts_roundps_epi64(__m256 A) {
+  // CHECK-LABEL: test_mm512_cvtts_roundps_epi64
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.round.512(<8 x float>
+  return _mm512_cvtts_roundps_epi64(A, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_mask_cvtts_roundps_epi64(__m512i W, __mmask8 U, __m256 A) {
+  // CHECK-LABEL: test_mm512_mask_cvtts_roundps_epi64
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.round.512(<8 x float>
+  return _mm512_mask_cvtts_roundps_epi64(W, U, A, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_maskz_cvtts_roundps_epi64(__mmask8 U, __m256 A) {
+  // CHECK-LABEL: test_mm512_maskz_cvtts_roundps_epi64
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.round.512(<8 x float>
+  return _mm512_maskz_cvtts_roundps_epi64(U, A, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_cvttsps_epu64(__m256 A) {
+  // CHECK-LABEL: test_mm512_cvttsps_epu64
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.round.512(<8 x float>
+  return _mm512_cvttsps_epu64(A);
+}
+
+__m512i test_mm512_mask_cvttsps_epu64(__m512i W, __mmask8 U, __m256 A) {
+  // CHECK-LABEL: test_mm512_mask_cvttsps_epu64
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.round.512(<8 x float>
+  return _mm512_mask_cvttsps_epu64(W, U, A);
+}
+
+__m512i test_mm512_maskz_cvttsps_epu64(__mmask8 U, __m256 A) {
+  // CHECK-LABEL: test_mm512_maskz_cvttsps_epu64
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.round.512(<8 x float>
+  return _mm512_maskz_cvttsps_epu64(U, A);
+}
+
+__m512i test_mm512_cvtts_roundps_epu64(__m256 A) {
+  // CHECK-LABEL: test_mm512_cvtts_roundps_epu64
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.round.512(<8 x float>
+  return _mm512_cvtts_roundps_epu64(A, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_mask_cvtts_roundps_epu64(__m512i W, __mmask8 U, __m256 A) {
+  // CHECK-LABEL: test_mm512_mask_cvtts_roundps_epu64
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.round.512(<8 x float>
+  return _mm512_mask_cvtts_roundps_epu64(W, U, A, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_maskz_cvtts_roundps_epu64(__mmask8 U, __m256 A) {
+  // CHECK-LABEL: test_mm512_maskz_cvtts_roundps_epu64
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.round.512(<8 x float>
+  return _mm512_maskz_cvtts_roundps_epu64(U, A, _MM_FROUND_NO_EXC);
+}
diff --git clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins.c clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins.c
new file mode 100644
index 000000000000..cccee04627d2
--- /dev/null
+++ clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins.c
@@ -0,0 +1,151 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386 -target-feature +avx10.2-512 -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,X86
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64 -target-feature +avx10.2-512 -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,X64
+
+#include <immintrin.h>
+#include <stddef.h>
+
+__m256i test_mm512_cvttspd_epi32(__m512d A) {
+  // CHECK-LABEL: test_mm512_cvttspd_epi32
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2dqs.round.512(<8 x double>
+  return _mm512_cvttspd_epi32(A);
+}
+
+__m256i test_mm512_mask_cvttspd_epi32(__m256i W, __mmask8 U, __m512d A) {
+  // CHECK-LABEL: test_mm512_mask_cvttspd_epi32
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2dqs.round.512(<8 x double>
+  return _mm512_mask_cvttspd_epi32(W, U, A);
+}
+
+__m256i test_mm512_maskz_cvttspd_epi32(__mmask8 U, __m512d A) {
+  // CHECK-LABEL: test_mm512_maskz_cvttspd_epi32
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2dqs.round.512(<8 x double>
+  return _mm512_maskz_cvttspd_epi32(U, A);
+}
+
+__m256i test_mm512_cvtts_roundpd_epi32(__m512d A) {
+  // CHECK-LABEL: test_mm512_cvtts_roundpd_epi32
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2dqs.round.512(<8 x double>
+  return _mm512_cvtts_roundpd_epi32(A, _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm512_mask_cvtts_roundpd_epi32(__m256i W, __mmask8 U, __m512d A) {
+  // CHECK-LABEL: test_mm512_mask_cvtts_roundpd_epi32
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2dqs.round.512(<8 x double>
+  return _mm512_mask_cvtts_roundpd_epi32(W, U, A, _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm512_maskz_cvtts_roundpd_epi32(__mmask8 U, __m512d A) {
+  // CHECK-LABEL: test_mm512_maskz_cvtts_roundpd_epi32
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2dqs.round.512(<8 x double>
+  return _mm512_maskz_cvtts_roundpd_epi32(U, A, _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm512_cvttspd_epu32(__m512d A) {
+  // CHECK-LABEL: test_mm512_cvttspd_epu32
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2udqs.round.512(<8 x double>
+  return _mm512_cvttspd_epu32(A);
+}
+
+__m256i test_mm512_mask_cvttspd_epu32(__m256i W, __mmask8 U, __m512d A) {
+  // CHECK-LABEL: test_mm512_mask_cvttspd_epu32
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2udqs.round.512(<8 x double>
+  return _mm512_mask_cvttspd_epu32(W, U, A);
+}
+
+__m256i test_mm512_maskz_cvttspd_epu32(__mmask8 U, __m512d A) {
+  // CHECK-LABEL: test_mm512_maskz_cvttspd_epu32
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2udqs.round.512(<8 x double>
+  return _mm512_maskz_cvttspd_epu32(U, A);
+}
+
+__m256i test_mm512_cvtts_roundpd_epu32(__m512d A) {
+  // CHECK-LABEL: test_mm512_cvtts_roundpd_epu32
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2udqs.round.512(<8 x double>
+  return _mm512_cvtts_roundpd_epu32(A, _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm512_mask_cvtts_roundpd_epu32(__m256i W, __mmask8 U, __m512d A) {
+  // CHECK-LABEL: test_mm512_mask_cvtts_roundpd_epu32
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2udqs.round.512(<8 x double>
+  return _mm512_mask_cvtts_roundpd_epu32(W, U, A, _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm512_maskz_cvtts_roundpd_epu32(__mmask8 U, __m512d A) {
+  // CHECK-LABEL: test_mm512_maskz_cvtts_roundpd_epu32
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2udqs.round.512(<8 x double>
+  return _mm512_maskz_cvtts_roundpd_epu32(U, A, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_cvttsps_epi32(__m512 A) {
+  // CHECK-LABEL: test_mm512_cvttsps_epi32
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2dqs.round.512(<16 x float>
+  return _mm512_cvttsps_epi32(A);
+}
+
+__m512i test_mm512_mask_cvttsps_epi32(__m512i W, __mmask8 U, __m512 A) {
+  // CHECK-LABEL: test_mm512_mask_cvttsps_epi32
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2dqs.round.512(<16 x float>
+  return _mm512_mask_cvttsps_epi32(W, U, A);
+}
+
+__m512i test_mm512_maskz_cvttsps_epi32(__mmask8 U, __m512 A) {
+  // CHECK-LABEL: test_mm512_maskz_cvttsps_epi32
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2dqs.round.512(<16 x float>
+  return _mm512_maskz_cvttsps_epi32(U, A);
+}
+
+__m512i test_mm512_cvtts_roundps_epi32(__m512 A) {
+  // CHECK-LABEL: test_mm512_cvtts_roundps_epi32
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2dqs.round.512(<16 x float>
+  return _mm512_cvtts_roundps_epi32(A, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_mask_cvtts_roundps_epi32(__m512i W, __mmask8 U, __m512 A) {
+  // CHECK-LABEL: test_mm512_mask_cvtts_roundps_epi32
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2dqs.round.512(<16 x float>
+  return _mm512_mask_cvtts_roundps_epi32(W, U, A, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_maskz_cvtts_roundps_epi32(__mmask8 U, __m512 A) {
+  // CHECK-LABEL: test_mm512_maskz_cvtts_roundps_epi32
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2dqs.round.512(<16 x float>
+  return _mm512_maskz_cvtts_roundps_epi32(U, A, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_cvttsps_epu32(__m512 A) {
+  // CHECK-LABEL: test_mm512_cvttsps_epu32
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2udqs.round.512(<16 x float>
+  return _mm512_cvttsps_epu32(A);
+}
+
+__m512i test_mm512_mask_cvttsps_epu32(__m512i W, __mmask8 U, __m512 A) {
+  // CHECK-LABEL: test_mm512_mask_cvttsps_epu32
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2udqs.round.512(<16 x float>
+  return _mm512_mask_cvttsps_epu32(W, U, A);
+}
+
+__m512i test_mm512_maskz_cvttsps_epu32(__mmask8 U, __m512 A) {
+  // CHECK-LABEL: test_mm512_maskz_cvttsps_epu32
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2udqs.round.512(<16 x float>
+  return _mm512_maskz_cvttsps_epu32(U, A);
+}
+
+__m512i test_mm512_cvtts_roundps_epu32(__m512 A) {
+  // CHECK-LABEL: test_mm512_cvtts_roundps_epu32
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2udqs.round.512(<16 x float>
+  return _mm512_cvtts_roundps_epu32(A, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_mask_cvtts_roundps_epu32(__m512i W, __mmask8 U, __m512 A) {
+  // CHECK-LABEL: test_mm512_mask_cvtts_roundps_epu32
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2udqs.round.512(<16 x float>
+  return _mm512_mask_cvtts_roundps_epu32(W, U, A, _MM_FROUND_NO_EXC);
+}
+__m512i test_mm512_maskz_cvtts_roundps_epu32(__mmask8 U, __m512 A) {
+  // CHECK-LABEL: test_mm512_maskz_cvtts_roundps_epu32
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2udqs.round.512(<16 x float>
+  return _mm512_maskz_cvtts_roundps_epu32(U, A, _MM_FROUND_NO_EXC);
+}
+
+// X64: {{.*}}
+// X86: {{.*}}
diff --git clang/test/CodeGen/X86/avx10_2satcvtds-builtins-errors.c clang/test/CodeGen/X86/avx10_2satcvtds-builtins-errors.c
new file mode 100644
index 000000000000..72d2769dc210
--- /dev/null
+++ clang/test/CodeGen/X86/avx10_2satcvtds-builtins-errors.c
@@ -0,0 +1,57 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386-unknown-unknown -target-feature +avx10.2-256 -emit-llvm -Wall -Werror -verify
+
+unsigned long long test_mm_cvttssd(unsigned long long __A) {
+  return _mm_cvttssd(__A); // expected-error {{call to undeclared function '_mm_cvttssd'}}
+}
+
+unsigned long long test_mm_cvttsss(unsigned long long __A) {
+  return _mm_cvttsss(__A); // expected-error {{call to undeclared function '_mm_cvttsss'}}
+}
+
+#include <immintrin.h>
+#include <stddef.h>
+
+__m128i test_mm256_cvtts_roundpd_epi32(__m256d A) {
+  return _mm256_cvtts_roundpd_epi32(A, 22); // expected-error {{invalid rounding argument}}
+}
+__m128i test_mm256_mask_cvtts_roundpd_epi32(__m128i W, __mmask8 U, __m256d A) {
+  return _mm256_mask_cvtts_roundpd_epi32(W, U, A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m128i test_mm256_maskz_cvtts_roundpd_epi32(__mmask8 U, __m256d A) {
+  return _mm256_maskz_cvtts_roundpd_epi32(U, A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m128i test_mm256_cvtts_roundpd_epu32(__m256d A) {
+  return _mm256_cvtts_roundpd_epu32(A, 22); // expected-error {{invalid rounding argument}}
+}
+__m128i test_mm256_mask_cvtts_roundpd_epu32(__m128i W, __mmask8 U, __m256d A) {
+  return _mm256_mask_cvtts_roundpd_epu32(W, U, A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m128i test_mm256_maskz_cvtts_roundpd_epu32(__mmask8 U, __m256d A) {
+  return _mm256_maskz_cvtts_roundpd_epu32(U, A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m256i test_mm256_cvtts_roundps_epi32(__m256 A) {
+  return _mm256_cvtts_roundps_epi32(A, 22); // expected-error {{invalid rounding argument}}
+}
+__m256i test_mm256_mask_cvtts_roundps_epi32(__m256i W, __mmask8 U, __m256 A) {
+  return _mm256_mask_cvtts_roundps_epi32(W, U, A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m256i test_mm256_maskz_cvtts_roundps_epi32(__mmask8 U, __m256 A) {
+  return _mm256_maskz_cvtts_roundps_epi32(U, A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m256i test_mm256_cvtts_roundps_epu32(__m256 A) {
+  return _mm256_cvtts_roundps_epu32(A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m256i test_mm256_mask_cvtts_roundps_epu32(__m256i W, __mmask8 U, __m256 A) {
+  return _mm256_mask_cvtts_roundps_epu32(W, U, A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m256i test_mm256_maskz_cvtts_roundps_epu32(__mmask8 U, __m256 A) {
+  return _mm256_maskz_cvtts_roundps_epu32(U, A, 22); // expected-error {{invalid rounding argument}}
+}
diff --git clang/test/CodeGen/X86/avx10_2satcvtds-builtins-x64.c clang/test/CodeGen/X86/avx10_2satcvtds-builtins-x64.c
new file mode 100644
index 000000000000..00384731a51f
--- /dev/null
+++ clang/test/CodeGen/X86/avx10_2satcvtds-builtins-x64.c
@@ -0,0 +1,262 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +avx10.2-256 -emit-llvm -o - | FileCheck %s
+
+#include <immintrin.h>
+#include <stddef.h>
+
+// scalar
+
+int test_mm_cvttssd_i32(__m128d __A) {
+  // CHECK-LABEL: @test_mm_cvttssd_i32
+  // CHECK: @llvm.x86.avx10.vcvttsd2sis
+  return _mm_cvtts_roundsd_i32(__A, _MM_FROUND_NO_EXC);
+}
+
+int test_mm_cvttssd_si32(__m128d __A) {
+  // CHECK-LABEL: @test_mm_cvttssd_si32(
+  // CHECK: @llvm.x86.avx10.vcvttsd2sis(<2 x double>
+  return _mm_cvtts_roundsd_si32(__A, _MM_FROUND_NO_EXC);
+}
+
+unsigned test_mm_cvttssd_u32(__m128d __A) {
+  // CHECK-LABEL: @test_mm_cvttssd_u32(
+  // CHECK: @llvm.x86.avx10.vcvttsd2usis(<2 x double>
+  return _mm_cvtts_roundsd_u32(__A, _MM_FROUND_NO_EXC);
+}
+
+int test_mm_cvttsss_i32(__m128 __A) {
+  // CHECK-LABEL: @test_mm_cvttsss_i32(
+  // CHECK: @llvm.x86.avx10.vcvttss2sis(<4 x float>
+  return _mm_cvtts_roundss_i32(__A, _MM_FROUND_NO_EXC);
+}
+
+int test_mm_cvttsss_si32(__m128 __A) {
+  // CHECK-LABEL: @test_mm_cvttsss_si32(
+  // CHECK: @llvm.x86.avx10.vcvttss2sis(<4 x float>
+  return _mm_cvtts_roundss_si32(__A, _MM_FROUND_NO_EXC);
+}
+
+unsigned test_mm_cvttsss_u32(__m128 __A) {
+  // CHECK-LABEL: @test_mm_cvttsss_u32(
+  // CHECK: @llvm.x86.avx10.vcvttss2usis(<4 x float>
+  return _mm_cvtts_roundss_u32(__A, _MM_FROUND_NO_EXC);
+}
+
+// vector
+// 128 bit
+__m128i test_mm_cvttspd_epi64(__m128d A) {
+  // CHECK-LABEL: @test_mm_cvttspd_epi64
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.128(<2 x double>
+  return _mm_cvttspd_epi64(A);
+}
+
+__m128i test_mm_mask_cvttspd_epi64(__m128i W, __mmask8 U, __m128d A) {
+  // CHECK-LABEL: @test_mm_mask_cvttspd_epi64
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.128(<2 x double>
+  return _mm_mask_cvttspd_epi64(W, U, A);
+}
+
+__m128i test_mm_maskz_cvttspd_epi64(__mmask8 U, __m128d A) {
+  // CHECK-LABEL: @test_mm_maskz_cvttspd_epi64
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.128(<2 x double>
+  return _mm_maskz_cvttspd_epi64(U, A);
+}
+
+__m128i test_mm_cvttspd_epu64(__m128d A) {
+  // CHECK-LABEL: @test_mm_cvttspd_epu64
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.128(<2 x double>
+  return _mm_cvttspd_epu64(A);
+}
+
+__m128i test_mm_mask_cvttspd_epu64(__m128i W, __mmask8 U, __m128d A) {
+  // CHECK-LABEL: @test_mm_mask_cvttspd_epu64
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.128(<2 x double>
+  return _mm_mask_cvttspd_epu64(W, U, A);
+}
+
+__m128i test_mm_maskz_cvttspd_epu64(__mmask8 U, __m128d A) {
+  // CHECK-LABEL: @test_mm_maskz_cvttspd_epu64
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.128(<2 x double>
+  return _mm_maskz_cvttspd_epu64(U, A);
+}
+
+// 256 bit
+__m256i test_mm256_cvttspd_epi64(__m256d A) {
+  // CHECK-LABEL: @test_mm256_cvttspd_epi64
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.round.256(<4 x double>
+  return _mm256_cvttspd_epi64(A);
+}
+
+__m256i test_mm256_mask_cvttspd_epi64(__m256i W, __mmask8 U, __m256d A) {
+  // CHECK-LABEL: @test_mm256_mask_cvttspd_epi64
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.round.256(<4 x double>
+  return _mm256_mask_cvttspd_epi64(W, U, A);
+}
+
+__m256i test_mm256_maskz_cvttspd_epi64(__mmask8 U, __m256d A) {
+  // CHECK-LABEL: @test_mm256_maskz_cvttspd_epi64
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.round.256(<4 x double>
+  return _mm256_maskz_cvttspd_epi64(U, A);
+}
+
+__m256i test_mm256_cvtts_roundpd_epi64(__m256d A) {
+  // CHECK-LABEL: @test_mm256_cvtts_roundpd_epi64
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.round.256(<4 x double>
+  return _mm256_cvtts_roundpd_epi64(A, _MM_FROUND_NEARBYINT);
+}
+
+__m256i test_mm256_mask_cvtts_roundpd_epi64(__m256i W, __mmask8 U, __m256d A) {
+  // CHECK-LABEL: @test_mm256_mask_cvtts_roundpd_epi64
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.round.256(<4 x double>
+  return _mm256_mask_cvtts_roundpd_epi64(W, U, A, _MM_FROUND_NEARBYINT);
+}
+
+__m256i test_mm256_maskz_cvtts_roundpd_epi64(__mmask8 U, __m256d A) {
+  // CHECK-LABEL: @test_mm256_maskz_cvtts_roundpd_epi64
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.round.256(<4 x double>
+  return _mm256_maskz_cvtts_roundpd_epi64(U, A, _MM_FROUND_NEARBYINT);
+}
+
+__m256i test_mm256_cvttspd_epu64(__m256d A) {
+  // CHECK-LABEL: @test_mm256_cvttspd_epu64
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.round.256(<4 x double>
+  return _mm256_cvttspd_epu64(A);
+}
+
+__m256i test_mm256_mask_cvttspd_epu64(__m256i W, __mmask8 U, __m256d A) {
+  // CHECK-LABEL: @test_mm256_mask_cvttspd_epu64
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.round.256(<4 x double>
+  return _mm256_mask_cvttspd_epu64(W, U, A);
+}
+
+__m256i test_mm256_maskz_cvttspd_epu64(__mmask8 U, __m256d A) {
+  // CHECK-LABEL: @test_mm256_maskz_cvttspd_epu64
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.round.256(<4 x double>
+  return _mm256_maskz_cvttspd_epu64(U, A);
+}
+
+__m256i test_mm256_cvtts_roundpd_epu64(__m256d A) {
+  // CHECK-LABEL: @test_mm256_cvtts_roundpd_epu64
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.round.256(<4 x double>
+  return _mm256_cvtts_roundpd_epu64(A, _MM_FROUND_NEARBYINT);
+}
+
+__m256i test_mm256_mask_cvtts_roundpd_epu64(__m256i W, __mmask8 U, __m256d A) {
+  // CHECK-LABEL: @test_mm256_mask_cvtts_roundpd_epu64
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.round.256(<4 x double>
+  return _mm256_mask_cvtts_roundpd_epu64(W, U, A, _MM_FROUND_NEARBYINT);
+}
+
+__m256i test_mm256_maskz_cvtts_roundpd_epu64(__mmask8 U, __m256d A) {
+  // CHECK-LABEL: @test_mm256_maskz_cvtts_roundpd_epu64
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.round.256(<4 x double>
+  return _mm256_maskz_cvtts_roundpd_epu64(U, A, _MM_FROUND_NEARBYINT);
+}
+
+// 128 bit
+__m128i test_mm_cvttsps_epi64(__m128 A) {
+  // CHECK-LABEL: @test_mm_cvttsps_epi64
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.128(<4 x float>
+  return _mm_cvttsps_epi64(A);
+}
+
+__m128i test_mm_mask_cvttsps_epi64(__m128i W, __mmask8 U, __m128 A) {
+  // CHECK-LABEL: @test_mm_mask_cvttsps_epi64
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.128(<4 x float>
+  return _mm_mask_cvttsps_epi64(W, U, A);
+}
+
+__m128i test_mm_maskz_cvttsps_epi64(__mmask8 U, __m128 A) {
+  // CHECK-LABEL: @test_mm_maskz_cvttsps_epi64
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.128(<4 x float>
+  return _mm_maskz_cvttsps_epi64(U, A);
+}
+
+__m128i test_mm_cvttsps_epu64(__m128 A) {
+  // CHECK-LABEL: @test_mm_cvttsps_epu64
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.128(<4 x float>
+  return _mm_cvttsps_epu64(A);
+}
+
+__m128i test_mm_mask_cvttsps_epu64(__m128i W, __mmask8 U, __m128 A) {
+  // CHECK-LABEL: @test_mm_mask_cvttsps_epu64
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.128(<4 x float>
+  return _mm_mask_cvttsps_epu64(W, U, A);
+}
+
+__m128i test_mm_maskz_cvttsps_epu64(__mmask8 U, __m128 A) {
+  // CHECK-LABEL: @test_mm_maskz_cvttsps_epu64
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.128(<4 x float>
+  return _mm_maskz_cvttsps_epu64(U, A);
+}
+
+__m256i test_mm256_cvttsps_epi64(__m128 A) {
+  // CHECK-LABEL: @test_mm256_cvttsps_epi64
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.round.256(<4 x float>
+  return _mm256_cvttsps_epi64(A);
+}
+
+__m256i test_mm256_mask_cvttsps_epi64(__m256i W, __mmask8 U, __m128 A) {
+  // CHECK-LABEL: @test_mm256_mask_cvttsps_epi64
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.round.256(<4 x float>
+  return _mm256_mask_cvttsps_epi64(W, U, A);
+}
+
+__m256i test_mm256_maskz_cvttsps_epi64(__mmask8 U, __m128 A) {
+  // CHECK-LABEL: @test_mm256_maskz_cvttsps_epi64
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.round.256(<4 x float>
+  return _mm256_maskz_cvttsps_epi64(U, A);
+}
+
+__m256i test_mm256_cvtts_roundps_epi64(__m128 A) {
+  // CHECK-LABEL: @test_mm256_cvtts_roundps_epi64
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.round.256(<4 x float>
+  return _mm256_cvtts_roundps_epi64(A, _MM_FROUND_NEARBYINT);
+}
+
+__m256i test_mm256_mask_cvtts_roundps_epi64(__m256i W, __mmask8 U, __m128 A) {
+  // CHECK-LABEL: @test_mm256_mask_cvtts_roundps_epi64
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.round.256(<4 x float>
+  return _mm256_mask_cvtts_roundps_epi64(W, U, A, _MM_FROUND_NEARBYINT);
+}
+
+__m256i test_mm256_maskz_cvtts_roundps_epi64(__mmask8 U, __m128 A) {
+  // CHECK-LABEL: @test_mm256_maskz_cvtts_roundps_epi64
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.round.256(<4 x float>
+  return _mm256_maskz_cvtts_roundps_epi64(U, A, _MM_FROUND_NEARBYINT);
+}
+
+__m256i test_mm256_cvttsps_epu64(__m128 A) {
+  // CHECK-LABEL: @test_mm256_cvttsps_epu64
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.round.256(<4 x float>
+  return _mm256_cvttsps_epu64(A);
+}
+
+__m256i test_mm256_mask_cvttsps_epu64(__m256i W, __mmask8 U, __m128 A) {
+  // CHECK-LABEL: @test_mm256_mask_cvttsps_epu64
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.round.256(<4 x float>
+  return _mm256_mask_cvttsps_epu64(W, U, A);
+}
+
+__m256i test_mm256_maskz_cvttsps_epu64(__mmask8 U, __m128 A) {
+  // CHECK-LABEL: @test_mm256_maskz_cvttsps_epu64
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.round.256(<4 x float>
+  return _mm256_maskz_cvttsps_epu64(U, A);
+}
+
+__m256i test_mm256_cvtts_roundps_epu64(__m128 A) {
+  // CHECK-LABEL: @test_mm256_cvtts_roundps_epu64
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.round.256(<4 x float>
+  return _mm256_cvtts_roundps_epu64(A, _MM_FROUND_NEARBYINT);
+}
+
+__m256i test_mm256_mask_cvtts_roundps_epu64(__m256i W, __mmask8 U, __m128 A) {
+  // CHECK-LABEL: @test_mm256_mask_cvtts_roundps_epu64
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.round.256(<4 x float>
+  return _mm256_mask_cvtts_roundps_epu64(W, U, A, _MM_FROUND_NEARBYINT);
+}
+
+__m256i test_mm256_maskz_cvtts_roundps_epu64(__mmask8 U, __m128 A) {
+  // CHECK-LABEL: @test_mm256_maskz_cvtts_roundps_epu64
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.round.256(<4 x float>
+  return _mm256_maskz_cvtts_roundps_epu64(U, A, _MM_FROUND_NEARBYINT);
+}
diff --git clang/test/CodeGen/X86/avx10_2satcvtds-builtins.c clang/test/CodeGen/X86/avx10_2satcvtds-builtins.c
new file mode 100644
index 000000000000..bb90f6a086fa
--- /dev/null
+++ clang/test/CodeGen/X86/avx10_2satcvtds-builtins.c
@@ -0,0 +1,225 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386 -target-feature +avx10.2-256 -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,X86
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64 -target-feature +avx10.2-256 -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,X64
+
+#include <immintrin.h>
+#include <stddef.h>
+
+__m128i test_mm_cvttspd_epi32(__m128d A) {
+  // CHECK-LABEL: @test_mm_cvttspd_epi32
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2dqs.128(<2 x double>
+  return _mm_cvttspd_epi32(A);
+}
+
+__m128i test_mm_mask_cvttspd_epi32(__m128i W, __mmask8 U, __m128d A) {
+  // CHECK-LABEL: @test_mm_mask_cvttspd_epi32
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2dqs.128(<2 x double>
+  return _mm_mask_cvttspd_epi32(W, U, A);
+}
+
+__m128i test_mm_maskz_cvttspd_epi32(__mmask8 U, __m128d A) {
+  // CHECK-LABEL: @test_mm_maskz_cvttspd_epi32(
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2dqs.128(<2 x double>
+  return _mm_maskz_cvttspd_epi32(U, A);
+}
+
+__m128i test_mm256_cvttspd_epi32(__m256d A) {
+  // CHECK-LABEL: @test_mm256_cvttspd_epi32
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2dqs.round.256(<4 x double>
+  return _mm256_cvttspd_epi32(A);
+}
+
+__m128i test_mm256_mask_cvttspd_epi32(__m128i W, __mmask8 U, __m256d A) {
+  // CHECK-LABEL: @test_mm256_mask_cvttspd_epi32
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2dqs.round.256(<4 x double>
+  return _mm256_mask_cvttspd_epi32(W, U, A);
+}
+
+__m128i test_mm256_maskz_cvttspd_epi32(__mmask8 U, __m256d A) {
+  // CHECK-LABEL: @test_mm256_maskz_cvttspd_epi32
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2dqs.round.256(<4 x double>
+  return _mm256_maskz_cvttspd_epi32(U, A);
+}
+
+__m128i test_mm256_cvtts_roundpd_epi32(__m256d A) {
+  // CHECK-LABEL: @test_mm256_cvtts_roundpd_epi32
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2dqs.round.256(<4 x double>
+  return _mm256_cvtts_roundpd_epi32(A, _MM_FROUND_NEARBYINT);
+}
+
+__m128i test_mm256_mask_cvtts_roundpd_epi32(__m128i W, __mmask8 U, __m256d A) {
+  // CHECK-LABEL: @test_mm256_mask_cvtts_roundpd_epi32
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2dqs.round.256(<4 x double>
+  return _mm256_mask_cvtts_roundpd_epi32(W, U, A, _MM_FROUND_NEARBYINT);
+}
+
+__m128i test_mm256_maskz_cvtts_roundpd_epi32(__mmask8 U, __m256d A) {
+  // CHECK-LABEL: @test_mm256_maskz_cvtts_roundpd_epi32
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2dqs.round.256(<4 x double>
+  return _mm256_maskz_cvtts_roundpd_epi32(U, A, _MM_FROUND_NEARBYINT);
+}
+
+__m128i test_mm_cvttspd_epu32(__m128d A) {
+  // CHECK-LABEL: @test_mm_cvttspd_epu32
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2udqs.128(<2 x double>
+  return _mm_cvttspd_epu32(A);
+}
+
+__m128i test_mm_mask_cvttspd_epu32(__m128i W, __mmask8 U, __m128d A) {
+  // CHECK-LABEL: @test_mm_mask_cvttspd_epu32
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2udqs.128(<2 x double>
+  return _mm_mask_cvttspd_epu32(W, U, A);
+}
+
+__m128i test_mm_maskz_cvttspd_epu32(__mmask8 U, __m128d A) {
+  // CHECK-LABEL: @test_mm_maskz_cvttspd_epu32
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2udqs.128(<2 x double>
+  return _mm_maskz_cvttspd_epu32(U, A);
+}
+
+
+__m128i test_mm256_cvttspd_epu32(__m256d A) {
+  // CHECK-LABEL: @test_mm256_cvttspd_epu32
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2udqs.round.256(<4 x double>
+  return _mm256_cvttspd_epu32(A);
+}
+
+__m128i test_mm256_mask_cvttspd_epu32(__m128i W, __mmask8 U, __m256d A) {
+  // CHECK-LABEL: @test_mm256_mask_cvttspd_epu32
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2udqs.round.256(<4 x double>
+  return _mm256_mask_cvttspd_epu32(W, U, A);
+}
+
+__m128i test_mm256_maskz_cvttspd_epu32(__mmask8 U, __m256d A) {
+  // CHECK-LABEL: @test_mm256_maskz_cvttspd_epu32
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2udqs.round.256(<4 x double>
+  return _mm256_maskz_cvttspd_epu32(U, A);
+}
+
+__m128i test_mm256_cvtts_roundpd_epu32(__m256d A) {
+  // CHECK-LABEL: @test_mm256_cvtts_roundpd_epu32
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2udqs.round.256(<4 x double>
+  return _mm256_cvtts_roundpd_epu32(A, _MM_FROUND_NEARBYINT);
+}
+
+__m128i test_mm256_mask_cvtts_roundpd_epu32(__m128i W, __mmask8 U, __m256d A) {
+  // CHECK-LABEL: @test_mm256_mask_cvtts_roundpd_epu32
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2udqs.round.256(<4 x double>
+  return _mm256_mask_cvtts_roundpd_epu32(W, U, A, _MM_FROUND_NEARBYINT);
+}
+
+__m128i test_mm256_maskz_cvtts_roundpd_epu32(__mmask8 U, __m256d A) {
+  // CHECK-LABEL: @test_mm256_maskz_cvtts_roundpd_epu32
+  // CHECK: @llvm.x86.avx10.mask.vcvttpd2udqs.round.256(<4 x double>
+  return _mm256_maskz_cvtts_roundpd_epu32(U, A, _MM_FROUND_NEARBYINT);
+}
+
+__m128i test_mm_cvttsps_epi32(__m128 A) {
+  // CHECK-LABEL: @test_mm_cvttsps_epi32
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2dqs.128(<4 x float>
+  return _mm_cvttsps_epi32(A);
+}
+
+__m128i test_mm_mask_cvttsps_epi32(__m128i W, __mmask8 U, __m128 A) {
+  // CHECK-LABEL: @test_mm_mask_cvttsps_epi32
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2dqs.128(<4 x float>
+  return _mm_mask_cvttsps_epi32(W, U, A);
+}
+
+__m128i test_mm_maskz_cvttsps_epi32(__mmask8 U, __m128 A) {
+  // CHECK-LABEL: @test_mm_maskz_cvttsps_epi32
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2dqs.128(<4 x float>
+  return _mm_maskz_cvttsps_epi32(U, A);
+}
+
+__m256i test_mm256_cvttsps_epi32(__m256 A) {
+  // CHECK-LABEL: @test_mm256_cvttsps_epi32
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2dqs.round.256(<8 x float>
+  return _mm256_cvttsps_epi32(A);
+}
+
+__m256i test_mm256_mask_cvttsps_epi32(__m256i W, __mmask8 U, __m256 A) {
+  // CHECK-LABEL: @test_mm256_mask_cvttsps_epi32
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2dqs.round.256(<8 x float>
+  return _mm256_mask_cvttsps_epi32(W, U, A);
+}
+
+__m256i test_mm256_maskz_cvttsps_epi32(__mmask8 U, __m256 A) {
+  // CHECK-LABEL: @test_mm256_maskz_cvttsps_epi32
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2dqs.round.256(<8 x float>
+  return _mm256_maskz_cvttsps_epi32(U, A);
+}
+
+__m256i test_mm256_cvtts_roundps_epi32(__m256 A) {
+  // CHECK-LABEL: @test_mm256_cvtts_roundps_epi32
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2dqs.round.256(<8 x float>
+  return _mm256_cvtts_roundps_epi32(A, _MM_FROUND_NEARBYINT);
+}
+
+__m256i test_mm256_mask_cvtts_roundps_epi32(__m256i W, __mmask8 U, __m256 A) {
+  // CHECK-LABEL: @test_mm256_mask_cvtts_roundps_epi32
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2dqs.round.256(<8 x float>
+  return _mm256_mask_cvtts_roundps_epi32(W, U, A, _MM_FROUND_NEARBYINT);
+}
+
+__m256i test_mm256_maskz_cvtts_roundps_epi32(__mmask8 U, __m256 A) {
+  // CHECK-LABEL: @test_mm256_maskz_cvtts_roundps_epi32
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2dqs.round.256(<8 x float>
+  return _mm256_maskz_cvtts_roundps_epi32(U, A, _MM_FROUND_NEARBYINT);
+}
+
+__m128i test_mm_cvttsps_epu32(__m128 A) {
+  // CHECK-LABEL: @test_mm_cvttsps_epu32
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2udqs.128(<4 x float>
+  return _mm_cvttsps_epu32(A);
+}
+
+__m128i test_mm_mask_cvttsps_epu32(__m128i W, __mmask8 U, __m128 A) {
+  // CHECK-LABEL: @test_mm_mask_cvttsps_epu32
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2udqs.128(<4 x float>
+  return _mm_mask_cvttsps_epu32(W, U, A);
+}
+
+__m128i test_mm_maskz_cvttsps_epu32(__mmask8 U, __m128 A) {
+  // CHECK-LABEL: @test_mm_maskz_cvttsps_epu32
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2udqs.128(<4 x float>
+  return _mm_maskz_cvttsps_epu32(U, A);
+}
+
+__m256i test_mm256_cvttsps_epu32(__m256 A) {
+  // CHECK-LABEL: @test_mm256_cvttsps_epu32
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2udqs.round.256(<8 x float>
+  return _mm256_cvttsps_epu32(A);
+}
+
+__m256i test_mm256_mask_cvttsps_epu32(__m256i W, __mmask8 U, __m256 A) {
+  // CHECK-LABEL: @test_mm256_mask_cvttsps_epu32
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2udqs.round.256(<8 x float>
+  return _mm256_mask_cvttsps_epu32(W, U, A);
+}
+
+__m256i test_mm256_maskz_cvttsps_epu32(__mmask8 U, __m256 A) {
+  // CHECK-LABEL: @test_mm256_maskz_cvttsps_epu32
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2udqs.round.256(<8 x float>
+  return _mm256_maskz_cvttsps_epu32(U, A);
+}
+
+__m256i test_mm256_cvtts_roundps_epu32(__m256 A) {
+  // CHECK-LABEL: @test_mm256_cvtts_roundps_epu32
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2udqs.round.256(<8 x float>
+  return _mm256_cvtts_roundps_epu32(A, _MM_FROUND_NEARBYINT);
+}
+
+__m256i test_mm256_mask_cvtts_roundps_epu32(__m256i W, __mmask8 U, __m256 A) {
+  // CHECK-LABEL: @test_mm256_mask_cvtts_roundps_epu32
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2udqs.round.256(<8 x float>
+  return _mm256_mask_cvtts_roundps_epu32(W, U, A, _MM_FROUND_NEARBYINT);
+}
+
+__m256i test_mm256_maskz_cvtts_roundps_epu32(__mmask8 U, __m256 A) {
+  // CHECK-LABEL: @test_mm256_maskz_cvtts_roundps_epu32
+  // CHECK: @llvm.x86.avx10.mask.vcvttps2udqs.round.256(<8 x float>
+  return _mm256_maskz_cvtts_roundps_epu32(U, A, _MM_FROUND_NEARBYINT);
+}
+
+// X64: {{.*}}
+// X86: {{.*}}
diff --git clang/test/CodeGen/X86/bfloat16-convert-half.c clang/test/CodeGen/X86/bfloat16-convert-half.c
new file mode 100644
index 000000000000..55451dc6f092
--- /dev/null
+++ clang/test/CodeGen/X86/bfloat16-convert-half.c
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -disable-O0-optnone -emit-llvm \
+// RUN:   %s -o - | opt -S -passes=mem2reg | FileCheck %s
+
+// CHECK-LABEL: define dso_local half @test_convert_from_bf16_to_fp16(
+// CHECK-SAME: bfloat noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[FPEXT:%.*]] = fpext bfloat [[A]] to float
+// CHECK-NEXT:    [[FPTRUNC:%.*]] = fptrunc float [[FPEXT]] to half
+// CHECK-NEXT:    ret half [[FPTRUNC]]
+//
+_Float16 test_convert_from_bf16_to_fp16(__bf16 a) {
+    return (_Float16)a;
+}
+
+// CHECK-LABEL: define dso_local bfloat @test_convert_from_fp16_to_bf16(
+// CHECK-SAME: half noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[FPEXT:%.*]] = fpext half [[A]] to float
+// CHECK-NEXT:    [[FPTRUNC:%.*]] = fptrunc float [[FPEXT]] to bfloat
+// CHECK-NEXT:    ret bfloat [[FPTRUNC]]
+//
+__bf16 test_convert_from_fp16_to_bf16(_Float16 a) {
+    return (__bf16)a;
+}
+
diff --git clang/test/CodeGen/aarch64-neon-faminmax-intrinsics.c clang/test/CodeGen/aarch64-neon-faminmax-intrinsics.c
new file mode 100644
index 000000000000..3ae98b5723d7
--- /dev/null
+++ clang/test/CodeGen/aarch64-neon-faminmax-intrinsics.c
@@ -0,0 +1,107 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +faminmax -O3 -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +faminmax -S -O3 -Werror -Wall -o /dev/null %s
+
+// REQUIRES: aarch64-registered-target
+
+#include <arm_neon.h>
+
+// CHECK-LABEL: define dso_local <4 x half> @test_vamin_f16(
+// CHECK-SAME: <4 x half> noundef [[VN:%.*]], <4 x half> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[FAMIN2_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.famin.v4f16(<4 x half> [[VN]], <4 x half> [[VM]])
+// CHECK-NEXT:    ret <4 x half> [[FAMIN2_I]]
+//
+float16x4_t test_vamin_f16(float16x4_t vn, float16x4_t vm) {
+  return vamin_f16(vn, vm);
+}
+
+// CHECK-LABEL: define dso_local <8 x half> @test_vaminq_f16(
+// CHECK-SAME: <8 x half> noundef [[VN:%.*]], <8 x half> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[FAMIN2_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.famin.v8f16(<8 x half> [[VN]], <8 x half> [[VM]])
+// CHECK-NEXT:    ret <8 x half> [[FAMIN2_I]]
+//
+float16x8_t test_vaminq_f16(float16x8_t vn, float16x8_t vm) {
+  return vaminq_f16(vn, vm);
+}
+
+// CHECK-LABEL: define dso_local <2 x float> @test_vamin_f32(
+// CHECK-SAME: <2 x float> noundef [[VN:%.*]], <2 x float> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[FAMIN2_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.famin.v2f32(<2 x float> [[VN]], <2 x float> [[VM]])
+// CHECK-NEXT:    ret <2 x float> [[FAMIN2_I]]
+//
+float32x2_t test_vamin_f32(float32x2_t vn, float32x2_t vm) {
+  return vamin_f32(vn, vm);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_vaminq_f32(
+// CHECK-SAME: <4 x float> noundef [[VN:%.*]], <4 x float> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[FAMIN2_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.famin.v4f32(<4 x float> [[VN]], <4 x float> [[VM]])
+// CHECK-NEXT:    ret <4 x float> [[FAMIN2_I]]
+//
+float32x4_t test_vaminq_f32(float32x4_t vn, float32x4_t vm) {
+  return vaminq_f32(vn, vm);
+}
+
+// CHECK-LABEL: define dso_local <2 x double> @test_vaminq_f64(
+// CHECK-SAME: <2 x double> noundef [[VN:%.*]], <2 x double> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[FAMIN2_I:%.*]] = tail call <2 x double> @llvm.aarch64.neon.famin.v2f64(<2 x double> [[VN]], <2 x double> [[VM]])
+// CHECK-NEXT:    ret <2 x double> [[FAMIN2_I]]
+//
+float64x2_t test_vaminq_f64(float64x2_t vn, float64x2_t vm) {
+  return vaminq_f64(vn, vm);
+}
+
+// CHECK-LABEL: define dso_local <4 x half> @test_vamax_f16(
+// CHECK-SAME: <4 x half> noundef [[VN:%.*]], <4 x half> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[FAMAX2_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.famax.v4f16(<4 x half> [[VN]], <4 x half> [[VM]])
+// CHECK-NEXT:    ret <4 x half> [[FAMAX2_I]]
+//
+float16x4_t test_vamax_f16(float16x4_t vn, float16x4_t vm) {
+  return vamax_f16(vn, vm);
+}
+
+// CHECK-LABEL: define dso_local <8 x half> @test_vamaxq_f16(
+// CHECK-SAME: <8 x half> noundef [[VN:%.*]], <8 x half> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[FAMAX2_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.famax.v8f16(<8 x half> [[VN]], <8 x half> [[VM]])
+// CHECK-NEXT:    ret <8 x half> [[FAMAX2_I]]
+//
+float16x8_t test_vamaxq_f16(float16x8_t vn, float16x8_t vm) {
+  return vamaxq_f16(vn, vm);
+}
+
+// CHECK-LABEL: define dso_local <2 x float> @test_vamax_f32(
+// CHECK-SAME: <2 x float> noundef [[VN:%.*]], <2 x float> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[FAMAX2_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.famax.v2f32(<2 x float> [[VN]], <2 x float> [[VM]])
+// CHECK-NEXT:    ret <2 x float> [[FAMAX2_I]]
+//
+float32x2_t test_vamax_f32(float32x2_t vn, float32x2_t vm) {
+  return vamax_f32(vn, vm);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_vamaxq_f32(
+// CHECK-SAME: <4 x float> noundef [[VN:%.*]], <4 x float> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[FAMAX2_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.famax.v4f32(<4 x float> [[VN]], <4 x float> [[VM]])
+// CHECK-NEXT:    ret <4 x float> [[FAMAX2_I]]
+//
+float32x4_t test_vamaxq_f32(float32x4_t vn, float32x4_t vm) {
+  return vamaxq_f32(vn, vm);
+}
+
+// CHECK-LABEL: define dso_local <2 x double> @test_vamaxq_f64(
+// CHECK-SAME: <2 x double> noundef [[VN:%.*]], <2 x double> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[FAMAX2_I:%.*]] = tail call <2 x double> @llvm.aarch64.neon.famax.v2f64(<2 x double> [[VN]], <2 x double> [[VM]])
+// CHECK-NEXT:    ret <2 x double> [[FAMAX2_I]]
+//
+float64x2_t test_vamaxq_f64(float64x2_t vn, float64x2_t vm) {
+  return vamaxq_f64(vn, vm);
+}
diff --git clang/test/CodeGen/aarch64-neon-luti.c clang/test/CodeGen/aarch64-neon-luti.c
new file mode 100644
index 000000000000..4b485636d45b
--- /dev/null
+++ clang/test/CodeGen/aarch64-neon-luti.c
@@ -0,0 +1,507 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +lut -target-feature +bf16 -O3 -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +lut -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+#include <arm_neon.h>
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_lane_u8(
+// CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v8i8(<8 x i8> [[VN]], <8 x i8> [[VM]], i32 0)
+// CHECK-NEXT:    ret <16 x i8> [[VLUTI2_LANE]]
+//
+uint8x16_t test_vluti2_lane_u8(uint8x8_t vn, uint8x8_t vm) {
+  return vluti2_lane_u8(vn, vm, 0);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_laneq_u8(
+// CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI2_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.laneq.v16i8.v8i8(<8 x i8> [[VN]], <16 x i8> [[VM]], i32 0)
+// CHECK-NEXT:    ret <16 x i8> [[VLUTI2_LANEQ]]
+//
+uint8x16_t test_vluti2_laneq_u8(uint8x8_t vn, uint8x16_t vm) {
+  return vluti2_laneq_u8(vn, vm, 0);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_lane_u8(
+// CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v16i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[VLUTI2_LANE]]
+//
+uint8x16_t test_vluti2q_lane_u8(uint8x16_t vn, uint8x8_t vm) {
+  return vluti2q_lane_u8(vn, vm, 1);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_laneq_u8(
+// CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI2_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.laneq.v16i8.v16i8(<16 x i8> [[VN]], <16 x i8> [[VM]], i32 3)
+// CHECK-NEXT:    ret <16 x i8> [[VLUTI2_LANEQ]]
+//
+uint8x16_t test_vluti2q_laneq_u8(uint8x16_t vn, uint8x16_t vm) {
+  return vluti2q_laneq_u8(vn, vm, 3);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_lane_s8(
+// CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v8i8(<8 x i8> [[VN]], <8 x i8> [[VM]], i32 0)
+// CHECK-NEXT:    ret <16 x i8> [[VLUTI2_LANE]]
+//
+int8x16_t test_vluti2_lane_s8(int8x8_t vn, uint8x8_t vm) {
+  return vluti2_lane_s8(vn, vm, 0);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_laneq_s8(
+// CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI2_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.laneq.v16i8.v8i8(<8 x i8> [[VN]], <16 x i8> [[VM]], i32 0)
+// CHECK-NEXT:    ret <16 x i8> [[VLUTI2_LANEQ]]
+//
+int8x16_t test_vluti2_laneq_s8(int8x8_t vn, uint8x16_t vm) {
+  return vluti2_laneq_s8(vn, vm, 0);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_lane_s8(
+// CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v16i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[VLUTI2_LANE]]
+//
+int8x16_t test_vluti2q_lane_s8(int8x16_t vn, uint8x8_t vm) {
+  return vluti2q_lane_s8(vn, vm, 1);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_laneq_s8(
+// CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI2_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.laneq.v16i8.v16i8(<16 x i8> [[VN]], <16 x i8> [[VM]], i32 3)
+// CHECK-NEXT:    ret <16 x i8> [[VLUTI2_LANEQ]]
+//
+int8x16_t test_vluti2q_laneq_s8(int8x16_t vn, uint8x16_t vm) {
+  return vluti2q_laneq_s8(vn, vm, 3);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_lane_p8(
+// CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v8i8(<8 x i8> [[VN]], <8 x i8> [[VM]], i32 0)
+// CHECK-NEXT:    ret <16 x i8> [[VLUTI2_LANE]]
+//
+poly8x16_t test_vluti2_lane_p8(poly8x8_t vn, uint8x8_t vm) {
+  return vluti2_lane_p8(vn, vm, 0);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_laneq_p8(
+// CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI2_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.laneq.v16i8.v8i8(<8 x i8> [[VN]], <16 x i8> [[VM]], i32 0)
+// CHECK-NEXT:    ret <16 x i8> [[VLUTI2_LANEQ]]
+//
+poly8x16_t test_vluti2_laneq_p8(poly8x8_t vn, uint8x16_t vm) {
+  return vluti2_laneq_p8(vn, vm, 0);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_lane_p8(
+// CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v16i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[VLUTI2_LANE]]
+//
+poly8x16_t test_vluti2q_lane_p8(poly8x16_t vn, uint8x8_t vm) {
+  return vluti2q_lane_p8(vn, vm, 1);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_laneq_p8(
+// CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI2_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.laneq.v16i8.v16i8(<16 x i8> [[VN]], <16 x i8> [[VM]], i32 3)
+// CHECK-NEXT:    ret <16 x i8> [[VLUTI2_LANEQ]]
+//
+poly8x16_t test_vluti2q_laneq_p8(poly8x16_t vn, uint8x16_t vm) {
+  return vluti2q_laneq_p8(vn, vm, 3);
+}
+
+// CHECK-LABEL: define dso_local <8 x i16> @test_vluti2_lane_u16(
+// CHECK-SAME: <4 x i16> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI2_LANE1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v4i16(<4 x i16> [[VN]], <8 x i8> [[VM]], i32 0)
+// CHECK-NEXT:    ret <8 x i16> [[VLUTI2_LANE1]]
+//
+uint16x8_t test_vluti2_lane_u16(uint16x4_t vn, uint8x8_t vm) {
+  return vluti2_lane_u16(vn, vm, 0);
+}
+
+// CHECK-LABEL: define dso_local <8 x i16> @test_vluti2_laneq_u16(
+// CHECK-SAME: <4 x i16> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI2_LANEQ1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.laneq.v8i16.v4i16(<4 x i16> [[VN]], <16 x i8> [[VM]], i32 0)
+// CHECK-NEXT:    ret <8 x i16> [[VLUTI2_LANEQ1]]
+//
+uint16x8_t test_vluti2_laneq_u16(uint16x4_t vn, uint8x16_t vm) {
+  return vluti2_laneq_u16(vn, vm, 0);
+}
+
+// CHECK-LABEL: define dso_local <8 x i16> @test_vluti2q_lane_u16(
+// CHECK-SAME: <8 x i16> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI2_LANE1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v8i16(<8 x i16> [[VN]], <8 x i8> [[VM]], i32 3)
+// CHECK-NEXT:    ret <8 x i16> [[VLUTI2_LANE1]]
+//
+uint16x8_t test_vluti2q_lane_u16(uint16x8_t vn, uint8x8_t vm) {
+  return vluti2q_lane_u16(vn, vm, 3);
+}
+
+// CHECK-LABEL: define dso_local <8 x i16> @test_vluti2q_laneq_u16(
+// CHECK-SAME: <8 x i16> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI2_LANEQ1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.laneq.v8i16.v8i16(<8 x i16> [[VN]], <16 x i8> [[VM]], i32 7)
+// CHECK-NEXT:    ret <8 x i16> [[VLUTI2_LANEQ1]]
+//
+uint16x8_t test_vluti2q_laneq_u16(uint16x8_t vn, uint8x16_t vm) {
+  return vluti2q_laneq_u16(vn, vm, 7);
+}
+
+// CHECK-LABEL: define dso_local <8 x i16> @test_vluti2_lane_s16(
+// CHECK-SAME: <4 x i16> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI2_LANE1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v4i16(<4 x i16> [[VN]], <8 x i8> [[VM]], i32 0)
+// CHECK-NEXT:    ret <8 x i16> [[VLUTI2_LANE1]]
+//
+int16x8_t test_vluti2_lane_s16(int16x4_t vn, uint8x8_t vm) {
+  return vluti2_lane_s16(vn, vm, 0);
+}
+
+// CHECK-LABEL: define dso_local <8 x i16> @test_vluti2_laneq_s16(
+// CHECK-SAME: <4 x i16> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI2_LANEQ1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.laneq.v8i16.v4i16(<4 x i16> [[VN]], <16 x i8> [[VM]], i32 0)
+// CHECK-NEXT:    ret <8 x i16> [[VLUTI2_LANEQ1]]
+//
+int16x8_t test_vluti2_laneq_s16(int16x4_t vn, uint8x16_t vm) {
+  return vluti2_laneq_s16(vn, vm, 0);
+}
+
+// CHECK-LABEL: define dso_local <8 x i16> @test_vluti2q_lane_s16(
+// CHECK-SAME: <8 x i16> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI2_LANE1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v8i16(<8 x i16> [[VN]], <8 x i8> [[VM]], i32 3)
+// CHECK-NEXT:    ret <8 x i16> [[VLUTI2_LANE1]]
+//
+int16x8_t test_vluti2q_lane_s16(int16x8_t vn, uint8x8_t vm) {
+  return vluti2q_lane_s16(vn, vm, 3);
+}
+
+// CHECK-LABEL: define dso_local <8 x i16> @test_vluti2q_laneq_s16(
+// CHECK-SAME: <8 x i16> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI2_LANEQ1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.laneq.v8i16.v8i16(<8 x i16> [[VN]], <16 x i8> [[VM]], i32 7)
+// CHECK-NEXT:    ret <8 x i16> [[VLUTI2_LANEQ1]]
+//
+int16x8_t test_vluti2q_laneq_s16(int16x8_t vn, uint8x16_t vm) {
+  return vluti2q_laneq_s16(vn, vm, 7);
+}
+
+// CHECK-LABEL: define dso_local <8 x half> @test_vluti2_lane_f16(
+// CHECK-SAME: <4 x half> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI2_LANE1:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vluti2.lane.v8f16.v4f16(<4 x half> [[VN]], <8 x i8> [[VM]], i32 0)
+// CHECK-NEXT:    ret <8 x half> [[VLUTI2_LANE1]]
+//
+float16x8_t test_vluti2_lane_f16(float16x4_t vn, uint8x8_t vm) {
+  return vluti2_lane_f16(vn, vm, 0);
+}
+
+// CHECK-LABEL: define dso_local <8 x half> @test_vluti2_laneq_f16(
+// CHECK-SAME: <4 x half> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI2_LANEQ1:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vluti2.laneq.v8f16.v4f16(<4 x half> [[VN]], <16 x i8> [[VM]], i32 0)
+// CHECK-NEXT:    ret <8 x half> [[VLUTI2_LANEQ1]]
+//
+float16x8_t test_vluti2_laneq_f16(float16x4_t vn, uint8x16_t vm) {
+  return vluti2_laneq_f16(vn, vm, 0);
+}
+
+// CHECK-LABEL: define dso_local <8 x half> @test_vluti2q_lane_f16(
+// CHECK-SAME: <8 x half> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI2_LANE1:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vluti2.lane.v8f16.v8f16(<8 x half> [[VN]], <8 x i8> [[VM]], i32 3)
+// CHECK-NEXT:    ret <8 x half> [[VLUTI2_LANE1]]
+//
+float16x8_t test_vluti2q_lane_f16(float16x8_t vn, uint8x8_t vm) {
+  return vluti2q_lane_f16(vn, vm, 3);
+}
+
+// CHECK-LABEL: define dso_local <8 x half> @test_vluti2q_laneq_f16(
+// CHECK-SAME: <8 x half> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI2_LANEQ1:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vluti2.laneq.v8f16.v8f16(<8 x half> [[VN]], <16 x i8> [[VM]], i32 7)
+// CHECK-NEXT:    ret <8 x half> [[VLUTI2_LANEQ1]]
+//
+float16x8_t test_vluti2q_laneq_f16(float16x8_t vn, uint8x16_t vm) {
+  return vluti2q_laneq_f16(vn, vm, 7);
+}
+
+// CHECK-LABEL: define dso_local <8 x bfloat> @test_vluti2_lane_bf16(
+// CHECK-SAME: <4 x bfloat> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI2_LANE1:%.*]] = tail call <8 x bfloat> @llvm.aarch64.neon.vluti2.lane.v8bf16.v4bf16(<4 x bfloat> [[VN]], <8 x i8> [[VM]], i32 0)
+// CHECK-NEXT:    ret <8 x bfloat> [[VLUTI2_LANE1]]
+//
+bfloat16x8_t test_vluti2_lane_bf16(bfloat16x4_t vn, uint8x8_t vm) {
+  return vluti2_lane_bf16(vn, vm, 0);
+}
+
+// CHECK-LABEL: define dso_local <8 x bfloat> @test_vluti2_laneq_bf16(
+// CHECK-SAME: <4 x bfloat> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI2_LANEQ1:%.*]] = tail call <8 x bfloat> @llvm.aarch64.neon.vluti2.laneq.v8bf16.v4bf16(<4 x bfloat> [[VN]], <16 x i8> [[VM]], i32 0)
+// CHECK-NEXT:    ret <8 x bfloat> [[VLUTI2_LANEQ1]]
+//
+bfloat16x8_t test_vluti2_laneq_bf16(bfloat16x4_t vn, uint8x16_t vm) {
+  return vluti2_laneq_bf16(vn, vm, 0);
+}
+
+// CHECK-LABEL: define dso_local <8 x bfloat> @test_vluti2q_lane_bf16(
+// CHECK-SAME: <8 x bfloat> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI2_LANE1:%.*]] = tail call <8 x bfloat> @llvm.aarch64.neon.vluti2.lane.v8bf16.v8bf16(<8 x bfloat> [[VN]], <8 x i8> [[VM]], i32 3)
+// CHECK-NEXT:    ret <8 x bfloat> [[VLUTI2_LANE1]]
+//
+bfloat16x8_t test_vluti2q_lane_bf16(bfloat16x8_t vn, uint8x8_t vm) {
+  return vluti2q_lane_bf16(vn, vm, 3);
+}
+
+// CHECK-LABEL: define dso_local <8 x bfloat> @test_vluti2q_laneq_bf16(
+// CHECK-SAME: <8 x bfloat> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI2_LANEQ1:%.*]] = tail call <8 x bfloat> @llvm.aarch64.neon.vluti2.laneq.v8bf16.v8bf16(<8 x bfloat> [[VN]], <16 x i8> [[VM]], i32 7)
+// CHECK-NEXT:    ret <8 x bfloat> [[VLUTI2_LANEQ1]]
+//
+bfloat16x8_t test_vluti2q_laneq_bf16(bfloat16x8_t vn, uint8x16_t vm) {
+  return vluti2q_laneq_bf16(vn, vm, 7);
+}
+
+// CHECK-LABEL: define dso_local <8 x i16> @test_vluti2_lane_p16(
+// CHECK-SAME: <4 x i16> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI2_LANE1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v4i16(<4 x i16> [[VN]], <8 x i8> [[VM]], i32 0)
+// CHECK-NEXT:    ret <8 x i16> [[VLUTI2_LANE1]]
+//
+poly16x8_t test_vluti2_lane_p16(poly16x4_t vn, uint8x8_t vm) {
+  return vluti2_lane_p16(vn, vm, 0);
+}
+
+// CHECK-LABEL: define dso_local <8 x i16> @test_vluti2_laneq_p16(
+// CHECK-SAME: <4 x i16> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI2_LANEQ1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.laneq.v8i16.v4i16(<4 x i16> [[VN]], <16 x i8> [[VM]], i32 0)
+// CHECK-NEXT:    ret <8 x i16> [[VLUTI2_LANEQ1]]
+//
+poly16x8_t test_vluti2_laneq_p16(poly16x4_t vn, uint8x16_t vm) {
+  return vluti2_laneq_p16(vn, vm, 0);
+}
+
+// CHECK-LABEL: define dso_local <8 x i16> @test_vluti2q_lane_p16(
+// CHECK-SAME: <8 x i16> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI2_LANE1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v8i16(<8 x i16> [[VN]], <8 x i8> [[VM]], i32 3)
+// CHECK-NEXT:    ret <8 x i16> [[VLUTI2_LANE1]]
+//
+poly16x8_t test_vluti2q_lane_p16(poly16x8_t vn, uint8x8_t vm) {
+  return vluti2q_lane_p16(vn, vm, 3);
+}
+
+// CHECK-LABEL: define dso_local <8 x i16> @test_vluti2q_laneq_p16(
+// CHECK-SAME: <8 x i16> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI2_LANEQ1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.laneq.v8i16.v8i16(<8 x i16> [[VN]], <16 x i8> [[VM]], i32 7)
+// CHECK-NEXT:    ret <8 x i16> [[VLUTI2_LANEQ1]]
+//
+poly16x8_t test_vluti2q_laneq_p16(poly16x8_t vn, uint8x16_t vm) {
+  return vluti2q_laneq_p16(vn, vm, 7);
+}
+
+//
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vluti4q_lane_u8(
+// CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI4Q_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti4q.lane.v16i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 0)
+// CHECK-NEXT:    ret <16 x i8> [[VLUTI4Q_LANE]]
+//
+uint8x16_t test_vluti4q_lane_u8(uint8x16_t vn, uint8x8_t vm) {
+  return vluti4q_lane_u8(vn, vm, 0);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vluti4q_laneq_u8(
+// CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI4Q_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti4q.laneq.v16i8(<16 x i8> [[VN]], <16 x i8> [[VM]], i32 0)
+// CHECK-NEXT:    ret <16 x i8> [[VLUTI4Q_LANEQ]]
+//
+uint8x16_t test_vluti4q_laneq_u8(uint8x16_t vn, uint8x16_t vm) {
+  return vluti4q_laneq_u8(vn, vm, 0);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vluti4q_lane_s8(
+// CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI4Q_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti4q.lane.v16i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 0)
+// CHECK-NEXT:    ret <16 x i8> [[VLUTI4Q_LANE]]
+//
+int8x16_t test_vluti4q_lane_s8(int8x16_t vn, uint8x8_t vm) {
+  return vluti4q_lane_s8(vn, vm, 0);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vluti4q_laneq_s8(
+// CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI4Q_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti4q.laneq.v16i8(<16 x i8> [[VN]], <16 x i8> [[VM]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[VLUTI4Q_LANEQ]]
+//
+int8x16_t test_vluti4q_laneq_s8(int8x16_t vn, uint8x16_t vm) {
+  return vluti4q_laneq_s8(vn, vm, 1);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vluti4q_lane_p8(
+// CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI4Q_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti4q.lane.v16i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 0)
+// CHECK-NEXT:    ret <16 x i8> [[VLUTI4Q_LANE]]
+//
+poly8x16_t test_vluti4q_lane_p8(poly8x16_t vn, uint8x8_t vm) {
+  return vluti4q_lane_p8(vn, vm, 0);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vluti4q_laneq_p8(
+// CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI4Q_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti4q.laneq.v16i8(<16 x i8> [[VN]], <16 x i8> [[VM]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[VLUTI4Q_LANEQ]]
+//
+poly8x16_t test_vluti4q_laneq_p8(poly8x16_t vn, uint8x16_t vm) {
+  return vluti4q_laneq_p8(vn, vm, 1);
+}
+
+// CHECK-LABEL: define dso_local <8 x i16> @test_vluti4q_lane_u16_x2(
+// CHECK-SAME: [2 x <8 x i16>] alignstack(16) [[VN_COERCE:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 0
+// CHECK-NEXT:    [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 1
+// CHECK-NEXT:    [[VLUTI4Q_LANE_X24:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti4q.lane.x2.v8i16(<8 x i16> [[VN_COERCE_FCA_0_EXTRACT]], <8 x i16> [[VN_COERCE_FCA_1_EXTRACT]], <8 x i8> [[VM]], i32 0)
+// CHECK-NEXT:    ret <8 x i16> [[VLUTI4Q_LANE_X24]]
+//
+uint16x8_t test_vluti4q_lane_u16_x2(uint16x8x2_t vn, uint8x8_t vm) {
+  return vluti4q_lane_u16_x2(vn, vm, 0);
+}
+
+// CHECK-LABEL: define dso_local <8 x i16> @test_vluti4q_laneq_u16_x2(
+// CHECK-SAME: [2 x <8 x i16>] alignstack(16) [[VN_COERCE:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 0
+// CHECK-NEXT:    [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 1
+// CHECK-NEXT:    [[VLUTI4Q_LANEQ_X24:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti4q.laneq.x2.v8i16(<8 x i16> [[VN_COERCE_FCA_0_EXTRACT]], <8 x i16> [[VN_COERCE_FCA_1_EXTRACT]], <16 x i8> [[VM]], i32 0)
+// CHECK-NEXT:    ret <8 x i16> [[VLUTI4Q_LANEQ_X24]]
+//
+uint16x8_t test_vluti4q_laneq_u16_x2(uint16x8x2_t vn, uint8x16_t vm) {
+  return vluti4q_laneq_u16_x2(vn, vm, 0);
+}
+
+// CHECK-LABEL: define dso_local <8 x i16> @test_vluti4q_lane_s16_x2(
+// CHECK-SAME: [2 x <8 x i16>] alignstack(16) [[VN_COERCE:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 0
+// CHECK-NEXT:    [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 1
+// CHECK-NEXT:    [[VLUTI4Q_LANE_X24:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti4q.lane.x2.v8i16(<8 x i16> [[VN_COERCE_FCA_0_EXTRACT]], <8 x i16> [[VN_COERCE_FCA_1_EXTRACT]], <8 x i8> [[VM]], i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[VLUTI4Q_LANE_X24]]
+//
+int16x8_t test_vluti4q_lane_s16_x2(int16x8x2_t vn, uint8x8_t vm) {
+  return vluti4q_lane_s16_x2(vn, vm, 1);
+}
+
+// CHECK-LABEL: define dso_local <8 x i16> @test_vluti4q_laneq_s16_x2(
+// CHECK-SAME: [2 x <8 x i16>] alignstack(16) [[VN_COERCE:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 0
+// CHECK-NEXT:    [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 1
+// CHECK-NEXT:    [[VLUTI4Q_LANEQ_X24:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti4q.laneq.x2.v8i16(<8 x i16> [[VN_COERCE_FCA_0_EXTRACT]], <8 x i16> [[VN_COERCE_FCA_1_EXTRACT]], <16 x i8> [[VM]], i32 3)
+// CHECK-NEXT:    ret <8 x i16> [[VLUTI4Q_LANEQ_X24]]
+//
+int16x8_t test_vluti4q_laneq_s16_x2(int16x8x2_t vn, uint8x16_t vm) {
+  return vluti4q_laneq_s16_x2(vn, vm, 3);
+}
+
+// CHECK-LABEL: define dso_local <8 x half> @test_vluti4q_lane_f16_x2(
+// CHECK-SAME: [2 x <8 x half>] alignstack(16) [[VN_COERCE:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x half>] [[VN_COERCE]], 0
+// CHECK-NEXT:    [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x half>] [[VN_COERCE]], 1
+// CHECK-NEXT:    [[VLUTI4Q_LANE_X24:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vluti4q.lane.x2.v8f16(<8 x half> [[VN_COERCE_FCA_0_EXTRACT]], <8 x half> [[VN_COERCE_FCA_1_EXTRACT]], <8 x i8> [[VM]], i32 1)
+// CHECK-NEXT:    ret <8 x half> [[VLUTI4Q_LANE_X24]]
+//
+float16x8_t test_vluti4q_lane_f16_x2(float16x8x2_t vn, uint8x8_t vm) {
+  return vluti4q_lane_f16_x2(vn, vm, 1);
+}
+
+// CHECK-LABEL: define dso_local <8 x half> @test_vluti4q_laneq_f16_x2(
+// CHECK-SAME: [2 x <8 x half>] alignstack(16) [[VN_COERCE:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x half>] [[VN_COERCE]], 0
+// CHECK-NEXT:    [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x half>] [[VN_COERCE]], 1
+// CHECK-NEXT:    [[VLUTI4Q_LANEQ_X24:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vluti4q.laneq.x2.v8f16(<8 x half> [[VN_COERCE_FCA_0_EXTRACT]], <8 x half> [[VN_COERCE_FCA_1_EXTRACT]], <16 x i8> [[VM]], i32 1)
+// CHECK-NEXT:    ret <8 x half> [[VLUTI4Q_LANEQ_X24]]
+//
+float16x8_t test_vluti4q_laneq_f16_x2(float16x8x2_t vn, uint8x16_t vm) {
+  return vluti4q_laneq_f16_x2(vn, vm, 1);
+}
+
+// CHECK-LABEL: define dso_local <8 x bfloat> @test_vluti4q_lane_bf16_x2(
+// CHECK-SAME: [2 x <8 x bfloat>] alignstack(16) [[VN_COERCE:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x bfloat>] [[VN_COERCE]], 0
+// CHECK-NEXT:    [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x bfloat>] [[VN_COERCE]], 1
+// CHECK-NEXT:    [[VLUTI4Q_LANE_X24:%.*]] = tail call <8 x bfloat> @llvm.aarch64.neon.vluti4q.lane.x2.v8bf16(<8 x bfloat> [[VN_COERCE_FCA_0_EXTRACT]], <8 x bfloat> [[VN_COERCE_FCA_1_EXTRACT]], <8 x i8> [[VM]], i32 1)
+// CHECK-NEXT:    ret <8 x bfloat> [[VLUTI4Q_LANE_X24]]
+//
+bfloat16x8_t test_vluti4q_lane_bf16_x2(bfloat16x8x2_t vn, uint8x8_t vm) {
+  return vluti4q_lane_bf16_x2(vn, vm, 1);
+}
+
+// CHECK-LABEL: define dso_local <8 x bfloat> @test_vluti4q_laneq_bf16_x2(
+// CHECK-SAME: [2 x <8 x bfloat>] alignstack(16) [[VN_COERCE:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x bfloat>] [[VN_COERCE]], 0
+// CHECK-NEXT:    [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x bfloat>] [[VN_COERCE]], 1
+// CHECK-NEXT:    [[VLUTI4Q_LANEQ_X24:%.*]] = tail call <8 x bfloat> @llvm.aarch64.neon.vluti4q.laneq.x2.v8bf16(<8 x bfloat> [[VN_COERCE_FCA_0_EXTRACT]], <8 x bfloat> [[VN_COERCE_FCA_1_EXTRACT]], <16 x i8> [[VM]], i32 2)
+// CHECK-NEXT:    ret <8 x bfloat> [[VLUTI4Q_LANEQ_X24]]
+//
+bfloat16x8_t test_vluti4q_laneq_bf16_x2(bfloat16x8x2_t vn, uint8x16_t vm) {
+  return vluti4q_laneq_bf16_x2(vn, vm, 2);
+}
+
+// CHECK-LABEL: define dso_local <8 x i16> @test_vluti4q_lane_p16_x2(
+// CHECK-SAME: [2 x <8 x i16>] alignstack(16) [[VN_COERCE:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 0
+// CHECK-NEXT:    [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 1
+// CHECK-NEXT:    [[VLUTI4Q_LANE_X24:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti4q.lane.x2.v8i16(<8 x i16> [[VN_COERCE_FCA_0_EXTRACT]], <8 x i16> [[VN_COERCE_FCA_1_EXTRACT]], <8 x i8> [[VM]], i32 0)
+// CHECK-NEXT:    ret <8 x i16> [[VLUTI4Q_LANE_X24]]
+//
+poly16x8_t test_vluti4q_lane_p16_x2(poly16x8x2_t vn, uint8x8_t vm) {
+  return vluti4q_lane_p16_x2(vn, vm, 0);
+}
+
+// CHECK-LABEL: define dso_local <8 x i16> @test_vluti4q_laneq_p16_x2(
+// CHECK-SAME: [2 x <8 x i16>] alignstack(16) [[VN_COERCE:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 0
+// CHECK-NEXT:    [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 1
+// CHECK-NEXT:    [[VLUTI4Q_LANEQ_X24:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti4q.laneq.x2.v8i16(<8 x i16> [[VN_COERCE_FCA_0_EXTRACT]], <8 x i16> [[VN_COERCE_FCA_1_EXTRACT]], <16 x i8> [[VM]], i32 0)
+// CHECK-NEXT:    ret <8 x i16> [[VLUTI4Q_LANEQ_X24]]
+//
+poly16x8_t test_vluti4q_laneq_p16_x2(poly16x8x2_t vn, uint8x16_t vm) {
+  return vluti4q_laneq_p16_x2(vn, vm, 0);
+}
diff --git clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_faminmax.c clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_faminmax.c
new file mode 100644
index 000000000000..5d026f8cde5e
--- /dev/null
+++ clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_faminmax.c
@@ -0,0 +1,476 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +faminmax -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +faminmax -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +faminmax -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +faminmax -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +faminmax -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+#include <arm_sme.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1
+#else
+#define SVE_ACLE_FUNC(A1,A2) A1##A2
+#endif
+
+
+// Multi, x2
+
+// CHECK-LABEL: @test_svamax_f16_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZDN:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZDN]], i64 8)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZM:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZM]], i64 8)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.famax.x2.nxv8f16(<vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]])
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP4]], 0
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> poison, <vscale x 8 x half> [[TMP5]], i64 0)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP4]], 1
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP6]], <vscale x 8 x half> [[TMP7]], i64 8)
+// CHECK-NEXT:    ret <vscale x 16 x half> [[TMP8]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svamax_f16_x213svfloat16x2_tS_(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZDN:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZDN]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZM:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZM]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.famax.x2.nxv8f16(<vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]])
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP4]], 0
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> poison, <vscale x 8 x half> [[TMP5]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP4]], 1
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP6]], <vscale x 8 x half> [[TMP7]], i64 8)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x half> [[TMP8]]
+//
+svfloat16x2_t test_svamax_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_streaming {
+  return SVE_ACLE_FUNC(svamax,_f16_x2)(zdn, zm);
+}
+
+// CHECK-LABEL: @test_svamax_f32_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZDN:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZDN]], i64 4)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZM:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZM]], i64 4)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.famax.x2.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]])
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 0
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP5]], i64 0)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 1
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]], i64 4)
+// CHECK-NEXT:    ret <vscale x 8 x float> [[TMP8]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svamax_f32_x213svfloat32x2_tS_(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZDN:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZDN]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZM:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZM]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.famax.x2.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]])
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 0
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP5]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 1
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]], i64 4)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x float> [[TMP8]]
+//
+svfloat32x2_t test_svamax_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_streaming {
+  return SVE_ACLE_FUNC(svamax,_f32_x2)(zdn, zm);
+}
+
+// CHECK-LABEL: @test_svamax_f64_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZDN:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZDN]], i64 2)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZM:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZM]], i64 2)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.famax.x2.nxv2f64(<vscale x 2 x double> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]])
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]], 0
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP5]], i64 0)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]], 1
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP6]], <vscale x 2 x double> [[TMP7]], i64 2)
+// CHECK-NEXT:    ret <vscale x 4 x double> [[TMP8]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svamax_f64_x213svfloat64x2_tS_(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZDN:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZDN]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZM:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZM]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.famax.x2.nxv2f64(<vscale x 2 x double> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]])
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]], 0
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP5]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]], 1
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP6]], <vscale x 2 x double> [[TMP7]], i64 2)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x double> [[TMP8]]
+//
+svfloat64x2_t test_svamax_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_streaming {
+  return SVE_ACLE_FUNC(svamax,_f64_x2)(zdn, zm);
+}
+
+// CHECK-LABEL: @test_svamin_f16_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZDN:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZDN]], i64 8)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZM:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZM]], i64 8)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.famin.x2.nxv8f16(<vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]])
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP4]], 0
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> poison, <vscale x 8 x half> [[TMP5]], i64 0)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP4]], 1
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP6]], <vscale x 8 x half> [[TMP7]], i64 8)
+// CHECK-NEXT:    ret <vscale x 16 x half> [[TMP8]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svamin_f16_x213svfloat16x2_tS_(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZDN:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZDN]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZM:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZM]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.famin.x2.nxv8f16(<vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]])
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP4]], 0
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> poison, <vscale x 8 x half> [[TMP5]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP4]], 1
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP6]], <vscale x 8 x half> [[TMP7]], i64 8)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x half> [[TMP8]]
+//
+svfloat16x2_t test_svamin_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_streaming {
+  return SVE_ACLE_FUNC(svamin,_f16_x2)(zdn, zm);
+}
+
+// CHECK-LABEL: @test_svamin_f32_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZDN:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZDN]], i64 4)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZM:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZM]], i64 4)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.famin.x2.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]])
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 0
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP5]], i64 0)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 1
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]], i64 4)
+// CHECK-NEXT:    ret <vscale x 8 x float> [[TMP8]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svamin_f32_x213svfloat32x2_tS_(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZDN:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZDN]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZM:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZM]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.famin.x2.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]])
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 0
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP5]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 1
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]], i64 4)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x float> [[TMP8]]
+//
+svfloat32x2_t test_svamin_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_streaming {
+  return SVE_ACLE_FUNC(svamin,_f32_x2)(zdn, zm);
+}
+
+// CHECK-LABEL: @test_svamin_f64_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZDN:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZDN]], i64 2)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZM:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZM]], i64 2)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.famin.x2.nxv2f64(<vscale x 2 x double> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]])
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]], 0
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP5]], i64 0)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]], 1
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP6]], <vscale x 2 x double> [[TMP7]], i64 2)
+// CHECK-NEXT:    ret <vscale x 4 x double> [[TMP8]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svamin_f64_x213svfloat64x2_tS_(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZDN:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZDN]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZM:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZM]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.famin.x2.nxv2f64(<vscale x 2 x double> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]])
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]], 0
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP5]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]], 1
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP6]], <vscale x 2 x double> [[TMP7]], i64 2)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x double> [[TMP8]]
+//
+svfloat64x2_t test_svamin_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_streaming {
+  return SVE_ACLE_FUNC(svamin,_f64_x2)(zdn, zm);
+}
+
+// Multi, x4
+
+// CHECK-LABEL: @test_svamax_f16_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZDN:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZDN]], i64 8)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZDN]], i64 16)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZDN]], i64 24)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZM:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZM]], i64 8)
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZM]], i64 16)
+// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZM]], i64 24)
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.famax.x4.nxv8f16(<vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], <vscale x 8 x half> [[TMP4]], <vscale x 8 x half> [[TMP5]], <vscale x 8 x half> [[TMP6]], <vscale x 8 x half> [[TMP7]])
+// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP8]], 0
+// CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> poison, <vscale x 8 x half> [[TMP9]], i64 0)
+// CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP8]], 1
+// CHECK-NEXT:    [[TMP12:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP10]], <vscale x 8 x half> [[TMP11]], i64 8)
+// CHECK-NEXT:    [[TMP13:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP8]], 2
+// CHECK-NEXT:    [[TMP14:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP12]], <vscale x 8 x half> [[TMP13]], i64 16)
+// CHECK-NEXT:    [[TMP15:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP8]], 3
+// CHECK-NEXT:    [[TMP16:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP14]], <vscale x 8 x half> [[TMP15]], i64 24)
+// CHECK-NEXT:    ret <vscale x 32 x half> [[TMP16]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svamax_f16_x413svfloat16x4_tS_(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZDN:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZDN]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZDN]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZDN]], i64 24)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZM:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZM]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZM]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZM]], i64 24)
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.famax.x4.nxv8f16(<vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], <vscale x 8 x half> [[TMP4]], <vscale x 8 x half> [[TMP5]], <vscale x 8 x half> [[TMP6]], <vscale x 8 x half> [[TMP7]])
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP8]], 0
+// CPP-CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> poison, <vscale x 8 x half> [[TMP9]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP8]], 1
+// CPP-CHECK-NEXT:    [[TMP12:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP10]], <vscale x 8 x half> [[TMP11]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP13:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP8]], 2
+// CPP-CHECK-NEXT:    [[TMP14:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP12]], <vscale x 8 x half> [[TMP13]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP15:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP8]], 3
+// CPP-CHECK-NEXT:    [[TMP16:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP14]], <vscale x 8 x half> [[TMP15]], i64 24)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x half> [[TMP16]]
+//
+svfloat16x4_t test_svamax_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_streaming {
+  return SVE_ACLE_FUNC(svamax,_f16_x4)(zdn, zm);
+}
+
+// CHECK-LABEL: @test_svamax_f32_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZDN:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZDN]], i64 4)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZDN]], i64 8)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZDN]], i64 12)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZM:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZM]], i64 4)
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZM]], i64 8)
+// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZM]], i64 12)
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.famax.x4.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], <vscale x 4 x float> [[TMP4]], <vscale x 4 x float> [[TMP5]], <vscale x 4 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]])
+// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP8]], 0
+// CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[TMP9]], i64 0)
+// CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP8]], 1
+// CHECK-NEXT:    [[TMP12:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP10]], <vscale x 4 x float> [[TMP11]], i64 4)
+// CHECK-NEXT:    [[TMP13:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP8]], 2
+// CHECK-NEXT:    [[TMP14:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP12]], <vscale x 4 x float> [[TMP13]], i64 8)
+// CHECK-NEXT:    [[TMP15:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP8]], 3
+// CHECK-NEXT:    [[TMP16:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP14]], <vscale x 4 x float> [[TMP15]], i64 12)
+// CHECK-NEXT:    ret <vscale x 16 x float> [[TMP16]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svamax_f32_x413svfloat32x4_tS_(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZDN:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZDN]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZDN]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZDN]], i64 12)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZM:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZM]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZM]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZM]], i64 12)
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.famax.x4.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], <vscale x 4 x float> [[TMP4]], <vscale x 4 x float> [[TMP5]], <vscale x 4 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]])
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP8]], 0
+// CPP-CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[TMP9]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP8]], 1
+// CPP-CHECK-NEXT:    [[TMP12:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP10]], <vscale x 4 x float> [[TMP11]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP13:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP8]], 2
+// CPP-CHECK-NEXT:    [[TMP14:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP12]], <vscale x 4 x float> [[TMP13]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP15:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP8]], 3
+// CPP-CHECK-NEXT:    [[TMP16:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP14]], <vscale x 4 x float> [[TMP15]], i64 12)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x float> [[TMP16]]
+//
+svfloat32x4_t test_svamax_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_streaming {
+  return SVE_ACLE_FUNC(svamax,_f32_x4)(zdn, zm);
+}
+
+// CHECK-LABEL: @test_svamax_f64_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[ZDN:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[ZDN]], i64 2)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[ZDN]], i64 4)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[ZDN]], i64 6)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[ZM:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[ZM]], i64 2)
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[ZM]], i64 4)
+// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[ZM]], i64 6)
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.famax.x4.nxv2f64(<vscale x 2 x double> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], <vscale x 2 x double> [[TMP4]], <vscale x 2 x double> [[TMP5]], <vscale x 2 x double> [[TMP6]], <vscale x 2 x double> [[TMP7]])
+// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP8]], 0
+// CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> poison, <vscale x 2 x double> [[TMP9]], i64 0)
+// CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP8]], 1
+// CHECK-NEXT:    [[TMP12:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP10]], <vscale x 2 x double> [[TMP11]], i64 2)
+// CHECK-NEXT:    [[TMP13:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP8]], 2
+// CHECK-NEXT:    [[TMP14:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP12]], <vscale x 2 x double> [[TMP13]], i64 4)
+// CHECK-NEXT:    [[TMP15:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP8]], 3
+// CHECK-NEXT:    [[TMP16:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP14]], <vscale x 2 x double> [[TMP15]], i64 6)
+// CHECK-NEXT:    ret <vscale x 8 x double> [[TMP16]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svamax_f64_x413svfloat64x4_tS_(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[ZDN:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[ZDN]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[ZDN]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[ZDN]], i64 6)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[ZM:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[ZM]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[ZM]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[ZM]], i64 6)
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.famax.x4.nxv2f64(<vscale x 2 x double> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], <vscale x 2 x double> [[TMP4]], <vscale x 2 x double> [[TMP5]], <vscale x 2 x double> [[TMP6]], <vscale x 2 x double> [[TMP7]])
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP8]], 0
+// CPP-CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> poison, <vscale x 2 x double> [[TMP9]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP8]], 1
+// CPP-CHECK-NEXT:    [[TMP12:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP10]], <vscale x 2 x double> [[TMP11]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP13:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP8]], 2
+// CPP-CHECK-NEXT:    [[TMP14:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP12]], <vscale x 2 x double> [[TMP13]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP15:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP8]], 3
+// CPP-CHECK-NEXT:    [[TMP16:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP14]], <vscale x 2 x double> [[TMP15]], i64 6)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x double> [[TMP16]]
+//
+svfloat64x4_t test_svamax_f64_x4(svfloat64x4_t zdn, svfloat64x4_t zm) __arm_streaming {
+  return SVE_ACLE_FUNC(svamax,_f64_x4)(zdn, zm);
+}
+
+// CHECK-LABEL: @test_svamin_f16_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZDN:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZDN]], i64 8)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZDN]], i64 16)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZDN]], i64 24)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZM:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZM]], i64 8)
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZM]], i64 16)
+// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZM]], i64 24)
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.famin.x4.nxv8f16(<vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], <vscale x 8 x half> [[TMP4]], <vscale x 8 x half> [[TMP5]], <vscale x 8 x half> [[TMP6]], <vscale x 8 x half> [[TMP7]])
+// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP8]], 0
+// CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> poison, <vscale x 8 x half> [[TMP9]], i64 0)
+// CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP8]], 1
+// CHECK-NEXT:    [[TMP12:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP10]], <vscale x 8 x half> [[TMP11]], i64 8)
+// CHECK-NEXT:    [[TMP13:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP8]], 2
+// CHECK-NEXT:    [[TMP14:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP12]], <vscale x 8 x half> [[TMP13]], i64 16)
+// CHECK-NEXT:    [[TMP15:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP8]], 3
+// CHECK-NEXT:    [[TMP16:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP14]], <vscale x 8 x half> [[TMP15]], i64 24)
+// CHECK-NEXT:    ret <vscale x 32 x half> [[TMP16]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svamin_f16_x413svfloat16x4_tS_(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZDN:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZDN]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZDN]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZDN]], i64 24)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZM:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZM]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZM]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZM]], i64 24)
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.famin.x4.nxv8f16(<vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], <vscale x 8 x half> [[TMP4]], <vscale x 8 x half> [[TMP5]], <vscale x 8 x half> [[TMP6]], <vscale x 8 x half> [[TMP7]])
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP8]], 0
+// CPP-CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> poison, <vscale x 8 x half> [[TMP9]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP8]], 1
+// CPP-CHECK-NEXT:    [[TMP12:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP10]], <vscale x 8 x half> [[TMP11]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP13:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP8]], 2
+// CPP-CHECK-NEXT:    [[TMP14:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP12]], <vscale x 8 x half> [[TMP13]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP15:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP8]], 3
+// CPP-CHECK-NEXT:    [[TMP16:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP14]], <vscale x 8 x half> [[TMP15]], i64 24)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x half> [[TMP16]]
+//
+svfloat16x4_t test_svamin_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_streaming {
+  return SVE_ACLE_FUNC(svamin,_f16_x4)(zdn, zm);
+}
+
+// CHECK-LABEL: @test_svamin_f32_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZDN:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZDN]], i64 4)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZDN]], i64 8)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZDN]], i64 12)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZM:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZM]], i64 4)
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZM]], i64 8)
+// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZM]], i64 12)
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.famin.x4.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], <vscale x 4 x float> [[TMP4]], <vscale x 4 x float> [[TMP5]], <vscale x 4 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]])
+// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP8]], 0
+// CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[TMP9]], i64 0)
+// CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP8]], 1
+// CHECK-NEXT:    [[TMP12:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP10]], <vscale x 4 x float> [[TMP11]], i64 4)
+// CHECK-NEXT:    [[TMP13:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP8]], 2
+// CHECK-NEXT:    [[TMP14:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP12]], <vscale x 4 x float> [[TMP13]], i64 8)
+// CHECK-NEXT:    [[TMP15:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP8]], 3
+// CHECK-NEXT:    [[TMP16:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP14]], <vscale x 4 x float> [[TMP15]], i64 12)
+// CHECK-NEXT:    ret <vscale x 16 x float> [[TMP16]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svamin_f32_x413svfloat32x4_tS_(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZDN:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZDN]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZDN]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZDN]], i64 12)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZM:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZM]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZM]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZM]], i64 12)
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.famin.x4.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], <vscale x 4 x float> [[TMP4]], <vscale x 4 x float> [[TMP5]], <vscale x 4 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]])
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP8]], 0
+// CPP-CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[TMP9]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP8]], 1
+// CPP-CHECK-NEXT:    [[TMP12:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP10]], <vscale x 4 x float> [[TMP11]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP13:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP8]], 2
+// CPP-CHECK-NEXT:    [[TMP14:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP12]], <vscale x 4 x float> [[TMP13]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP15:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP8]], 3
+// CPP-CHECK-NEXT:    [[TMP16:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP14]], <vscale x 4 x float> [[TMP15]], i64 12)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x float> [[TMP16]]
+//
+svfloat32x4_t test_svamin_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_streaming {
+  return SVE_ACLE_FUNC(svamin,_f32_x4)(zdn, zm);
+}
+
+// CHECK-LABEL: @test_svamin_f64_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[ZDN:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[ZDN]], i64 2)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[ZDN]], i64 4)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[ZDN]], i64 6)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[ZM:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[ZM]], i64 2)
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[ZM]], i64 4)
+// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[ZM]], i64 6)
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.famin.x4.nxv2f64(<vscale x 2 x double> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], <vscale x 2 x double> [[TMP4]], <vscale x 2 x double> [[TMP5]], <vscale x 2 x double> [[TMP6]], <vscale x 2 x double> [[TMP7]])
+// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP8]], 0
+// CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> poison, <vscale x 2 x double> [[TMP9]], i64 0)
+// CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP8]], 1
+// CHECK-NEXT:    [[TMP12:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP10]], <vscale x 2 x double> [[TMP11]], i64 2)
+// CHECK-NEXT:    [[TMP13:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP8]], 2
+// CHECK-NEXT:    [[TMP14:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP12]], <vscale x 2 x double> [[TMP13]], i64 4)
+// CHECK-NEXT:    [[TMP15:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP8]], 3
+// CHECK-NEXT:    [[TMP16:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP14]], <vscale x 2 x double> [[TMP15]], i64 6)
+// CHECK-NEXT:    ret <vscale x 8 x double> [[TMP16]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svamin_f64_x413svfloat64x4_tS_(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[ZDN:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[ZDN]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[ZDN]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[ZDN]], i64 6)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[ZM:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[ZM]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[ZM]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[ZM]], i64 6)
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.famin.x4.nxv2f64(<vscale x 2 x double> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], <vscale x 2 x double> [[TMP4]], <vscale x 2 x double> [[TMP5]], <vscale x 2 x double> [[TMP6]], <vscale x 2 x double> [[TMP7]])
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP8]], 0
+// CPP-CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> poison, <vscale x 2 x double> [[TMP9]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP8]], 1
+// CPP-CHECK-NEXT:    [[TMP12:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP10]], <vscale x 2 x double> [[TMP11]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP13:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP8]], 2
+// CPP-CHECK-NEXT:    [[TMP14:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP12]], <vscale x 2 x double> [[TMP13]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP15:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP8]], 3
+// CPP-CHECK-NEXT:    [[TMP16:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP14]], <vscale x 2 x double> [[TMP15]], i64 6)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x double> [[TMP16]]
+//
+svfloat64x4_t test_svamin_f64_x4(svfloat64x4_t zdn, svfloat64x4_t zm) __arm_streaming {
+  return SVE_ACLE_FUNC(svamin,_f64_x4)(zdn, zm);
+}
diff --git clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_faminmax.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_faminmax.c
new file mode 100644
index 000000000000..3cf7d99d606f
--- /dev/null
+++ clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_faminmax.c
@@ -0,0 +1,775 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +faminmax -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +faminmax -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +faminmax -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +faminmax -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CPP
+// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +faminmax -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CPP
+// RUN: %clang_cc1 -x c++ -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +faminmax -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CPP
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +faminmax -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +faminmax -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+// REQUIRES: aarch64-registered-target
+
+#ifdef __ARM_FEATURE_SME
+#include "arm_sme.h"
+#else
+#include "arm_sve.h"
+#endif
+
+#ifdef SVE_OVERLOADED_FORMS
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3
+#endif
+
+#ifdef __ARM_FEATURE_SME
+#define STREAMING __arm_streaming
+#else
+#define STREAMING
+#endif
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famin_f16_m(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z16test_famin_f16_mu10__SVBool_tu13__SVFloat16_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
+// CHECK-CPP-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+svfloat16_t test_famin_f16_m(svbool_t pg, svfloat16_t a, svfloat16_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamin, _f16, _m)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famin_f16_x(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z16test_famin_f16_xu10__SVBool_tu13__SVFloat16_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
+// CHECK-CPP-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+svfloat16_t test_famin_f16_x(svbool_t pg, svfloat16_t a, svfloat16_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamin, _f16, _x)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famin_f16_z(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[TMP1:%.*]] = select <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> zeroinitializer
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[B]])
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z16test_famin_f16_zu10__SVBool_tu13__SVFloat16_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = select <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[B]])
+// CHECK-CPP-NEXT:    ret <vscale x 8 x half> [[TMP2]]
+//
+svfloat16_t test_famin_f16_z(svbool_t pg, svfloat16_t a, svfloat16_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamin, _f16, _z)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famin_n_f16_m(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0
+// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[DOTSPLAT]])
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z18test_famin_n_f16_mu10__SVBool_tu13__SVFloat16_tDh(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0
+// CHECK-CPP-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[DOTSPLAT]])
+// CHECK-CPP-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+svfloat16_t test_famin_n_f16_m(svbool_t pg, svfloat16_t a, float16_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamin, _n_f16, _m)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famin_n_f16_x(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0
+// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[DOTSPLAT]])
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z18test_famin_n_f16_xu10__SVBool_tu13__SVFloat16_tDh(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0
+// CHECK-CPP-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[DOTSPLAT]])
+// CHECK-CPP-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+svfloat16_t test_famin_n_f16_x(svbool_t pg, svfloat16_t a, float16_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamin, _n_f16, _x)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famin_n_f16_z(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0
+// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = select <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> zeroinitializer
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[DOTSPLAT]])
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z18test_famin_n_f16_zu10__SVBool_tu13__SVFloat16_tDh(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0
+// CHECK-CPP-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = select <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[DOTSPLAT]])
+// CHECK-CPP-NEXT:    ret <vscale x 8 x half> [[TMP2]]
+//
+svfloat16_t test_famin_n_f16_z(svbool_t pg, svfloat16_t a, float16_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamin, _n_f16, _z)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famin_f32_m(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z16test_famin_f32_mu10__SVBool_tu13__SVFloat32_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+// CHECK-CPP-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_famin_f32_m(svbool_t pg, svfloat32_t a, svfloat32_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamin, _f32, _m)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famin_f32_x(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z16test_famin_f32_xu10__SVBool_tu13__SVFloat32_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+// CHECK-CPP-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_famin_f32_x(svbool_t pg, svfloat32_t a, svfloat32_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamin, _f32, _x)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famin_f32_z(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[TMP1:%.*]] = select <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> zeroinitializer
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[B]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z16test_famin_f32_zu10__SVBool_tu13__SVFloat32_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = select <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[B]])
+// CHECK-CPP-NEXT:    ret <vscale x 4 x float> [[TMP2]]
+//
+svfloat32_t test_famin_f32_z(svbool_t pg, svfloat32_t a, svfloat32_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamin, _f32, _z)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famin_n_f32_m(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0
+// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[DOTSPLAT]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z18test_famin_n_f32_mu10__SVBool_tu13__SVFloat32_tf(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0
+// CHECK-CPP-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[DOTSPLAT]])
+// CHECK-CPP-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_famin_n_f32_m(svbool_t pg, svfloat32_t a, float32_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamin, _n_f32, _m)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famin_n_f32_x(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0
+// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[DOTSPLAT]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z18test_famin_n_f32_xu10__SVBool_tu13__SVFloat32_tf(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0
+// CHECK-CPP-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[DOTSPLAT]])
+// CHECK-CPP-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_famin_n_f32_x(svbool_t pg, svfloat32_t a, float32_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamin, _n_f32, _x)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famin_n_f32_z(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0
+// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = select <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> zeroinitializer
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[DOTSPLAT]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z18test_famin_n_f32_zu10__SVBool_tu13__SVFloat32_tf(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0
+// CHECK-CPP-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = select <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[DOTSPLAT]])
+// CHECK-CPP-NEXT:    ret <vscale x 4 x float> [[TMP2]]
+//
+svfloat32_t test_famin_n_f32_z(svbool_t pg, svfloat32_t a, float32_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamin, _n_f32, _z)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_famin_f64_m(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famin.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 2 x double> @_Z16test_famin_f64_mu10__SVBool_tu13__SVFloat64_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famin.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
+// CHECK-CPP-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+//
+svfloat64_t test_famin_f64_m(svbool_t pg, svfloat64_t a, svfloat64_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamin, _f64, _m)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_famin_f64_x(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famin.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 2 x double> @_Z16test_famin_f64_xu10__SVBool_tu13__SVFloat64_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famin.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
+// CHECK-CPP-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+//
+svfloat64_t test_famin_f64_x(svbool_t pg, svfloat64_t a, svfloat64_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamin, _f64, _x)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_famin_f64_z(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[TMP1:%.*]] = select <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> zeroinitializer
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famin.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[B]])
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 2 x double> @_Z16test_famin_f64_zu10__SVBool_tu13__SVFloat64_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = select <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famin.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[B]])
+// CHECK-CPP-NEXT:    ret <vscale x 2 x double> [[TMP2]]
+//
+svfloat64_t test_famin_f64_z(svbool_t pg, svfloat64_t a, svfloat64_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamin, _f64, _z)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_famin_n_f64_m(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B]], i64 0
+// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famin.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[DOTSPLAT]])
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 2 x double> @_Z18test_famin_n_f64_mu10__SVBool_tu13__SVFloat64_td(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B]], i64 0
+// CHECK-CPP-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famin.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[DOTSPLAT]])
+// CHECK-CPP-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+//
+svfloat64_t test_famin_n_f64_m(svbool_t pg, svfloat64_t a, float64_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamin, _n_f64, _m)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_famin_n_f64_x(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B]], i64 0
+// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famin.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[DOTSPLAT]])
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 2 x double> @_Z18test_famin_n_f64_xu10__SVBool_tu13__SVFloat64_td(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B]], i64 0
+// CHECK-CPP-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famin.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[DOTSPLAT]])
+// CHECK-CPP-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+//
+svfloat64_t test_famin_n_f64_x(svbool_t pg, svfloat64_t a, float64_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamin, _n_f64, _x)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_famin_n_f64_z(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B]], i64 0
+// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = select <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> zeroinitializer
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famin.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[DOTSPLAT]])
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 2 x double> @_Z18test_famin_n_f64_zu10__SVBool_tu13__SVFloat64_td(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B]], i64 0
+// CHECK-CPP-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = select <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famin.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[DOTSPLAT]])
+// CHECK-CPP-NEXT:    ret <vscale x 2 x double> [[TMP2]]
+//
+svfloat64_t test_famin_n_f64_z(svbool_t pg, svfloat64_t a, float64_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamin, _n_f64, _z)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famax_f16_m(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famax.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z16test_famax_f16_mu10__SVBool_tu13__SVFloat16_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famax.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
+// CHECK-CPP-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+svfloat16_t test_famax_f16_m(svbool_t pg, svfloat16_t a, svfloat16_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamax, _f16, _m)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famax_f16_x(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famax.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z16test_famax_f16_xu10__SVBool_tu13__SVFloat16_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famax.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
+// CHECK-CPP-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+svfloat16_t test_famax_f16_x(svbool_t pg, svfloat16_t a, svfloat16_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamax, _f16, _x)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famax_f16_z(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[TMP1:%.*]] = select <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> zeroinitializer
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famax.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[B]])
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z16test_famax_f16_zu10__SVBool_tu13__SVFloat16_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = select <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famax.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[B]])
+// CHECK-CPP-NEXT:    ret <vscale x 8 x half> [[TMP2]]
+//
+svfloat16_t test_famax_f16_z(svbool_t pg, svfloat16_t a, svfloat16_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamax, _f16, _z)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famax_n_f16_m(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0
+// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famax.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[DOTSPLAT]])
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z18test_famax_n_f16_mu10__SVBool_tu13__SVFloat16_tDh(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0
+// CHECK-CPP-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famax.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[DOTSPLAT]])
+// CHECK-CPP-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+svfloat16_t test_famax_n_f16_m(svbool_t pg, svfloat16_t a, float16_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamax, _n_f16, _m)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famax_n_f16_x(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0
+// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famax.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[DOTSPLAT]])
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z18test_famax_n_f16_xu10__SVBool_tu13__SVFloat16_tDh(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0
+// CHECK-CPP-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famax.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[DOTSPLAT]])
+// CHECK-CPP-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+svfloat16_t test_famax_n_f16_x(svbool_t pg, svfloat16_t a, float16_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamax, _n_f16, _x)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famax_n_f16_z(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0
+// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = select <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> zeroinitializer
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famax.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[DOTSPLAT]])
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z18test_famax_n_f16_zu10__SVBool_tu13__SVFloat16_tDh(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0
+// CHECK-CPP-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = select <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famax.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[DOTSPLAT]])
+// CHECK-CPP-NEXT:    ret <vscale x 8 x half> [[TMP2]]
+//
+svfloat16_t test_famax_n_f16_z(svbool_t pg, svfloat16_t a, float16_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamax, _n_f16, _z)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famax_f32_m(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z16test_famax_f32_mu10__SVBool_tu13__SVFloat32_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+// CHECK-CPP-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_famax_f32_m(svbool_t pg, svfloat32_t a, svfloat32_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamax, _f32, _m)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famax_f32_x(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famax.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z16test_famax_f32_xu10__SVBool_tu13__SVFloat32_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famax.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+// CHECK-CPP-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_famax_f32_x(svbool_t pg, svfloat32_t a, svfloat32_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamax, _f32, _x)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famax_f32_z(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[TMP1:%.*]] = select <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> zeroinitializer
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[B]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z16test_famax_f32_zu10__SVBool_tu13__SVFloat32_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = select <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[B]])
+// CHECK-CPP-NEXT:    ret <vscale x 4 x float> [[TMP2]]
+//
+svfloat32_t test_famax_f32_z(svbool_t pg, svfloat32_t a, svfloat32_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamax, _f32, _z)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famax_n_f32_m(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0
+// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[DOTSPLAT]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z18test_famax_n_f32_mu10__SVBool_tu13__SVFloat32_tf(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0
+// CHECK-CPP-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[DOTSPLAT]])
+// CHECK-CPP-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_famax_n_f32_m(svbool_t pg, svfloat32_t a, float32_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamax, _n_f32, _m)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famax_n_f32_x(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0
+// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famax.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[DOTSPLAT]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z18test_famax_n_f32_xu10__SVBool_tu13__SVFloat32_tf(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0
+// CHECK-CPP-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famax.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[DOTSPLAT]])
+// CHECK-CPP-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_famax_n_f32_x(svbool_t pg, svfloat32_t a, float32_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamax, _n_f32, _x)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famax_n_f32_z(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0
+// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = select <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> zeroinitializer
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[DOTSPLAT]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z18test_famax_n_f32_zu10__SVBool_tu13__SVFloat32_tf(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0
+// CHECK-CPP-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = select <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[DOTSPLAT]])
+// CHECK-CPP-NEXT:    ret <vscale x 4 x float> [[TMP2]]
+//
+svfloat32_t test_famax_n_f32_z(svbool_t pg, svfloat32_t a, float32_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamax, _n_f32, _z)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_famax_f64_m(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famax.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 2 x double> @_Z16test_famax_f64_mu10__SVBool_tu13__SVFloat64_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famax.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
+// CHECK-CPP-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+//
+svfloat64_t test_famax_f64_m(svbool_t pg, svfloat64_t a, svfloat64_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamax, _f64, _m)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_famax_f64_x(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famax.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 2 x double> @_Z16test_famax_f64_xu10__SVBool_tu13__SVFloat64_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famax.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
+// CHECK-CPP-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+//
+svfloat64_t test_famax_f64_x(svbool_t pg, svfloat64_t a, svfloat64_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamax, _f64, _x)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_famax_f64_z(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[TMP1:%.*]] = select <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> zeroinitializer
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famax.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[B]])
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 2 x double> @_Z16test_famax_f64_zu10__SVBool_tu13__SVFloat64_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = select <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famax.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[B]])
+// CHECK-CPP-NEXT:    ret <vscale x 2 x double> [[TMP2]]
+//
+svfloat64_t test_famax_f64_z(svbool_t pg, svfloat64_t a, svfloat64_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamax, _f64, _z)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_famax_n_f64_m(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B]], i64 0
+// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famax.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[DOTSPLAT]])
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 2 x double> @_Z18test_famax_n_f64_mu10__SVBool_tu13__SVFloat64_td(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B]], i64 0
+// CHECK-CPP-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famax.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[DOTSPLAT]])
+// CHECK-CPP-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+//
+svfloat64_t test_famax_n_f64_m(svbool_t pg, svfloat64_t a, float64_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamax, _n_f64, _m)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_famax_n_f64_x(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B]], i64 0
+// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famax.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[DOTSPLAT]])
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 2 x double> @_Z18test_famax_n_f64_xu10__SVBool_tu13__SVFloat64_td(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B]], i64 0
+// CHECK-CPP-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famax.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[DOTSPLAT]])
+// CHECK-CPP-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+//
+svfloat64_t test_famax_n_f64_x(svbool_t pg, svfloat64_t a, float64_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamax, _n_f64, _x)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_famax_n_f64_z(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B]], i64 0
+// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = select <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> zeroinitializer
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famax.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[DOTSPLAT]])
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 2 x double> @_Z18test_famax_n_f64_zu10__SVBool_tu13__SVFloat64_td(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B]], i64 0
+// CHECK-CPP-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = select <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famax.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[DOTSPLAT]])
+// CHECK-CPP-NEXT:    ret <vscale x 2 x double> [[TMP2]]
+//
+svfloat64_t test_famax_n_f64_z(svbool_t pg, svfloat64_t a, float64_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamax, _n_f64, _z)(pg, a, b);
+}
diff --git clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_luti.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_luti.c
new file mode 100644
index 000000000000..60c4828c407e
--- /dev/null
+++ clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_luti.c
@@ -0,0 +1,337 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu \
+// RUN:   -target-feature +sme -target-feature +sme2 -target-feature +lut -target-feature +bf16 -O1 -Werror -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu \
+// RUN:   -target-feature +sve -target-feature +sve2 -target-feature +lut -target-feature +bf16 -O1 -Werror -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu \
+// RUN:   -target-feature +sve -target-feature +sve2 -target-feature +lut -target-feature +bf16 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu \
+// RUN:   -target-feature +sve -target-feature +sve2 -target-feature +lut -target-feature +bf16 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +lut -target-feature +bf16 -O1 -Werror -Wall -o /dev/null %s
+#include <arm_sve.h>
+
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3
+#endif
+
+// SME-CHECK-LABEL: @test_svluti2_lane_s8(
+// SME-CHECK-NEXT:  entry:
+// SME-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.luti2.nxv16i8(<vscale x 16 x i8> [[TABLE:%.*]], <vscale x 16 x i8> [[INDICES:%.*]], i32 0)
+// SME-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+// CHECK-LABEL: @test_svluti2_lane_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.luti2.lane.nxv16i8(<vscale x 16 x i8> [[TABLE:%.*]], <vscale x 16 x i8> [[INDICES:%.*]], i32 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svluti2_lane_s8u10__SVInt8_tu11__SVUint8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.luti2.lane.nxv16i8(<vscale x 16 x i8> [[TABLE:%.*]], <vscale x 16 x i8> [[INDICES:%.*]], i32 0)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svluti2_lane_s8(svint8_t table, svuint8_t indices) MODE_ATTR{
+    return SVE_ACLE_FUNC(svluti2_lane,_s8,)(table, indices, 0);
+}
+
+// SME-CHECK-LABEL: @test_svluti2_lane_u8(
+// SME-CHECK-NEXT:  entry:
+// SME-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.luti2.nxv16i8(<vscale x 16 x i8> [[TABLE:%.*]], <vscale x 16 x i8> [[INDICES:%.*]], i32 3)
+// SME-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+// CHECK-LABEL: @test_svluti2_lane_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.luti2.lane.nxv16i8(<vscale x 16 x i8> [[TABLE:%.*]], <vscale x 16 x i8> [[INDICES:%.*]], i32 3)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svluti2_lane_u8u11__SVUint8_tS_(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.luti2.lane.nxv16i8(<vscale x 16 x i8> [[TABLE:%.*]], <vscale x 16 x i8> [[INDICES:%.*]], i32 3)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svluti2_lane_u8(svuint8_t table, svuint8_t indices) MODE_ATTR{
+    return SVE_ACLE_FUNC(svluti2_lane,_u8,)(table, indices, 3);
+}
+
+// SME-CHECK-LABEL: @test_svluti2_lane_s16(
+// SME-CHECK-NEXT:  entry:
+// SME-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.luti2.nxv8i16(<vscale x 8 x i16> [[TABLE:%.*]], <vscale x 16 x i8> [[INDICES:%.*]], i32 0)
+// SME-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+// CHECK-LABEL: @test_svluti2_lane_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.luti2.lane.nxv8i16(<vscale x 8 x i16> [[TABLE:%.*]], <vscale x 16 x i8> [[INDICES:%.*]], i32 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z21test_svluti2_lane_s16u11__SVInt16_tu11__SVUint8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.luti2.lane.nxv8i16(<vscale x 8 x i16> [[TABLE:%.*]], <vscale x 16 x i8> [[INDICES:%.*]], i32 0)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+svint16_t test_svluti2_lane_s16(svint16_t table, svuint8_t indices) MODE_ATTR{
+    return SVE_ACLE_FUNC(svluti2_lane,_s16,)(table, indices, 0);
+}
+
+// SME-CHECK-LABEL: @test_svluti2_lane_u16(
+// SME-CHECK-NEXT:  entry:
+// SME-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.luti2.nxv8i16(<vscale x 8 x i16> [[TABLE:%.*]], <vscale x 16 x i8> [[INDICES:%.*]], i32 7)
+// SME-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+// CHECK-LABEL: @test_svluti2_lane_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.luti2.lane.nxv8i16(<vscale x 8 x i16> [[TABLE:%.*]], <vscale x 16 x i8> [[INDICES:%.*]], i32 7)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z21test_svluti2_lane_u16u12__SVUint16_tu11__SVUint8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.luti2.lane.nxv8i16(<vscale x 8 x i16> [[TABLE:%.*]], <vscale x 16 x i8> [[INDICES:%.*]], i32 7)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+svuint16_t test_svluti2_lane_u16(svuint16_t table, svuint8_t indices) MODE_ATTR{
+    return SVE_ACLE_FUNC(svluti2_lane,_u16,)(table, indices, 7);
+}
+
+// SME-CHECK-LABEL: @test_svluti2_lane_f16(
+// SME-CHECK-NEXT:  entry:
+// SME-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.luti2.nxv8f16(<vscale x 8 x half> [[TABLE:%.*]], <vscale x 16 x i8> [[INDICES:%.*]], i32 5)
+// SME-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
+// CHECK-LABEL: @test_svluti2_lane_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.luti2.lane.nxv8f16(<vscale x 8 x half> [[TABLE:%.*]], <vscale x 16 x i8> [[INDICES:%.*]], i32 5)
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z21test_svluti2_lane_f16u13__SVFloat16_tu11__SVUint8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.luti2.lane.nxv8f16(<vscale x 8 x half> [[TABLE:%.*]], <vscale x 16 x i8> [[INDICES:%.*]], i32 5)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
+//
+svfloat16_t test_svluti2_lane_f16(svfloat16_t table, svuint8_t indices) MODE_ATTR{
+    return SVE_ACLE_FUNC(svluti2_lane,_f16,)(table, indices, 5);
+}
+
+// SME-CHECK-LABEL: @test_svluti2_lane_bf16(
+// SME-CHECK-NEXT:  entry:
+// SME-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.luti2.nxv8bf16(<vscale x 8 x bfloat> [[TABLE:%.*]], <vscale x 16 x i8> [[INDICES:%.*]], i32 2)
+// SME-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
+// CHECK-LABEL: @test_svluti2_lane_bf16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.luti2.lane.nxv8bf16(<vscale x 8 x bfloat> [[TABLE:%.*]], <vscale x 16 x i8> [[INDICES:%.*]], i32 2)
+// CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z22test_svluti2_lane_bf16u14__SVBfloat16_tu11__SVUint8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.luti2.lane.nxv8bf16(<vscale x 8 x bfloat> [[TABLE:%.*]], <vscale x 16 x i8> [[INDICES:%.*]], i32 2)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
+//
+svbfloat16_t test_svluti2_lane_bf16(svbfloat16_t table, svuint8_t indices) MODE_ATTR{
+    return SVE_ACLE_FUNC(svluti2_lane,_bf16,)(table, indices, 2);
+}
+
+// SME-CHECK-LABEL: @test_svluti4_lane_s8(
+// SME-CHECK-NEXT:  entry:
+// SME-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.luti4.nxv16i8(<vscale x 16 x i8> [[TABLE:%.*]], <vscale x 16 x i8> [[INDICES:%.*]], i32 0)
+// SME-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+// CHECK-LABEL: @test_svluti4_lane_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.luti4.lane.nxv16i8(<vscale x 16 x i8> [[TABLE:%.*]], <vscale x 16 x i8> [[INDICES:%.*]], i32 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svluti4_lane_s8u10__SVInt8_tu11__SVUint8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.luti4.lane.nxv16i8(<vscale x 16 x i8> [[TABLE:%.*]], <vscale x 16 x i8> [[INDICES:%.*]], i32 0)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svluti4_lane_s8(svint8_t table, svuint8_t indices) MODE_ATTR{
+    return SVE_ACLE_FUNC(svluti4_lane,_s8,)(table, indices, 0);
+}
+
+// SME-CHECK-LABEL: @test_svluti4_lane_u8(
+// SME-CHECK-NEXT:  entry:
+// SME-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.luti4.nxv16i8(<vscale x 16 x i8> [[TABLE:%.*]], <vscale x 16 x i8> [[INDICES:%.*]], i32 1)
+// SME-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+// CHECK-LABEL: @test_svluti4_lane_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.luti4.lane.nxv16i8(<vscale x 16 x i8> [[TABLE:%.*]], <vscale x 16 x i8> [[INDICES:%.*]], i32 1)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svluti4_lane_u8u11__SVUint8_tS_(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.luti4.lane.nxv16i8(<vscale x 16 x i8> [[TABLE:%.*]], <vscale x 16 x i8> [[INDICES:%.*]], i32 1)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svluti4_lane_u8(svuint8_t table, svuint8_t indices) MODE_ATTR{
+    return SVE_ACLE_FUNC(svluti4_lane,_u8,)(table, indices, 1);
+}
+
+// SME-CHECK-LABEL: @test_svluti4_lane_s16(
+// SME-CHECK-NEXT:  entry:
+// SME-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.luti4.nxv8i16(<vscale x 8 x i16> [[TABLE:%.*]], <vscale x 16 x i8> [[INDICES:%.*]], i32 0)
+// SME-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+// CHECK-LABEL: @test_svluti4_lane_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.luti4.lane.nxv8i16(<vscale x 8 x i16> [[TABLE:%.*]], <vscale x 16 x i8> [[INDICES:%.*]], i32 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z21test_svluti4_lane_s16u11__SVInt16_tu11__SVUint8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.luti4.lane.nxv8i16(<vscale x 8 x i16> [[TABLE:%.*]], <vscale x 16 x i8> [[INDICES:%.*]], i32 0)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+svint16_t test_svluti4_lane_s16(svint16_t table, svuint8_t indices) MODE_ATTR{
+    return SVE_ACLE_FUNC(svluti4_lane,_s16,)(table, indices, 0);
+}
+
+// SME-CHECK-LABEL: @test_svluti4_lane_u16(
+// SME-CHECK-NEXT:  entry:
+// SME-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.luti4.nxv8i16(<vscale x 8 x i16> [[TABLE:%.*]], <vscale x 16 x i8> [[INDICES:%.*]], i32 7)
+// SME-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+// CHECK-LABEL: @test_svluti4_lane_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.luti4.lane.nxv8i16(<vscale x 8 x i16> [[TABLE:%.*]], <vscale x 16 x i8> [[INDICES:%.*]], i32 3)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z21test_svluti4_lane_u16u12__SVUint16_tu11__SVUint8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.luti4.lane.nxv8i16(<vscale x 8 x i16> [[TABLE:%.*]], <vscale x 16 x i8> [[INDICES:%.*]], i32 3)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+svuint16_t test_svluti4_lane_u16(svuint16_t table, svuint8_t indices) MODE_ATTR{
+    return SVE_ACLE_FUNC(svluti4_lane,_u16,)(table, indices, 3);
+}
+
+// SME-CHECK-LABEL: @test_svluti4_lane_f16(
+// SME-CHECK-NEXT:  entry:
+// SME-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.luti4.nxv8f16(<vscale x 8 x half> [[TABLE:%.*]], <vscale x 16 x i8> [[INDICES:%.*]], i32 5)
+// SME-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
+// CHECK-LABEL: @test_svluti4_lane_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.luti4.lane.nxv8f16(<vscale x 8 x half> [[TABLE:%.*]], <vscale x 16 x i8> [[INDICES:%.*]], i32 2)
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z21test_svluti4_lane_f16u13__SVFloat16_tu11__SVUint8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.luti4.lane.nxv8f16(<vscale x 8 x half> [[TABLE:%.*]], <vscale x 16 x i8> [[INDICES:%.*]], i32 2)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
+//
+svfloat16_t test_svluti4_lane_f16(svfloat16_t table, svuint8_t indices) MODE_ATTR{
+    return SVE_ACLE_FUNC(svluti4_lane,_f16,)(table, indices, 2);
+}
+
+// SME-CHECK-LABEL: @test_svluti4_lane_bf16(
+// SME-CHECK-NEXT:  entry:
+// SME-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.luti4.nxv8bf16(<vscale x 8 x bfloat> [[TABLE:%.*]], <vscale x 16 x i8> [[INDICES:%.*]], i32 2)
+// SME-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
+// CHECK-LABEL: @test_svluti4_lane_bf16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.luti4.lane.nxv8bf16(<vscale x 8 x bfloat> [[TABLE:%.*]], <vscale x 16 x i8> [[INDICES:%.*]], i32 1)
+// CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z22test_svluti4_lane_bf16u14__SVBfloat16_tu11__SVUint8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.luti4.lane.nxv8bf16(<vscale x 8 x bfloat> [[TABLE:%.*]], <vscale x 16 x i8> [[INDICES:%.*]], i32 1)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
+//
+svbfloat16_t test_svluti4_lane_bf16(svbfloat16_t table, svuint8_t indices) MODE_ATTR{
+    return SVE_ACLE_FUNC(svluti4_lane,_bf16,)(table, indices, 1);
+}
+
+// SME-CHECK-LABEL: @test_svluti4_lane_s16_x2(
+// SME-CHECK-NEXT:  entry:
+// SME-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[TABLE:%.*]], i64 0)
+// SME-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[TABLE]], i64 8)
+// SME-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.luti4.x2.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 16 x i8> [[INDICES:%.*]], i32 0)
+// SME-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
+// CHECK-LABEL: @test_svluti4_lane_s16_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[TABLE:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[TABLE]], i64 8)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.luti4.lane.x2.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 16 x i8> [[INDICES:%.*]], i32 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
+//
+// CPP-CHECK-LABEL: @_Z24test_svluti4_lane_s16_x211svint16x2_tu11__SVUint8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[TABLE:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[TABLE]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.luti4.lane.x2.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 16 x i8> [[INDICES:%.*]], i32 0)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
+//
+svint16_t test_svluti4_lane_s16_x2(svint16x2_t table, svuint8_t indices) MODE_ATTR{
+    return SVE_ACLE_FUNC(svluti4_lane,_s16,_x2)(table, indices, 0);
+}
+
+// SME-CHECK-LABEL: @test_svluti4_lane_u16_x2(
+// SME-CHECK-NEXT:  entry:
+// SME-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[TABLE:%.*]], i64 0)
+// SME-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[TABLE]], i64 8)
+// SME-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.luti4.x2.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 16 x i8> [[INDICES:%.*]], i32 7)
+// SME-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
+// CHECK-LABEL: @test_svluti4_lane_u16_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[TABLE:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[TABLE]], i64 8)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.luti4.lane.x2.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 16 x i8> [[INDICES:%.*]], i32 3)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
+//
+// CPP-CHECK-LABEL: @_Z24test_svluti4_lane_u16_x212svuint16x2_tu11__SVUint8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[TABLE:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[TABLE]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.luti4.lane.x2.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 16 x i8> [[INDICES:%.*]], i32 3)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
+//
+svuint16_t test_svluti4_lane_u16_x2(svuint16x2_t table, svuint8_t indices) MODE_ATTR{
+    return SVE_ACLE_FUNC(svluti4_lane,_u16,_x2)(table, indices, 3);
+}
+
+// SME-CHECK-LABEL: @test_svluti4_lane_f16_x2(
+// SME-CHECK-NEXT:  entry:
+// SME-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[TABLE:%.*]], i64 0)
+// SME-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[TABLE]], i64 8)
+// SME-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.luti4.x2.nxv8f16(<vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 16 x i8> [[INDICES:%.*]], i32 5)
+// SME-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
+// CHECK-LABEL: @test_svluti4_lane_f16_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[TABLE:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[TABLE]], i64 8)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.luti4.lane.x2.nxv8f16(<vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 16 x i8> [[INDICES:%.*]], i32 2)
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
+//
+// CPP-CHECK-LABEL: @_Z24test_svluti4_lane_f16_x213svfloat16x2_tu11__SVUint8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[TABLE:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[TABLE]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.luti4.lane.x2.nxv8f16(<vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 16 x i8> [[INDICES:%.*]], i32 2)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
+//
+svfloat16_t test_svluti4_lane_f16_x2(svfloat16x2_t table, svuint8_t indices) MODE_ATTR{
+    return SVE_ACLE_FUNC(svluti4_lane,_f16,_x2)(table, indices, 2);
+}
+
+// SME-CHECK-LABEL: @test_svluti4_lane_bf16_x2(
+// SME-CHECK-NEXT:  entry:
+// SME-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[TABLE:%.*]], i64 0)
+// SME-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[TABLE]], i64 8)
+// SME-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.luti4.x2.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 16 x i8> [[INDICES:%.*]], i32 2)
+// SME-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP2]]
+// CHECK-LABEL: @test_svluti4_lane_bf16_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[TABLE:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[TABLE]], i64 8)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.luti4.lane.x2.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 16 x i8> [[INDICES:%.*]], i32 1)
+// CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP2]]
+//
+// CPP-CHECK-LABEL: @_Z25test_svluti4_lane_bf16_x214svbfloat16x2_tu11__SVUint8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[TABLE:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[TABLE]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.luti4.lane.x2.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 16 x i8> [[INDICES:%.*]], i32 1)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP2]]
+//
+svbfloat16_t test_svluti4_lane_bf16_x2(svbfloat16x2_t table, svuint8_t indices) MODE_ATTR{
+    return SVE_ACLE_FUNC(svluti4_lane,_bf16,_x2)(table, indices, 1);
+}
diff --git clang/test/CodeGen/arm-neon-range-checks.c clang/test/CodeGen/arm-neon-range-checks.c
deleted file mode 100644
index 360ff6be1665..000000000000
--- clang/test/CodeGen/arm-neon-range-checks.c
+++ /dev/null
@@ -1,426 +0,0 @@
-// RUN: %clang_cc1 -triple arm64-none-eabi -target-feature +neon -target-feature +dotprod -target-feature +v8.1a -verify %s
-// RUN: %clang_cc1 -triple armv8.1a-none-eabi -target-feature +neon -target-feature +dotprod -target-feature +v8.1a -verify %s
-
-// REQUIRES: aarch64-registered-target || arm-registered-target
-
-#include <arm_neon.h>
-
-void test_vdot_lane(int32x2_t r, int8x8_t a, int8x8_t b) {
-  vdot_lane_s32(r, a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vdot_lane_s32(r, a, b, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vdot_lane_s32(r, a, b, 0);
-  vdot_lane_s32(r, a, b, 1);
-}
-
-void test_vdotq_lane(int32x4_t r, int8x16_t a, int8x8_t b) {
-  vdotq_lane_s32(r, a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vdotq_lane_s32(r, a, b, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vdotq_lane_s32(r, a, b, 0);
-  vdotq_lane_s32(r, a, b, 1);
-}
-
-#if defined(__aarch64__)
-void test_vdot_laneq(int32x2_t r, int8x8_t a, int8x16_t b) {
-  vdot_laneq_s32(r, a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vdot_laneq_s32(r, a, b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vdot_laneq_s32(r, a, b, 0);
-  vdot_laneq_s32(r, a, b, 3);
-}
-
-void test_vdotq_laneq(int32x4_t r, int8x16_t a, int8x16_t b) {
-  vdotq_laneq_s32(r, a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vdotq_laneq_s32(r, a, b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vdotq_laneq_s32(r, a, b, 0);
-  vdotq_laneq_s32(r, a, b, 3);
-}
-#endif
-
-void test_vdup_lane(int32x2_t v) {
-  vdup_lane_s32(v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vdup_lane_s32(v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vdup_lane_s32(v, 0);
-  vdup_lane_s32(v, 1);
-}
-
-void test_vdupq_lane(int32x2_t v) {
-  vdupq_lane_s32(v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vdupq_lane_s32(v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vdupq_lane_s32(v, 0);
-  vdupq_lane_s32(v, 1);
-}
-
-#if defined(__aarch64__)
-void test_vdup_laneq(int32x4_t v) {
-  vdup_laneq_s32(v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vdup_laneq_s32(v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vdup_laneq_s32(v, 0);
-  vdup_laneq_s32(v, 3);
-}
-
-void test_vdupq_laneq(int32x4_t v) {
-  vdupq_laneq_s32(v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vdupq_laneq_s32(v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vdupq_laneq_s32(v, 0);
-  vdupq_laneq_s32(v, 3);
-}
-#endif
-
-void test_vmla_lane(int32x2_t a, int32x2_t b, int32x2_t v) {
-  vmla_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vmla_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vmla_lane_s32(a, b, v, 0);
-  vmla_lane_s32(a, b, v, 1);
-}
-
-void test_vmlaq_lane(int32x4_t a, int32x4_t b, int32x2_t v) {
-  vmlaq_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vmlaq_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vmlaq_lane_s32(a, b, v, 0);
-  vmlaq_lane_s32(a, b, v, 1);
-}
-
-#if defined(__aarch64__)
-void test_vmla_laneq(int32x2_t a, int32x2_t b, int32x4_t v) {
-  vmla_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vmla_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vmla_laneq_s32(a, b, v, 0);
-  vmla_laneq_s32(a, b, v, 3);
-}
-
-void test_vmlaq_laneq(int32x4_t a, int32x4_t b, int32x4_t v) {
-  vmlaq_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vmlaq_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vmlaq_laneq_s32(a, b, v, 0);
-  vmlaq_laneq_s32(a, b, v, 3);
-}
-
-void test_vmlal_high_lane(int64x2_t a, int32x4_t b, int32x2_t v) {
-  vmlal_high_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vmlal_high_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vmlal_high_lane_s32(a, b, v, 0);
-  vmlal_high_lane_s32(a, b, v, 1);
-}
-
-void test_vmlal_high_laneq(int64x2_t a, int32x4_t b, int32x4_t v) {
-  vmlal_high_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vmlal_high_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vmlal_high_laneq_s32(a, b, v, 0);
-  vmlal_high_laneq_s32(a, b, v, 3);
-}
-#endif
-
-void test_vmlal_lane(int64x2_t a, int32x2_t b, int32x2_t v) {
-  vmlal_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vmlal_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vmlal_lane_s32(a, b, v, 0);
-  vmlal_lane_s32(a, b, v, 1);
-}
-
-#if defined(__aarch64__)
-void test_vmlal_laneq(int64x2_t a, int32x2_t b, int32x4_t v) {
-  vmlal_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vmlal_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vmlal_laneq_s32(a, b, v, 0);
-  vmlal_laneq_s32(a, b, v, 3);
-}
-#endif
-
-void test_vmls_lane(int32x2_t a, int32x2_t b, int32x2_t v) {
-  vmls_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vmls_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vmls_lane_s32(a, b, v, 0);
-  vmls_lane_s32(a, b, v, 1);
-}
-
-void test_vmlsq_lane(int32x4_t a, int32x4_t b, int32x2_t v) {
-  vmlsq_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vmlsq_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vmlsq_lane_s32(a, b, v, 0);
-  vmlsq_lane_s32(a, b, v, 1);
-}
-
-#if defined(__aarch64__)
-void test_vmls_laneq(int32x2_t a, int32x2_t b, int32x4_t v) {
-  vmls_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vmls_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vmls_laneq_s32(a, b, v, 0);
-  vmls_laneq_s32(a, b, v, 3);
-}
-
-void test_vmlsq_laneq(int32x4_t a, int32x4_t b, int32x4_t v) {
-  vmlsq_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vmlsq_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vmlsq_laneq_s32(a, b, v, 0);
-  vmlsq_laneq_s32(a, b, v, 3);
-}
-
-void test_vmlsl_high_lane(int64x2_t a, int32x4_t b, int32x2_t v) {
-  vmlsl_high_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vmlsl_high_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vmlsl_high_lane_s32(a, b, v, 0);
-  vmlsl_high_lane_s32(a, b, v, 1);
-}
-
-void test_vmlsl_high_laneq(int64x2_t a, int32x4_t b, int32x4_t v) {
-  vmlsl_high_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vmlsl_high_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vmlsl_high_laneq_s32(a, b, v, 0);
-  vmlsl_high_laneq_s32(a, b, v, 3);
-}
-#endif
-
-void test_vmlsl_lane(int64x2_t a, int32x2_t b, int32x2_t v) {
-  vmlsl_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vmlsl_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vmlsl_lane_s32(a, b, v, 0);
-  vmlsl_lane_s32(a, b, v, 1);
-}
-
-#if defined(__aarch64__)
-void test_vmlsl_laneq(int64x2_t a, int32x2_t b, int32x4_t v) {
-  vmlsl_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vmlsl_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vmlsl_laneq_s32(a, b, v, 0);
-  vmlsl_laneq_s32(a, b, v, 3);
-}
-#endif
-
-void test_vmull_lane(int32x2_t a, int32x2_t b) {
-  vmull_lane_s32(a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vmull_lane_s32(a, b, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vmull_lane_s32(a, b, 0);
-  vmull_lane_s32(a, b, 1);
-}
-
-#if defined(__aarch64__)
-void test_vmull_laneq(int32x2_t a, int32x4_t b) {
-  vmull_laneq_s32(a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vmull_laneq_s32(a, b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vmull_laneq_s32(a, b, 0);
-  vmull_laneq_s32(a, b, 3);
-}
-
-void test_vmull_high_lane(int32x4_t a, int32x2_t b) {
-  vmull_high_lane_s32(a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vmull_high_lane_s32(a, b, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vmull_high_lane_s32(a, b, 0);
-  vmull_high_lane_s32(a, b, 1);
-}
-
-void test_vmull_high_laneq(int32x4_t a, int32x4_t b) {
-  vmull_high_laneq_s32(a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vmull_high_laneq_s32(a, b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vmull_high_laneq_s32(a, b, 0);
-  vmull_high_laneq_s32(a, b, 3);
-}
-
-void test_vqdmlal_high_lane(int64x2_t a, int32x4_t b, int32x2_t v) {
-  vqdmlal_high_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vqdmlal_high_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vqdmlal_high_lane_s32(a, b, v, 0);
-  vqdmlal_high_lane_s32(a, b, v, 1);
-}
-
-void test_vqdmlal_high_laneq(int64x2_t a, int32x4_t b, int32x4_t v) {
-  vqdmlal_high_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vqdmlal_high_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vqdmlal_high_laneq_s32(a, b, v, 0);
-  vqdmlal_high_laneq_s32(a, b, v, 3);
-}
-#endif
-
-void test_vqdmlal_lane(int64x2_t a, int32x2_t b, int32x2_t v) {
-  vqdmlal_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vqdmlal_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vqdmlal_lane_s32(a, b, v, 0);
-  vqdmlal_lane_s32(a, b, v, 1);
-}
-
-#if defined(__aarch64__)
-void test_vqdmlal_laneq(int64x2_t a, int32x2_t b, int32x4_t v) {
-  vqdmlal_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vqdmlal_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vqdmlal_laneq_s32(a, b, v, 0);
-  vqdmlal_laneq_s32(a, b, v, 3);
-}
-
-void test_vqdmlsl_high_lane(int64x2_t a, int32x4_t b, int32x2_t v) {
-  vqdmlsl_high_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vqdmlsl_high_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vqdmlsl_high_lane_s32(a, b, v, 0);
-  vqdmlsl_high_lane_s32(a, b, v, 1);
-}
-
-void test_vqdmlsl_high_laneq(int64x2_t a, int32x4_t b, int32x4_t v) {
-  vqdmlsl_high_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vqdmlsl_high_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vqdmlsl_high_laneq_s32(a, b, v, 0);
-  vqdmlsl_high_laneq_s32(a, b, v, 3);
-}
-#endif
-
-void test_vqdmlsl_lane(int64x2_t a, int32x2_t b, int32x2_t v) {
-  vqdmlsl_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vqdmlsl_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vqdmlsl_lane_s32(a, b, v, 0);
-  vqdmlsl_lane_s32(a, b, v, 1);
-}
-
-#if defined(__aarch64__)
-void test_vqdmlsl_laneq(int64x2_t a, int32x2_t b, int32x4_t v) {
-  vqdmlsl_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vqdmlsl_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vqdmlsl_laneq_s32(a, b, v, 0);
-  vqdmlsl_laneq_s32(a, b, v, 3);
-}
-#endif
-
-void test_vqdmulh_lane(int32x2_t a, int32x2_t b) {
-  vqdmulh_lane_s32(a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vqdmulh_lane_s32(a, b, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vqdmulh_lane_s32(a, b, 0);
-  vqdmulh_lane_s32(a, b, 1);
-}
-
-void test_vqdmulhq_lane(int32x4_t a, int32x2_t b) {
-  vqdmulhq_lane_s32(a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vqdmulhq_lane_s32(a, b, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vqdmulhq_lane_s32(a, b, 0);
-  vqdmulhq_lane_s32(a, b, 1);
-}
-
-#if defined(__aarch64__)
-void test_vqdmulh_laneq(int32x2_t a, int32x4_t b) {
-  vqdmulh_laneq_s32(a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vqdmulh_laneq_s32(a, b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vqdmulh_laneq_s32(a, b, 0);
-  vqdmulh_laneq_s32(a, b, 3);
-}
-
-void test_vqdmulhq_laneq(int32x4_t a, int32x4_t b) {
-  vqdmulhq_laneq_s32(a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vqdmulhq_laneq_s32(a, b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vqdmulhq_laneq_s32(a, b, 0);
-  vqdmulhq_laneq_s32(a, b, 3);
-}
-
-void test_vqdmull_high_lane(int32x4_t a, int32x2_t b) {
-  vqdmull_high_lane_s32(a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vqdmull_high_lane_s32(a, b, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vqdmull_high_lane_s32(a, b, 0);
-  vqdmull_high_lane_s32(a, b, 1);
-}
-
-void test_vqdmull_high_laneq(int32x4_t a, int32x4_t b) {
-  vqdmull_high_laneq_s32(a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vqdmull_high_laneq_s32(a, b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vqdmull_high_laneq_s32(a, b, 0);
-  vqdmull_high_laneq_s32(a, b, 3);
-}
-#endif
-
-void test_vqdmull_lane(int32x2_t a, int32x2_t v) {
-  vqdmull_lane_s32(a, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vqdmull_lane_s32(a, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vqdmull_lane_s32(a, v, 0);
-  vqdmull_lane_s32(a, v, 1);
-}
-
-#if defined(__aarch64__)
-void test_vqdmull_laneq(int32x2_t a, int32x4_t v) {
-  vqdmull_laneq_s32(a, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vqdmull_laneq_s32(a, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vqdmull_laneq_s32(a, v, 0);
-  vqdmull_laneq_s32(a, v, 3);
-}
-#endif
-
-void test_vqrdmlah_lane(int32x2_t a, int32x2_t b, int32x2_t v) {
-  vqrdmlah_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vqrdmlah_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vqrdmlah_lane_s32(a, b, v, 0);
-  vqrdmlah_lane_s32(a, b, v, 1);
-}
-
-void test_vqrdmlahq_lane(int32x4_t a, int32x4_t b, int32x2_t v) {
-  vqrdmlahq_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vqrdmlahq_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vqrdmlahq_lane_s32(a, b, v, 0);
-  vqrdmlahq_lane_s32(a, b, v, 1);
-}
-
-#if defined(__aarch64__)
-void test_vqrdmlah_laneq(int32x2_t a, int32x2_t b, int32x4_t v) {
-  vqrdmlah_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vqrdmlah_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vqrdmlah_laneq_s32(a, b, v, 0);
-  vqrdmlah_laneq_s32(a, b, v, 3);
-}
-
-void test_vqrdmlahq_laneq(int32x4_t a, int32x4_t b, int32x4_t v) {
-  vqrdmlahq_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vqrdmlahq_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vqrdmlahq_laneq_s32(a, b, v, 0);
-  vqrdmlahq_laneq_s32(a, b, v, 3);
-}
-#endif
-
-void test_vqrdmlsh_lane(int32x2_t a, int32x2_t b, int32x2_t v) {
-  vqrdmlsh_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vqrdmlsh_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vqrdmlsh_lane_s32(a, b, v, 0);
-  vqrdmlsh_lane_s32(a, b, v, 1);
-}
-
-void test_vqrdmlshq_lane(int32x4_t a, int32x4_t b, int32x2_t v) {
-  vqrdmlshq_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vqrdmlshq_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vqrdmlshq_lane_s32(a, b, v, 0);
-  vqrdmlshq_lane_s32(a, b, v, 1);
-}
-
-#if defined(__aarch64__)
-void test_vqrdmlsh_laneq(int32x2_t a, int32x2_t b, int32x4_t v) {
-  vqrdmlsh_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vqrdmlsh_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vqrdmlsh_laneq_s32(a, b, v, 0);
-  vqrdmlsh_laneq_s32(a, b, v, 3);
-}
-
-void test_vqrdmlshq_laneq(int32x4_t a, int32x4_t b, int32x4_t v) {
-  vqrdmlshq_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vqrdmlshq_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vqrdmlshq_laneq_s32(a, b, v, 0);
-  vqrdmlshq_laneq_s32(a, b, v, 3);
-}
-#endif
-
-void test_vqrdmulh_lane(int32x2_t a, int32x2_t v) {
-  vqrdmulh_lane_s32(a, v,  -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vqrdmulh_lane_s32(a, v,  2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vqrdmulh_lane_s32(a, v,  0);
-  vqrdmulh_lane_s32(a, v,  1);
-}
-
-void test_vqrdmulhq_lane(int32x4_t a, int32x2_t v) {
-  vqrdmulhq_lane_s32(a, v,  -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vqrdmulhq_lane_s32(a, v,  2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vqrdmulhq_lane_s32(a, v,  0);
-  vqrdmulhq_lane_s32(a, v,  1);
-}
-
-#if defined(__aarch64__)
-void test_vqrdmulh_laneq(int32x2_t a, int32x4_t v) {
-  vqrdmulh_laneq_s32(a, v,  -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vqrdmulh_laneq_s32(a, v,  4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vqrdmulh_laneq_s32(a, v,  0);
-  vqrdmulh_laneq_s32(a, v,  3);
-}
-
-void test_vqrdmulhq_laneq(int32x4_t a, int32x4_t v) {
-  vqrdmulhq_laneq_s32(a, v,  -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vqrdmulhq_laneq_s32(a, v,  4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vqrdmulhq_laneq_s32(a, v,  0);
-  vqrdmulhq_laneq_s32(a, v,  3);
-}
-#endif
diff --git clang/test/CodeGen/attr-counted-by.c clang/test/CodeGen/attr-counted-by.c
index ab36b6e7720b..a06e815737f4 100644
--- clang/test/CodeGen/attr-counted-by.c
+++ clang/test/CodeGen/attr-counted-by.c
@@ -118,7 +118,7 @@ void test1(struct annotated *p, int index, int val) {
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont3:
 // SANITIZE-WITH-ATTR-NEXT:    [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
-// SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]]
+// SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]]
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP2:%.*]] = shl i32 [[DOT_COUNTED_BY_LOAD]], 2
 // SANITIZE-WITH-ATTR-NEXT:    [[DOTINV:%.*]] = icmp slt i32 [[DOT_COUNTED_BY_LOAD]], 0
 // SANITIZE-WITH-ATTR-NEXT:    [[CONV:%.*]] = select i1 [[DOTINV]], i32 0, i32 [[TMP2]]
@@ -134,7 +134,7 @@ void test1(struct annotated *p, int index, int val) {
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[DOTINV:%.*]] = icmp slt i32 [[DOT_COUNTED_BY_LOAD]], 0
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[CONV:%.*]] = select i1 [[DOTINV]], i32 0, i32 [[TMP0]]
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]]
+// NO-SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]]
 // NO-SANITIZE-WITH-ATTR-NEXT:    store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
 // NO-SANITIZE-WITH-ATTR-NEXT:    ret void
 //
@@ -142,7 +142,7 @@ void test1(struct annotated *p, int index, int val) {
 // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // SANITIZE-WITHOUT-ATTR-NEXT:  entry:
 // SANITIZE-WITHOUT-ATTR-NEXT:    [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
-// SANITIZE-WITHOUT-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]]
+// SANITIZE-WITHOUT-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]]
 // SANITIZE-WITHOUT-ATTR-NEXT:    store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
 // SANITIZE-WITHOUT-ATTR-NEXT:    ret void
 //
@@ -150,7 +150,7 @@ void test1(struct annotated *p, int index, int val) {
 // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // NO-SANITIZE-WITHOUT-ATTR-NEXT:  entry:
 // NO-SANITIZE-WITHOUT-ATTR-NEXT:    [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
-// NO-SANITIZE-WITHOUT-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]]
 // NO-SANITIZE-WITHOUT-ATTR-NEXT:    store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
 // NO-SANITIZE-WITHOUT-ATTR-NEXT:    ret void
 //
@@ -207,7 +207,7 @@ size_t test2_bdos(struct annotated *p) {
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont3:
 // SANITIZE-WITH-ATTR-NEXT:    [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
-// SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]]
+// SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]]
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP2:%.*]] = sext i32 [[DOT_COUNTED_BY_LOAD]] to i64
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP3:%.*]] = shl nsw i64 [[TMP2]], 2
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP4:%.*]] = tail call i64 @llvm.smax.i64(i64 [[TMP3]], i64 4)
@@ -231,7 +231,7 @@ size_t test2_bdos(struct annotated *p) {
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[DOTINV:%.*]] = icmp slt i32 [[DOT_COUNTED_BY_LOAD]], 0
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[CONV:%.*]] = select i1 [[DOTINV]], i32 0, i32 [[TMP4]]
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]]
+// NO-SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]]
 // NO-SANITIZE-WITH-ATTR-NEXT:    store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
 // NO-SANITIZE-WITH-ATTR-NEXT:    ret void
 //
@@ -239,7 +239,7 @@ size_t test2_bdos(struct annotated *p) {
 // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // SANITIZE-WITHOUT-ATTR-NEXT:  entry:
 // SANITIZE-WITHOUT-ATTR-NEXT:    [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
-// SANITIZE-WITHOUT-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]]
+// SANITIZE-WITHOUT-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]]
 // SANITIZE-WITHOUT-ATTR-NEXT:    store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
 // SANITIZE-WITHOUT-ATTR-NEXT:    ret void
 //
@@ -247,7 +247,7 @@ size_t test2_bdos(struct annotated *p) {
 // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // NO-SANITIZE-WITHOUT-ATTR-NEXT:  entry:
 // NO-SANITIZE-WITHOUT-ATTR-NEXT:    [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
-// NO-SANITIZE-WITHOUT-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]]
 // NO-SANITIZE-WITHOUT-ATTR-NEXT:    store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
 // NO-SANITIZE-WITHOUT-ATTR-NEXT:    ret void
 //
diff --git clang/test/CodeGen/builtins-wasm.c clang/test/CodeGen/builtins-wasm.c
index 3010b8954f1c..8943a92faad0 100644
--- clang/test/CodeGen/builtins-wasm.c
+++ clang/test/CodeGen/builtins-wasm.c
@@ -834,16 +834,16 @@ f16x8 splat_f16x8(float a) {
   return __builtin_wasm_splat_f16x8(a);
 }
 
-float extract_lane_f16x8(f16x8 a, int i) {
-  // WEBASSEMBLY:  %0 = tail call float @llvm.wasm.extract.lane.f16x8(<8 x half> %a, i32 %i)
+float extract_lane_f16x8(f16x8 a) {
+  // WEBASSEMBLY:  %0 = tail call float @llvm.wasm.extract.lane.f16x8(<8 x half> %a, i32 7)
   // WEBASSEMBLY-NEXT: ret float %0
-  return __builtin_wasm_extract_lane_f16x8(a, i);
+  return __builtin_wasm_extract_lane_f16x8(a, 7);
 }
 
-f16x8 replace_lane_f16x8(f16x8 a, int i, float v) {
-  // WEBASSEMBLY:  %0 = tail call <8 x half> @llvm.wasm.replace.lane.f16x8(<8 x half> %a, i32 %i, float %v)
+f16x8 replace_lane_f16x8(f16x8 a, float v) {
+  // WEBASSEMBLY:  %0 = tail call <8 x half> @llvm.wasm.replace.lane.f16x8(<8 x half> %a, i32 7, float %v)
   // WEBASSEMBLY-NEXT: ret <8 x half> %0
-  return __builtin_wasm_replace_lane_f16x8(a, i, v);
+  return __builtin_wasm_replace_lane_f16x8(a, 7, v);
 }
 
 f16x8 min_f16x8(f16x8 a, f16x8 b) {
diff --git clang/test/CodeGen/catch-nullptr-and-nonzero-offset-when-nullptr-is-defined.c clang/test/CodeGen/catch-nullptr-and-nonzero-offset-when-nullptr-is-defined.c
index 39ede01d6e3b..8a560a47ad1e 100644
--- clang/test/CodeGen/catch-nullptr-and-nonzero-offset-when-nullptr-is-defined.c
+++ clang/test/CodeGen/catch-nullptr-and-nonzero-offset-when-nullptr-is-defined.c
@@ -33,7 +33,7 @@ char *add_unsigned(char *base, unsigned long offset) {
   // CHECK-NEXT:                        store i64 %[[OFFSET]], ptr %[[OFFSET_ADDR]], align 8
   // CHECK-NEXT:                        %[[BASE_RELOADED:.*]] = load ptr, ptr %[[BASE_ADDR]], align 8
   // CHECK-NEXT:                        %[[OFFSET_RELOADED:.*]] = load i64, ptr %[[OFFSET_ADDR]], align 8
-  // CHECK-NEXT:                        %[[ADD_PTR:.*]] = getelementptr inbounds i8, ptr %[[BASE_RELOADED]], i64 %[[OFFSET_RELOADED]]
+  // CHECK-NEXT:                        %[[ADD_PTR:.*]] = getelementptr inbounds nuw i8, ptr %[[BASE_RELOADED]], i64 %[[OFFSET_RELOADED]]
   // CHECK-SANITIZE-NEXT:               %[[COMPUTED_OFFSET_AGGREGATE:.*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1, i64 %[[OFFSET_RELOADED]]), !nosanitize
   // CHECK-SANITIZE-NEXT:               %[[COMPUTED_OFFSET_OVERFLOWED:.*]] = extractvalue { i64, i1 } %[[COMPUTED_OFFSET_AGGREGATE]], 1, !nosanitize
   // CHECK-SANITIZE-NEXT:               %[[OR_OV:.+]] = or i1 %[[COMPUTED_OFFSET_OVERFLOWED]], false, !nosanitize
diff --git clang/test/CodeGen/catch-nullptr-and-nonzero-offset.c clang/test/CodeGen/catch-nullptr-and-nonzero-offset.c
index e93dbcb9f647..d884993ffb2b 100644
--- clang/test/CodeGen/catch-nullptr-and-nonzero-offset.c
+++ clang/test/CodeGen/catch-nullptr-and-nonzero-offset.c
@@ -50,7 +50,7 @@ char *var_var(char *base, unsigned long offset) {
   // CHECK-NEXT:                        store i64 %[[OFFSET]], ptr %[[OFFSET_ADDR]], align 8
   // CHECK-NEXT:                        %[[BASE_RELOADED:.*]] = load ptr, ptr %[[BASE_ADDR]], align 8
   // CHECK-NEXT:                        %[[OFFSET_RELOADED:.*]] = load i64, ptr %[[OFFSET_ADDR]], align 8
-  // CHECK-NEXT:                        %[[ADD_PTR:.*]] = getelementptr inbounds i8, ptr %[[BASE_RELOADED]], i64 %[[OFFSET_RELOADED]]
+  // CHECK-NEXT:                        %[[ADD_PTR:.*]] = getelementptr inbounds nuw i8, ptr %[[BASE_RELOADED]], i64 %[[OFFSET_RELOADED]]
   // CHECK-SANITIZE-NEXT:               %[[COMPUTED_OFFSET_AGGREGATE:.*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1, i64 %[[OFFSET_RELOADED]]), !nosanitize
   // CHECK-SANITIZE-NEXT:               %[[COMPUTED_OFFSET_OVERFLOWED:.*]] = extractvalue { i64, i1 } %[[COMPUTED_OFFSET_AGGREGATE]], 1, !nosanitize
   // CHECK-SANITIZE-NEXT:               %[[OR_OV:.+]] = or i1 %[[COMPUTED_OFFSET_OVERFLOWED]], false, !nosanitize
@@ -83,7 +83,7 @@ char *var_zero(char *base) {
   // CHECK-NEXT:                          %[[BASE_ADDR:.*]] = alloca ptr, align 8
   // CHECK-NEXT:                          store ptr %[[BASE]], ptr %[[BASE_ADDR]], align 8
   // CHECK-NEXT:                          %[[BASE_RELOADED:.*]] = load ptr, ptr %[[BASE_ADDR]], align 8
-  // CHECK-NEXT:                          %[[ADD_PTR:.*]] = getelementptr inbounds i8, ptr %[[BASE_RELOADED]], i64 0
+  // CHECK-NEXT:                          %[[ADD_PTR:.*]] = getelementptr inbounds nuw i8, ptr %[[BASE_RELOADED]], i64 0
   // CHECK-SANITIZE-C-NEXT:               %[[BASE_RELOADED_INT:.*]] = ptrtoint ptr %[[BASE_RELOADED]] to i64, !nosanitize
   // CHECK-SANITIZE-C-NEXT:               %[[COMPUTED_GEP:.*]] = add i64 %[[BASE_RELOADED_INT]], 0, !nosanitize
   // CHECK-SANITIZE-C-NEXT:               %[[BASE_IS_NOT_NULLPTR:.*]] = icmp ne ptr %[[BASE_RELOADED]], null, !nosanitize
@@ -111,7 +111,7 @@ char *var_one(char *base) {
   // CHECK-NEXT:                        %[[BASE_ADDR:.*]] = alloca ptr, align 8
   // CHECK-NEXT:                        store ptr %[[BASE]], ptr %[[BASE_ADDR]], align 8
   // CHECK-NEXT:                        %[[BASE_RELOADED:.*]] = load ptr, ptr %[[BASE_ADDR]], align 8
-  // CHECK-NEXT:                        %[[ADD_PTR:.*]] = getelementptr inbounds i8, ptr %[[BASE_RELOADED]], i64 1
+  // CHECK-NEXT:                        %[[ADD_PTR:.*]] = getelementptr inbounds nuw i8, ptr %[[BASE_RELOADED]], i64 1
   // CHECK-SANITIZE-NEXT:               %[[BASE_RELOADED_INT:.*]] = ptrtoint ptr %[[BASE_RELOADED]] to i64, !nosanitize
   // CHECK-SANITIZE-NEXT:               %[[COMPUTED_GEP:.*]] = add i64 %[[BASE_RELOADED_INT]], 1, !nosanitize
   // CHECK-SANITIZE-NEXT:               %[[BASE_IS_NOT_NULLPTR:.*]] = icmp ne ptr %[[BASE_RELOADED]], null, !nosanitize
@@ -140,7 +140,7 @@ char *var_allones(char *base) {
   // CHECK-NEXT:                        %[[BASE_ADDR:.*]] = alloca ptr, align 8
   // CHECK-NEXT:                        store ptr %[[BASE]], ptr %[[BASE_ADDR]], align 8
   // CHECK-NEXT:                        %[[BASE_RELOADED:.*]] = load ptr, ptr %[[BASE_ADDR]], align 8
-  // CHECK-NEXT:                        %[[ADD_PTR:.*]] = getelementptr inbounds i8, ptr %[[BASE_RELOADED]], i64 -1
+  // CHECK-NEXT:                        %[[ADD_PTR:.*]] = getelementptr inbounds nuw i8, ptr %[[BASE_RELOADED]], i64 -1
   // CHECK-SANITIZE-NEXT:               %[[BASE_RELOADED_INT:.*]] = ptrtoint ptr %[[BASE_RELOADED]] to i64, !nosanitize
   // CHECK-SANITIZE-NEXT:               %[[COMPUTED_GEP:.*]] = add i64 %[[BASE_RELOADED_INT]], -1, !nosanitize
   // CHECK-SANITIZE-NEXT:               %[[BASE_IS_NOT_NULLPTR:.*]] = icmp ne ptr %[[BASE_RELOADED]], null, !nosanitize
@@ -171,7 +171,7 @@ char *nullptr_var(unsigned long offset) {
   // CHECK-NEXT:                        %[[OFFSET_ADDR:.*]] = alloca i64, align 8
   // CHECK-NEXT:                        store i64 %[[OFFSET]], ptr %[[OFFSET_ADDR]], align 8
   // CHECK-NEXT:                        %[[OFFSET_RELOADED:.*]] = load i64, ptr %[[OFFSET_ADDR]], align 8
-  // CHECK-NEXT:                        %[[ADD_PTR:.*]] = getelementptr inbounds i8, ptr null, i64 %[[OFFSET_RELOADED]]
+  // CHECK-NEXT:                        %[[ADD_PTR:.*]] = getelementptr inbounds nuw i8, ptr null, i64 %[[OFFSET_RELOADED]]
   // CHECK-SANITIZE-NEXT:               %[[COMPUTED_OFFSET_AGGREGATE:.*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1, i64 %[[OFFSET_RELOADED]]), !nosanitize
   // CHECK-SANITIZE-NEXT:               %[[COMPUTED_OFFSET_OVERFLOWED:.*]] = extractvalue { i64, i1 } %[[COMPUTED_OFFSET_AGGREGATE]], 1, !nosanitize
   // CHECK-SANITIZE-NEXT:               %[[OR_OV:.+]] = or i1 %[[COMPUTED_OFFSET_OVERFLOWED]], false, !nosanitize
@@ -217,17 +217,17 @@ char *nullptr_zero(void) {
 char *nullptr_one_BAD(void) {
   // CHECK:                           define{{.*}} ptr @nullptr_one_BAD()
   // CHECK-NEXT:                      [[ENTRY:.*]]:
-  // CHECK-SANITIZE-NEXT:             %[[CMP:.*]] = icmp ne i64 ptrtoint (ptr getelementptr inbounds (i8, ptr null, i64 1) to i64), 0, !nosanitize
+  // CHECK-SANITIZE-NEXT:             %[[CMP:.*]] = icmp ne i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr null, i64 1) to i64), 0, !nosanitize
   // CHECK-SANITIZE-C-NEXT:           %[[COND:.*]] = and i1 false, %[[CMP]], !nosanitize
   // CHECK-SANITIZE-CPP-NEXT:         %[[COND:.*]] = icmp eq i1 false, %[[CMP]], !nosanitize
   // CHECK-SANITIZE-NEXT:             br i1 %[[COND]], label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize
   // CHECK-SANITIZE:                  [[HANDLER_POINTER_OVERFLOW]]:
-  // CHECK-SANITIZE-NORECOVER-NEXT:     call void @__ubsan_handle_pointer_overflow_abort(ptr @[[LINE_700]], i64 0, i64 ptrtoint (ptr getelementptr inbounds (i8, ptr null, i64 1) to i64))
-  // CHECK-SANITIZE-RECOVER-NEXT:       call void @__ubsan_handle_pointer_overflow(ptr @[[LINE_700]], i64 0, i64 ptrtoint (ptr getelementptr inbounds (i8, ptr null, i64 1) to i64))
+  // CHECK-SANITIZE-NORECOVER-NEXT:     call void @__ubsan_handle_pointer_overflow_abort(ptr @[[LINE_700]], i64 0, i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr null, i64 1) to i64))
+  // CHECK-SANITIZE-RECOVER-NEXT:       call void @__ubsan_handle_pointer_overflow(ptr @[[LINE_700]], i64 0, i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr null, i64 1) to i64))
   // CHECK-SANITIZE-TRAP-NEXT:          call void @llvm.ubsantrap(i8 19){{.*}}, !nosanitize
   // CHECK-SANITIZE-UNREACHABLE-NEXT:   unreachable, !nosanitize
   // CHECK-SANITIZE:                  [[CONT]]:
-  // CHECK-NEXT:                        ret ptr getelementptr inbounds (i8, ptr null, i64 1)
+  // CHECK-NEXT:                        ret ptr getelementptr inbounds nuw (i8, ptr null, i64 1)
   static char *const base = (char *)0;
   static const unsigned long offset = 1;
 #line 700
@@ -237,17 +237,17 @@ char *nullptr_one_BAD(void) {
 char *nullptr_allones_BAD(void) {
   // CHECK:                           define{{.*}} ptr @nullptr_allones_BAD()
   // CHECK-NEXT:                      [[ENTRY:.*]]:
-  // CHECK-SANITIZE-NEXT:             %[[CMP:.*]] = icmp ne i64 ptrtoint (ptr getelementptr inbounds (i8, ptr null, i64 -1) to i64), 0, !nosanitize
+  // CHECK-SANITIZE-NEXT:             %[[CMP:.*]] = icmp ne i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr null, i64 -1) to i64), 0, !nosanitize
   // CHECK-SANITIZE-C-NEXT:           %[[COND:.*]] = and i1 false, %[[CMP]], !nosanitize
   // CHECK-SANITIZE-CPP-NEXT:         %[[COND:.*]] = icmp eq i1 false, %[[CMP]], !nosanitize
   // CHECK-SANITIZE-NEXT:             br i1 %[[COND]], label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize
   // CHECK-SANITIZE:                  [[HANDLER_POINTER_OVERFLOW]]:
-  // CHECK-SANITIZE-NORECOVER-NEXT:     call void @__ubsan_handle_pointer_overflow_abort(ptr @[[LINE_800]], i64 0, i64 ptrtoint (ptr getelementptr inbounds (i8, ptr null, i64 -1) to i64))
-  // CHECK-SANITIZE-RECOVER-NEXT:       call void @__ubsan_handle_pointer_overflow(ptr @[[LINE_800]], i64 0, i64 ptrtoint (ptr getelementptr inbounds (i8, ptr null, i64 -1) to i64))
+  // CHECK-SANITIZE-NORECOVER-NEXT:     call void @__ubsan_handle_pointer_overflow_abort(ptr @[[LINE_800]], i64 0, i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr null, i64 -1) to i64))
+  // CHECK-SANITIZE-RECOVER-NEXT:       call void @__ubsan_handle_pointer_overflow(ptr @[[LINE_800]], i64 0, i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr null, i64 -1) to i64))
   // CHECK-SANITIZE-TRAP-NEXT:          call void @llvm.ubsantrap(i8 19){{.*}}, !nosanitize
   // CHECK-SANITIZE-UNREACHABLE-NEXT:   unreachable, !nosanitize
   // CHECK-SANITIZE:                  [[CONT]]:
-  // CHECK-NEXT:                        ret ptr getelementptr inbounds (i8, ptr null, i64 -1)
+  // CHECK-NEXT:                        ret ptr getelementptr inbounds nuw (i8, ptr null, i64 -1)
   static char *const base = (char *)0;
   static const unsigned long offset = -1;
 #line 800
@@ -262,7 +262,7 @@ char *one_var(unsigned long offset) {
   // CHECK-NEXT:                        %[[OFFSET_ADDR:.*]] = alloca i64, align 8
   // CHECK-NEXT:                        store i64 %[[OFFSET]], ptr %[[OFFSET_ADDR]], align 8
   // CHECK-NEXT:                        %[[OFFSET_RELOADED:.*]] = load i64, ptr %[[OFFSET_ADDR]], align 8
-  // CHECK-NEXT:                        %[[ADD_PTR:.*]] = getelementptr inbounds i8, ptr inttoptr (i64 1 to ptr), i64 %[[OFFSET_RELOADED]]
+  // CHECK-NEXT:                        %[[ADD_PTR:.*]] = getelementptr inbounds nuw i8, ptr inttoptr (i64 1 to ptr), i64 %[[OFFSET_RELOADED]]
   // CHECK-SANITIZE-NEXT:               %[[COMPUTED_OFFSET_AGGREGATE:.*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1, i64 %[[OFFSET_RELOADED]]), !nosanitize
   // CHECK-SANITIZE-NEXT:               %[[COMPUTED_OFFSET_OVERFLOWED:.*]] = extractvalue { i64, i1 } %[[COMPUTED_OFFSET_AGGREGATE]], 1, !nosanitize
   // CHECK-SANITIZE-NEXT:               %[[OR_OV:.+]] = or i1 %[[COMPUTED_OFFSET_OVERFLOWED]], false, !nosanitize
@@ -312,17 +312,17 @@ char *one_one_OK(void) {
   // CHECK:                           define{{.*}} ptr @one_one_OK()
   // CHECK-NEXT:                      [[ENTRY:.*]]:
   // CHECK-SANITIZE-NEXT:               %[[CMP1:.*]] = icmp ne ptr inttoptr (i64 1 to ptr), null, !nosanitize
-  // CHECK-SANITIZE-NEXT:               %[[CMP2:.*]] = icmp ne i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds (i8, ptr inttoptr (i64 1 to ptr), i64 1) to i64), i64 1), i64 1), 0, !nosanitize
+  // CHECK-SANITIZE-NEXT:               %[[CMP2:.*]] = icmp ne i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr inttoptr (i64 1 to ptr), i64 1) to i64), i64 1), i64 1), 0, !nosanitize
   // CHECK-SANITIZE-C-NEXT:             %[[COND:.*]] = and i1 %[[CMP1]], %[[CMP2]], !nosanitize
   // CHECK-SANITIZE-CPP-NEXT:           %[[COND:.*]] = icmp eq i1 %[[CMP1]], %[[CMP2]], !nosanitize
   // CHECK-SANITIZE-NEXT:               br i1 %[[COND]], label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize
   // CHECK-SANITIZE:                  [[HANDLER_POINTER_OVERFLOW]]:
-  // CHECK-SANITIZE-NORECOVER-NEXT:     call void @__ubsan_handle_pointer_overflow_abort(ptr @[[LINE_1100]], i64 1, i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds (i8, ptr inttoptr (i64 1 to ptr), i64 1) to i64), i64 1), i64 1))
-  // CHECK-SANITIZE-RECOVER-NEXT:       call void @__ubsan_handle_pointer_overflow(ptr @[[LINE_1100]], i64 1, i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds (i8, ptr inttoptr (i64 1 to ptr), i64 1) to i64), i64 1), i64 1))
+  // CHECK-SANITIZE-NORECOVER-NEXT:     call void @__ubsan_handle_pointer_overflow_abort(ptr @[[LINE_1100]], i64 1, i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr inttoptr (i64 1 to ptr), i64 1) to i64), i64 1), i64 1))
+  // CHECK-SANITIZE-RECOVER-NEXT:       call void @__ubsan_handle_pointer_overflow(ptr @[[LINE_1100]], i64 1, i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr inttoptr (i64 1 to ptr), i64 1) to i64), i64 1), i64 1))
   // CHECK-SANITIZE-TRAP-NEXT:          call void @llvm.ubsantrap(i8 19){{.*}}, !nosanitize
   // CHECK-SANITIZE-UNREACHABLE-NEXT:   unreachable, !nosanitize
   // CHECK-SANITIZE:                  [[CONT]]:
-  // CHECK-NEXT:                        ret ptr getelementptr inbounds (i8, ptr inttoptr (i64 1 to ptr), i64 1)
+  // CHECK-NEXT:                        ret ptr getelementptr inbounds nuw (i8, ptr inttoptr (i64 1 to ptr), i64 1)
   static char *const base = (char *)1;
   static const unsigned long offset = 1;
 #line 1100
@@ -333,17 +333,17 @@ char *one_allones_BAD(void) {
   // CHECK:                           define{{.*}} ptr @one_allones_BAD()
   // CHECK-NEXT:                      [[ENTRY:.*]]:
   // CHECK-SANITIZE-NEXT:               %[[CMP1:.*]] = icmp ne ptr inttoptr (i64 1 to ptr), null, !nosanitize
-  // CHECK-SANITIZE-NEXT:               %[[CMP2:.*]] = icmp ne i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds (i8, ptr inttoptr (i64 1 to ptr), i64 -1) to i64), i64 1), i64 1), 0, !nosanitize
+  // CHECK-SANITIZE-NEXT:               %[[CMP2:.*]] = icmp ne i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr inttoptr (i64 1 to ptr), i64 -1) to i64), i64 1), i64 1), 0, !nosanitize
   // CHECK-SANITIZE-C-NEXT:             %[[COND:.*]] = and i1 %[[CMP1]], %[[CMP2]], !nosanitize
   // CHECK-SANITIZE-CPP-NEXT:           %[[COND:.*]] = icmp eq i1 %[[CMP1]], %[[CMP2]], !nosanitize
   // CHECK-SANITIZE-NEXT:               br i1 %[[COND]], label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize
   // CHECK-SANITIZE:                  [[HANDLER_POINTER_OVERFLOW]]:
-  // CHECK-SANITIZE-NORECOVER-NEXT:     call void @__ubsan_handle_pointer_overflow_abort(ptr @[[LINE_1200]], i64 1, i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds (i8, ptr inttoptr (i64 1 to ptr), i64 -1) to i64), i64 1), i64 1))
-  // CHECK-SANITIZE-RECOVER-NEXT:       call void @__ubsan_handle_pointer_overflow(ptr @[[LINE_1200]], i64 1, i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds (i8, ptr inttoptr (i64 1 to ptr), i64 -1) to i64), i64 1), i64 1))
+  // CHECK-SANITIZE-NORECOVER-NEXT:     call void @__ubsan_handle_pointer_overflow_abort(ptr @[[LINE_1200]], i64 1, i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr inttoptr (i64 1 to ptr), i64 -1) to i64), i64 1), i64 1))
+  // CHECK-SANITIZE-RECOVER-NEXT:       call void @__ubsan_handle_pointer_overflow(ptr @[[LINE_1200]], i64 1, i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr inttoptr (i64 1 to ptr), i64 -1) to i64), i64 1), i64 1))
   // CHECK-SANITIZE-TRAP-NEXT:          call void @llvm.ubsantrap(i8 19){{.*}}, !nosanitize
   // CHECK-SANITIZE-UNREACHABLE-NEXT:   unreachable, !nosanitize
   // CHECK-SANITIZE:                  [[CONT]]:
-  // CHECK-NEXT:                        ret ptr getelementptr inbounds (i8, ptr inttoptr (i64 1 to ptr), i64 -1)
+  // CHECK-NEXT:                        ret ptr getelementptr inbounds nuw (i8, ptr inttoptr (i64 1 to ptr), i64 -1)
   static char *const base = (char *)1;
   static const unsigned long offset = -1;
 #line 1200
@@ -358,7 +358,7 @@ char *allones_var(unsigned long offset) {
   // CHECK-NEXT:                        %[[OFFSET_ADDR:.*]] = alloca i64, align 8
   // CHECK-NEXT:                        store i64 %[[OFFSET]], ptr %[[OFFSET_ADDR]], align 8
   // CHECK-NEXT:                        %[[OFFSET_RELOADED:.*]] = load i64, ptr %[[OFFSET_ADDR]], align 8
-  // CHECK-NEXT:                        %[[ADD_PTR:.*]] = getelementptr inbounds i8, ptr inttoptr (i64 -1 to ptr), i64 %[[OFFSET_RELOADED]]
+  // CHECK-NEXT:                        %[[ADD_PTR:.*]] = getelementptr inbounds nuw i8, ptr inttoptr (i64 -1 to ptr), i64 %[[OFFSET_RELOADED]]
   // CHECK-SANITIZE-NEXT:               %[[COMPUTED_OFFSET_AGGREGATE:.*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1, i64 %[[OFFSET_RELOADED]]), !nosanitize
   // CHECK-SANITIZE-NEXT:               %[[COMPUTED_OFFSET_OVERFLOWED:.*]] = extractvalue { i64, i1 } %[[COMPUTED_OFFSET_AGGREGATE]], 1, !nosanitize
   // CHECK-SANITIZE-NEXT:               %[[OR_OV:.+]] = or i1 %[[COMPUTED_OFFSET_OVERFLOWED]], false, !nosanitize
@@ -408,17 +408,17 @@ char *allones_one_BAD(void) {
   // CHECK: define{{.*}} ptr @allones_one_BAD()
   // CHECK-NEXT: [[ENTRY:.*]]:
   // CHECK-SANITIZE-NEXT:               %[[CMP1:.*]] = icmp ne ptr inttoptr (i64 -1 to ptr), null, !nosanitize
-  // CHECK-SANITIZE-NEXT:               %[[CMP2:.*]] = icmp ne i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds (i8, ptr inttoptr (i64 -1 to ptr), i64 1) to i64), i64 -1), i64 -1), 0, !nosanitize
+  // CHECK-SANITIZE-NEXT:               %[[CMP2:.*]] = icmp ne i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr inttoptr (i64 -1 to ptr), i64 1) to i64), i64 -1), i64 -1), 0, !nosanitize
   // CHECK-SANITIZE-C-NEXT:             %[[COND:.*]] = and i1 %[[CMP1]], %[[CMP2]], !nosanitize
   // CHECK-SANITIZE-CPP-NEXT:           %[[COND:.*]] = icmp eq i1 %[[CMP1]], %[[CMP2]], !nosanitize
   // CHECK-SANITIZE-NEXT:               br i1 %[[COND]], label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize
   // CHECK-SANITIZE:                  [[HANDLER_POINTER_OVERFLOW]]:
-  // CHECK-SANITIZE-NORECOVER-NEXT:     call void @__ubsan_handle_pointer_overflow_abort(ptr @[[LINE_1500]], i64 -1, i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds (i8, ptr inttoptr (i64 -1 to ptr), i64 1) to i64), i64 -1), i64 -1))
-  // CHECK-SANITIZE-RECOVER-NEXT:       call void @__ubsan_handle_pointer_overflow(ptr @[[LINE_1500]], i64 -1, i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds (i8, ptr inttoptr (i64 -1 to ptr), i64 1) to i64), i64 -1), i64 -1))
+  // CHECK-SANITIZE-NORECOVER-NEXT:     call void @__ubsan_handle_pointer_overflow_abort(ptr @[[LINE_1500]], i64 -1, i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr inttoptr (i64 -1 to ptr), i64 1) to i64), i64 -1), i64 -1))
+  // CHECK-SANITIZE-RECOVER-NEXT:       call void @__ubsan_handle_pointer_overflow(ptr @[[LINE_1500]], i64 -1, i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr inttoptr (i64 -1 to ptr), i64 1) to i64), i64 -1), i64 -1))
   // CHECK-SANITIZE-TRAP-NEXT:          call void @llvm.ubsantrap(i8 19){{.*}}, !nosanitize
   // CHECK-SANITIZE-UNREACHABLE-NEXT:   unreachable, !nosanitize
   // CHECK-SANITIZE:                  [[CONT]]:
-  // CHECK-NEXT:                        ret ptr getelementptr inbounds (i8, ptr inttoptr (i64 -1 to ptr), i64 1)
+  // CHECK-NEXT:                        ret ptr getelementptr inbounds nuw (i8, ptr inttoptr (i64 -1 to ptr), i64 1)
   static char *const base = (char *)-1;
   static const unsigned long offset = 1;
 #line 1500
@@ -429,17 +429,17 @@ char *allones_allones_OK(void) {
   // CHECK: define{{.*}} ptr @allones_allones_OK()
   // CHECK-NEXT: [[ENTRY:.*]]:
   // CHECK-SANITIZE-NEXT:               %[[CMP1:.*]] = icmp ne ptr inttoptr (i64 -1 to ptr), null, !nosanitize
-  // CHECK-SANITIZE-NEXT:               %[[CMP2:.*]] = icmp ne i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds (i8, ptr inttoptr (i64 -1 to ptr), i64 -1) to i64), i64 -1), i64 -1), 0, !nosanitize
+  // CHECK-SANITIZE-NEXT:               %[[CMP2:.*]] = icmp ne i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr inttoptr (i64 -1 to ptr), i64 -1) to i64), i64 -1), i64 -1), 0, !nosanitize
   // CHECK-SANITIZE-C-NEXT:             %[[COND:.*]] = and i1 %[[CMP1]], %[[CMP2]], !nosanitize
   // CHECK-SANITIZE-CPP-NEXT:           %[[COND:.*]] = icmp eq i1 %[[CMP1]], %[[CMP2]], !nosanitize
   // CHECK-SANITIZE-NEXT:               br i1 %[[COND]], label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize
   // CHECK-SANITIZE:                  [[HANDLER_POINTER_OVERFLOW]]:
-  // CHECK-SANITIZE-NORECOVER-NEXT:     call void @__ubsan_handle_pointer_overflow_abort(ptr @[[LINE_1600]], i64 -1, i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds (i8, ptr inttoptr (i64 -1 to ptr), i64 -1) to i64), i64 -1), i64 -1))
-  // CHECK-SANITIZE-RECOVER-NEXT:       call void @__ubsan_handle_pointer_overflow(ptr @[[LINE_1600]], i64 -1, i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds (i8, ptr inttoptr (i64 -1 to ptr), i64 -1) to i64), i64 -1), i64 -1))
+  // CHECK-SANITIZE-NORECOVER-NEXT:     call void @__ubsan_handle_pointer_overflow_abort(ptr @[[LINE_1600]], i64 -1, i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr inttoptr (i64 -1 to ptr), i64 -1) to i64), i64 -1), i64 -1))
+  // CHECK-SANITIZE-RECOVER-NEXT:       call void @__ubsan_handle_pointer_overflow(ptr @[[LINE_1600]], i64 -1, i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr inttoptr (i64 -1 to ptr), i64 -1) to i64), i64 -1), i64 -1))
   // CHECK-SANITIZE-TRAP-NEXT:          call void @llvm.ubsantrap(i8 19){{.*}}, !nosanitize
   // CHECK-SANITIZE-UNREACHABLE-NEXT:   unreachable, !nosanitize
   // CHECK-SANITIZE:                  [[CONT]]:
-  // CHECK-NEXT:                        ret ptr getelementptr inbounds (i8, ptr inttoptr (i64 -1 to ptr), i64 -1)
+  // CHECK-NEXT:                        ret ptr getelementptr inbounds nuw (i8, ptr inttoptr (i64 -1 to ptr), i64 -1)
   static char *const base = (char *)-1;
   static const unsigned long offset = -1;
 #line 1600
@@ -461,7 +461,7 @@ char *void_ptr(void *base, unsigned long offset) {
   // CHECK-NEXT:                        store i64 %[[OFFSET]], ptr %[[OFFSET_ADDR]], align 8
   // CHECK-NEXT:                        %[[BASE_RELOADED:.*]] = load ptr, ptr %[[BASE_ADDR]], align 8
   // CHECK-NEXT:                        %[[OFFSET_RELOADED:.*]] = load i64, ptr %[[OFFSET_ADDR]], align 8
-  // CHECK-NEXT:                        %[[ADD_PTR:.*]] = getelementptr inbounds i8, ptr %[[BASE_RELOADED]], i64 %[[OFFSET_RELOADED]]
+  // CHECK-NEXT:                        %[[ADD_PTR:.*]] = getelementptr inbounds nuw i8, ptr %[[BASE_RELOADED]], i64 %[[OFFSET_RELOADED]]
   // CHECK-SANITIZE-NEXT:               %[[COMPUTED_OFFSET_AGGREGATE:.*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1, i64 %[[OFFSET_RELOADED]]), !nosanitize
   // CHECK-SANITIZE-NEXT:               %[[COMPUTED_OFFSET_OVERFLOWED:.*]] = extractvalue { i64, i1 } %[[COMPUTED_OFFSET_AGGREGATE]], 1, !nosanitize
   // CHECK-SANITIZE-NEXT:               %[[OR_OV:.+]] = or i1 %[[COMPUTED_OFFSET_OVERFLOWED]], false, !nosanitize
diff --git clang/test/CodeGen/catch-pointer-overflow-volatile.c clang/test/CodeGen/catch-pointer-overflow-volatile.c
index 4b0653a0ae59..626bbc0db7af 100644
--- clang/test/CodeGen/catch-pointer-overflow-volatile.c
+++ clang/test/CodeGen/catch-pointer-overflow-volatile.c
@@ -23,7 +23,7 @@ char *volatile_ptr(char *volatile base, unsigned long offset) {
   // CHECK-NEXT:                        store i64 %[[OFFSET]], ptr %[[OFFSET_ADDR]], align 8
   // CHECK-NEXT:                        %[[BASE_RELOADED:.*]] = load volatile ptr, ptr %[[BASE_ADDR]], align 8
   // CHECK-NEXT:                        %[[OFFSET_RELOADED:.*]] = load i64, ptr %[[OFFSET_ADDR]], align 8
-  // CHECK-NEXT:                        %[[ADD_PTR:.*]] = getelementptr inbounds i8, ptr %[[BASE_RELOADED]], i64 %[[OFFSET_RELOADED]]
+  // CHECK-NEXT:                        %[[ADD_PTR:.*]] = getelementptr inbounds nuw i8, ptr %[[BASE_RELOADED]], i64 %[[OFFSET_RELOADED]]
   // CHECK-SANITIZE-NEXT:               %[[COMPUTED_OFFSET_AGGREGATE:.*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1, i64 %[[OFFSET_RELOADED]]), !nosanitize
   // CHECK-SANITIZE-NEXT:               %[[COMPUTED_OFFSET_OVERFLOWED:.*]] = extractvalue { i64, i1 } %[[COMPUTED_OFFSET_AGGREGATE]], 1, !nosanitize
   // CHECK-SANITIZE-NEXT:               %[[OR_OV:.+]] = or i1 %[[COMPUTED_OFFSET_OVERFLOWED]], false, !nosanitize
diff --git clang/test/CodeGen/catch-pointer-overflow.c clang/test/CodeGen/catch-pointer-overflow.c
index 899af73bd81e..1f7f1729098c 100644
--- clang/test/CodeGen/catch-pointer-overflow.c
+++ clang/test/CodeGen/catch-pointer-overflow.c
@@ -30,7 +30,7 @@ char *add_unsigned(char *base, unsigned long offset) {
   // CHECK-NEXT:                        store i64 %[[OFFSET]], ptr %[[OFFSET_ADDR]], align 8
   // CHECK-NEXT:                        %[[BASE_RELOADED:.*]] = load ptr, ptr %[[BASE_ADDR]], align 8
   // CHECK-NEXT:                        %[[OFFSET_RELOADED:.*]] = load i64, ptr %[[OFFSET_ADDR]], align 8
-  // CHECK-NEXT:                        %[[ADD_PTR:.*]] = getelementptr inbounds i8, ptr %[[BASE_RELOADED]], i64 %[[OFFSET_RELOADED]]
+  // CHECK-NEXT:                        %[[ADD_PTR:.*]] = getelementptr inbounds nuw i8, ptr %[[BASE_RELOADED]], i64 %[[OFFSET_RELOADED]]
   // CHECK-SANITIZE-NEXT:               %[[COMPUTED_OFFSET_AGGREGATE:.*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1, i64 %[[OFFSET_RELOADED]]), !nosanitize
   // CHECK-SANITIZE-NEXT:               %[[COMPUTED_OFFSET_OVERFLOWED:.*]] = extractvalue { i64, i1 } %[[COMPUTED_OFFSET_AGGREGATE]], 1, !nosanitize
   // CHECK-SANITIZE-NEXT:               %[[OR_OV:.+]] = or i1 %[[COMPUTED_OFFSET_OVERFLOWED]], false, !nosanitize
@@ -179,7 +179,7 @@ char *postinc(char *base) {
   // CHECK-NEXT:                        %[[BASE_ADDR:.*]] = alloca ptr, align 8
   // CHECK-NEXT:                        store ptr %[[BASE]], ptr %[[BASE_ADDR]], align 8
   // CHECK-NEXT:                        %[[BASE_RELOADED:.*]] = load ptr, ptr %[[BASE_ADDR]], align 8
-  // CHECK-NEXT:                        %[[ADD_PTR:.*]] = getelementptr inbounds i8, ptr %[[BASE_RELOADED]], i32 1
+  // CHECK-NEXT:                        %[[ADD_PTR:.*]] = getelementptr inbounds nuw i8, ptr %[[BASE_RELOADED]], i32 1
   // CHECK-SANITIZE-NEXT:               %[[BASE_RELOADED_INT:.*]] = ptrtoint ptr %[[BASE_RELOADED]] to i64, !nosanitize
   // CHECK-SANITIZE-NEXT:               %[[COMPUTED_GEP:.*]] = add i64 %[[BASE_RELOADED_INT]], 1, !nosanitize
   // CHECK-SANITIZE-NEXT:               %[[BASE_IS_NOT_NULLPTR:.*]] = icmp ne ptr %[[BASE_RELOADED]], null, !nosanitize
@@ -241,7 +241,7 @@ char *preinc(char *base) {
   // CHECK-NEXT:                        %[[BASE_ADDR:.*]] = alloca ptr, align 8
   // CHECK-NEXT:                        store ptr %[[BASE]], ptr %[[BASE_ADDR]], align 8
   // CHECK-NEXT:                        %[[BASE_RELOADED:.*]] = load ptr, ptr %[[BASE_ADDR]], align 8
-  // CHECK-NEXT:                        %[[ADD_PTR:.*]] = getelementptr inbounds i8, ptr %[[BASE_RELOADED]], i32 1
+  // CHECK-NEXT:                        %[[ADD_PTR:.*]] = getelementptr inbounds nuw i8, ptr %[[BASE_RELOADED]], i32 1
   // CHECK-SANITIZE-NEXT:               %[[BASE_RELOADED_INT:.*]] = ptrtoint ptr %[[BASE_RELOADED]] to i64, !nosanitize
   // CHECK-SANITIZE-NEXT:               %[[COMPUTED_GEP:.*]] = add i64 %[[BASE_RELOADED_INT]], 1, !nosanitize
   // CHECK-SANITIZE-NEXT:               %[[BASE_IS_NOT_NULLPTR:.*]] = icmp ne ptr %[[BASE_RELOADED]], null, !nosanitize
diff --git clang/test/CodeGen/ext-int.c clang/test/CodeGen/ext-int.c
index 714b7e122a70..e3d609a4ba4a 100644
--- clang/test/CodeGen/ext-int.c
+++ clang/test/CodeGen/ext-int.c
@@ -154,7 +154,7 @@ _BitInt(129) *f1(_BitInt(129) *p) {
 }
 
 char *f2(char *p) {
-  // CHECK64: getelementptr inbounds i8, {{.*}} i64 24
+  // CHECK64: getelementptr inbounds nuw i8, {{.*}} i64 24
   return p + sizeof(_BitInt(129));
 }
 
diff --git clang/test/CodeGen/hexagon-brev-ld-ptr-incdec.c clang/test/CodeGen/hexagon-brev-ld-ptr-incdec.c
index 7802168de4d7..d25d1e04f15f 100644
--- clang/test/CodeGen/hexagon-brev-ld-ptr-incdec.c
+++ clang/test/CodeGen/hexagon-brev-ld-ptr-incdec.c
@@ -6,9 +6,9 @@
 // the return value will be the value in A[2]
 // CHECK: @brev_ptr_inc
 // CHECK-DAG: llvm.hexagon.L2.loadri.pbr
-// CHECK-DAG: getelementptr inbounds i8, {{.*}}i32 4
-// CHECK-NOT: getelementptr inbounds i8, {{.*}}i32 8
-// CHECK-NOT: getelementptr inbounds i8, {{.*}}i32 4
+// CHECK-DAG: getelementptr inbounds nuw i8, {{.*}}i32 4
+// CHECK-NOT: getelementptr inbounds nuw i8, {{.*}}i32 8
+// CHECK-NOT: getelementptr inbounds nuw i8, {{.*}}i32 4
 int brev_ptr_inc(int A[], int B[]) {
   int *p0 = &B[0];
   int *p1 = &A[0];
diff --git clang/test/CodeGen/integer-overflow.c clang/test/CodeGen/integer-overflow.c
index 461b026d3961..9e8cde8b33b1 100644
--- clang/test/CodeGen/integer-overflow.c
+++ clang/test/CodeGen/integer-overflow.c
@@ -60,10 +60,10 @@ void test1(void) {
   // -fwrapv should turn off inbounds for GEP's, PR9256
   extern int* P;
   ++P;
-  // DEFAULT: getelementptr inbounds i32, ptr
+  // DEFAULT: getelementptr inbounds nuw i32, ptr
   // WRAPV: getelementptr i32, ptr
-  // TRAPV: getelementptr inbounds i32, ptr
-  // CATCH_UB_POINTER: getelementptr inbounds i32, ptr
+  // TRAPV: getelementptr inbounds nuw i32, ptr
+  // CATCH_UB_POINTER: getelementptr inbounds nuw i32, ptr
   // NOCATCH_UB_POINTER: getelementptr i32, ptr
 
   // PR9350: char pre-increment never overflows.
diff --git clang/test/CodeGen/ms-intrinsics.c clang/test/CodeGen/ms-intrinsics.c
index c3d64fda0b90..459a708d9b2e 100644
--- clang/test/CodeGen/ms-intrinsics.c
+++ clang/test/CodeGen/ms-intrinsics.c
@@ -156,7 +156,7 @@ unsigned char test_BitScanForward(unsigned long *Index, unsigned long Mask) {
 // CHECK:   [[RESULT:%[a-z0-9._]+]] = phi i8 [ 0, %[[ISZERO_LABEL:[a-z0-9._]+]] ], [ 1, %[[ISNOTZERO_LABEL]] ]
 // CHECK:   ret i8 [[RESULT]]
 // CHECK:   [[ISNOTZERO_LABEL]]:
-// CHECK:   [[IDXGEP:%[a-z0-9._]+]] = getelementptr inbounds i8, ptr %Index, {{i64|i32}} 4
+// CHECK:   [[IDXGEP:%[a-z0-9._]+]] = getelementptr inbounds nuw i8, ptr %Index, {{i64|i32}} 4
 // CHECK:   [[INDEX:%[0-9]+]] = tail call range(i32 0, 33) i32 @llvm.cttz.i32(i32 %Mask, i1 true)
 // CHECK:   store i32 [[INDEX]], ptr [[IDXGEP]], align 4
 // CHECK:   br label %[[END_LABEL]]
@@ -171,7 +171,7 @@ unsigned char test_BitScanReverse(unsigned long *Index, unsigned long Mask) {
 // CHECK:   [[RESULT:%[a-z0-9._]+]] = phi i8 [ 0, %[[ISZERO_LABEL:[a-z0-9._]+]] ], [ 1, %[[ISNOTZERO_LABEL]] ]
 // CHECK:   ret i8 [[RESULT]]
 // CHECK:   [[ISNOTZERO_LABEL]]:
-// CHECK:   [[IDXGEP:%[a-z0-9._]+]] = getelementptr inbounds i8, ptr %Index, {{i64|i32}} 4
+// CHECK:   [[IDXGEP:%[a-z0-9._]+]] = getelementptr inbounds nuw i8, ptr %Index, {{i64|i32}} 4
 // CHECK:   [[REVINDEX:%[0-9]+]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 %Mask, i1 true)
 // CHECK:   [[INDEX:%[0-9]+]] = xor i32 [[REVINDEX]], 31
 // CHECK:   store i32 [[INDEX]], ptr [[IDXGEP]], align 4
@@ -437,10 +437,10 @@ unsigned char test_InterlockedCompareExchange128(
                                         ++ExchangeLow, ++ComparandResult);
 }
 // CHECK-64: define{{.*}}i8 @test_InterlockedCompareExchange128(ptr{{[a-z_ ]*}}%Destination, i64{{[a-z_ ]*}}%ExchangeHigh, i64{{[a-z_ ]*}}%ExchangeLow, ptr{{[a-z_ ]*}}%ComparandResult){{.*}}{
-// CHECK-64: %incdec.ptr = getelementptr inbounds i8, ptr %Destination, i64 8
+// CHECK-64: %incdec.ptr = getelementptr inbounds nuw i8, ptr %Destination, i64 8
 // CHECK-64: %inc = add nsw i64 %ExchangeHigh, 1
 // CHECK-64: %inc1 = add nsw i64 %ExchangeLow, 1
-// CHECK-64: %incdec.ptr2 = getelementptr inbounds i8, ptr %ComparandResult, i64 8
+// CHECK-64: %incdec.ptr2 = getelementptr inbounds nuw i8, ptr %ComparandResult, i64 8
 // CHECK-64: [[EH:%[0-9]+]] = zext i64 %inc to i128
 // CHECK-64: [[EL:%[0-9]+]] = zext i64 %inc1 to i128
 // CHECK-64: [[EHS:%[0-9]+]] = shl nuw i128 [[EH]], 64
@@ -486,7 +486,7 @@ short test_InterlockedIncrement16(short volatile *Addend) {
   return _InterlockedIncrement16(++Addend);
 }
 // CHECK: define{{.*}}i16 @test_InterlockedIncrement16(ptr{{[a-z_ ]*}}%Addend){{.*}}{
-// CHECK: %incdec.ptr = getelementptr inbounds i8, ptr %Addend, {{i64|i32}} 2
+// CHECK: %incdec.ptr = getelementptr inbounds nuw i8, ptr %Addend, {{i64|i32}} 2
 // CHECK: [[TMP:%[0-9]+]] = atomicrmw add ptr %incdec.ptr, i16 1 seq_cst, align 2
 // CHECK: [[RESULT:%[0-9]+]] = add i16 [[TMP]], 1
 // CHECK: ret i16 [[RESULT]]
@@ -496,7 +496,7 @@ long test_InterlockedIncrement(long volatile *Addend) {
   return _InterlockedIncrement(++Addend);
 }
 // CHECK: define{{.*}}i32 @test_InterlockedIncrement(ptr{{[a-z_ ]*}}%Addend){{.*}}{
-// CHECK: %incdec.ptr = getelementptr inbounds i8, ptr %Addend, {{i64|i32}} 4
+// CHECK: %incdec.ptr = getelementptr inbounds nuw i8, ptr %Addend, {{i64|i32}} 4
 // CHECK: [[TMP:%[0-9]+]] = atomicrmw add ptr %incdec.ptr, i32 1 seq_cst, align 4
 // CHECK: [[RESULT:%[0-9]+]] = add i32 [[TMP]], 1
 // CHECK: ret i32 [[RESULT]]
diff --git clang/test/CodeGen/transparent-union-type.c clang/test/CodeGen/transparent-union-type.c
new file mode 100644
index 000000000000..f7fac25dc098
--- /dev/null
+++ clang/test/CodeGen/transparent-union-type.c
@@ -0,0 +1,108 @@
+// RUN: %clang_cc1 -triple powerpc64le-linux -O2 -target-cpu pwr7 -emit-llvm \
+// RUN:   -fshort-enums %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-64
+// RUN: %clang_cc1 -triple powerpc64-linux -O2 -target-cpu pwr7 -emit-llvm \
+// RUN:   -fshort-enums %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-64
+// RUN: %clang_cc1 -triple powerpc-linux -O2 -target-cpu pwr7 -emit-llvm \
+// RUN:   -fshort-enums %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-32
+// RUN: %clang_cc1 -triple powerpc64-aix -O2 -target-cpu pwr7 -emit-llvm \
+// RUN:   -fshort-enums %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-64
+// RUN: %clang_cc1 -triple powerpc-aix -O2 -target-cpu pwr7 -emit-llvm \
+// RUN:   -fshort-enums %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-32
+// RUN: %clang_cc1 -triple riscv64-linux -O2 -emit-llvm -fshort-enums \
+// RUN:   %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-64
+// RUN: %clang_cc1 -triple riscv32-linux -O2 -emit-llvm -fshort-enums \
+// RUN:   %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-32
+// RUN: %clang_cc1 -triple i386-linux -O2 -emit-llvm -fshort-enums \
+// RUN:   %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-32
+// RUN: %clang_cc1 -triple x86_64-linux -O2 -emit-llvm -fshort-enums \
+// RUN:   %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-64
+// RUN: %clang_cc1 -triple armv7-linux -O2 -emit-llvm -fshort-enums \
+// RUN:   %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-32
+// RUN: %clang_cc1 -triple arm64 -target-abi darwinpcs -O2 -emit-llvm \
+// RUN:   -fshort-enums %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-64
+// RUN: %clang_cc1 -triple aarch64 -target-abi darwinpcs -O2 -emit-llvm \
+// RUN:   -fshort-enums %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-64
+
+typedef union tu_c {
+  signed char a;
+  signed char b;
+} tu_c_t __attribute__((transparent_union));
+
+typedef union tu_s {
+  short a;
+} tu_s_t __attribute__((transparent_union));
+
+typedef union tu_us {
+  unsigned short a;
+} tu_us_t __attribute__((transparent_union));
+
+typedef union tu_l {
+  long a;
+} tu_l_t __attribute__((transparent_union));
+
+// CHECK-LABEL: define{{.*}} void @ftest0(
+// CHECK-SAME: i8 noundef signext [[UC_COERCE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret void
+void ftest0(tu_c_t uc) { }
+
+// CHECK-LABEL: define{{.*}} void @ftest1(
+// CHECK-SAME: i16 noundef signext [[UC_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret void
+void ftest1(tu_s_t uc) { }
+
+// CHECK-LABEL: define{{.*}} void @ftest1b(
+// CHECK-SAME: ptr nocapture noundef readnone [[UC:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret void
+//
+void ftest1b(tu_s_t *uc) { }
+
+// CHECK-LABEL: define{{.*}} void @ftest2(
+// CHECK-SAME: i16 noundef zeroext [[UC_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret void
+void ftest2(tu_us_t uc) { }
+
+// CHECK-64-LABEL: define{{.*}} void @ftest3(
+// CHECK-64-SAME: i64 [[UC_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-64-NEXT:  [[ENTRY:.*:]]
+// CHECK-64-NEXT:    ret void
+//
+// CHECK-32-LABEL: define{{.*}} void @ftest3(
+// CHECK-32-SAME: i32 [[UC_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-32-NEXT:  [[ENTRY:.*:]]
+// CHECK-32-NEXT:    ret void
+void ftest3(tu_l_t uc) { }
+
+typedef union etest {
+  enum flag {red, yellow, blue} fl;
+  enum weekend {sun, sat} b;
+} etest_t __attribute__((transparent_union));
+
+// CHECK-LABEL: define{{.*}} void @ftest4(
+// CHECK-SAME: i8 noundef zeroext [[A_COERCE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret void
+void ftest4(etest_t a) {}
+
+typedef union tu_ptr {
+  signed char *a;
+  unsigned short *b;
+  int *c;
+} tu_ptr_t __attribute__((transparent_union));
+
+// CHECK-LABEL: define{{.*}} void @ftest5(
+// CHECK-SAME: ptr nocapture readnone [[UC_COERCE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret void
+//
+void ftest5(tu_ptr_t uc) { }
+
+// CHECK-LABEL: define{{.*}} void @ftest6(
+// CHECK-SAME: ptr nocapture noundef readnone [[UC:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret void
+//
+void ftest6(tu_ptr_t *uc) { }
diff --git clang/test/CodeGen/ubsan-pointer-overflow.m clang/test/CodeGen/ubsan-pointer-overflow.m
index 9192598da92f..4ecdac655669 100644
--- clang/test/CodeGen/ubsan-pointer-overflow.m
+++ clang/test/CodeGen/ubsan-pointer-overflow.m
@@ -5,7 +5,7 @@ void variable_len_array_arith(int n, int k) {
   int vla[n];
   int (*p)[n] = &vla;
 
-  // CHECK: getelementptr inbounds i32, ptr {{.*}}, i64 [[INC:%.*]]
+  // CHECK: getelementptr inbounds nuw i32, ptr {{.*}}, i64 [[INC:%.*]]
   // CHECK: @llvm.smul.with.overflow.i64(i64 4, i64 [[INC]]), !nosanitize
   // CHECK-NOT: select
   // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}}
diff --git clang/test/CodeGen/vla.c clang/test/CodeGen/vla.c
index 33621c5dd7a2..a22ba727df2f 100644
--- clang/test/CodeGen/vla.c
+++ clang/test/CodeGen/vla.c
@@ -120,7 +120,7 @@ int test4(unsigned n, char (*p)[n][n+1][6]) {
   // CHECK-NEXT: [[T2:%.*]] = udiv i32 [[T1]], 2
   // CHECK-NEXT: [[T3:%.*]] = mul nuw i32 [[DIM0]], [[DIM1]]
   // CHECK-NEXT: [[T4:%.*]] = mul nsw i32 [[T2]], [[T3]]
-  // CHECK-NEXT: [[T5:%.*]] = getelementptr inbounds [6 x i8], ptr [[T0]], i32 [[T4]]
+  // CHECK-NEXT: [[T5:%.*]] = getelementptr inbounds nuw [6 x i8], ptr [[T0]], i32 [[T4]]
   // CHECK-NEXT: [[T6:%.*]] = load i32, ptr [[N]], align 4
   // CHECK-NEXT: [[T7:%.*]] = udiv i32 [[T6]], 4
   // CHECK-NEXT: [[T8:%.*]] = sub i32 0, [[T7]]
diff --git clang/test/CodeGenCUDA/device-stub.cu clang/test/CodeGenCUDA/device-stub.cu
index 60304647bd4c..8695433f6df1 100644
--- clang/test/CodeGenCUDA/device-stub.cu
+++ clang/test/CodeGenCUDA/device-stub.cu
@@ -175,7 +175,7 @@ __device__ void device_use() {
 // HIP-SAME: section ".hipFatBinSegment"
 // * variable to save GPU binary handle after initialization
 // CUDANORDC: @__[[PREFIX]]_gpubin_handle = internal global ptr null
-// HIPNEF: @__[[PREFIX]]_gpubin_handle_{{[0-9a-f]+}} = external hidden global ptr, align 8
+// HIPNEF: @__[[PREFIX]]_gpubin_handle_{{[0-9a-f]+}} = internal global ptr null, align 8
 // * constant unnamed string with NVModuleID
 // CUDARDC: [[MODULE_ID_GLOBAL:@.*]] = private constant
 // CUDARDC-SAME: c"[[MODULE_ID:.+]]\00", section "__nv_module_id", align 32
diff --git clang/test/CodeGenCXX/attr-likelihood-iteration-stmt.cpp clang/test/CodeGenCXX/attr-likelihood-iteration-stmt.cpp
index e842de633504..fd9786de3a94 100644
--- clang/test/CodeGenCXX/attr-likelihood-iteration-stmt.cpp
+++ clang/test/CodeGenCXX/attr-likelihood-iteration-stmt.cpp
@@ -152,16 +152,16 @@ void f_branch_elided()
 // CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 8, ptr [[__BEGIN1]]) #[[ATTR3]]
 // CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[__RANGE1]], align 8, !tbaa [[TBAA14]]
 // CHECK-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i32], ptr [[TMP1]], i64 0, i64 0
-// CHECK-NEXT:    store ptr [[ARRAYDECAY]], ptr [[__BEGIN1]], align 8, !tbaa [[TBAA16:![0-9]+]]
+// CHECK-NEXT:    store ptr [[ARRAYDECAY]], ptr [[__BEGIN1]], align 8, !tbaa [[TBAA14]]
 // CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 8, ptr [[__END1]]) #[[ATTR3]]
 // CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[__RANGE1]], align 8, !tbaa [[TBAA14]]
 // CHECK-NEXT:    [[ARRAYDECAY1:%.*]] = getelementptr inbounds [4 x i32], ptr [[TMP2]], i64 0, i64 0
 // CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAYDECAY1]], i64 4
-// CHECK-NEXT:    store ptr [[ADD_PTR]], ptr [[__END1]], align 8, !tbaa [[TBAA16]]
+// CHECK-NEXT:    store ptr [[ADD_PTR]], ptr [[__END1]], align 8, !tbaa [[TBAA14]]
 // CHECK-NEXT:    br label [[FOR_COND:%.*]]
 // CHECK:       for.cond:
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[__BEGIN1]], align 8, !tbaa [[TBAA16]]
-// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[__END1]], align 8, !tbaa [[TBAA16]]
+// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[__BEGIN1]], align 8, !tbaa [[TBAA14]]
+// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[__END1]], align 8, !tbaa [[TBAA14]]
 // CHECK-NEXT:    [[CMP:%.*]] = icmp ne ptr [[TMP3]], [[TMP4]]
 // CHECK-NEXT:    [[CMP_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[CMP]], i1 true)
 // CHECK-NEXT:    br i1 [[CMP_EXPVAL]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]]
@@ -172,16 +172,16 @@ void f_branch_elided()
 // CHECK-NEXT:    br label [[FOR_END:%.*]]
 // CHECK:       for.body:
 // CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 4, ptr [[I]]) #[[ATTR3]]
-// CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[__BEGIN1]], align 8, !tbaa [[TBAA16]]
+// CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[__BEGIN1]], align 8, !tbaa [[TBAA14]]
 // CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA2]]
 // CHECK-NEXT:    store i32 [[TMP6]], ptr [[I]], align 4, !tbaa [[TBAA2]]
 // CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 4, ptr [[I]]) #[[ATTR3]]
 // CHECK-NEXT:    br label [[FOR_INC:%.*]]
 // CHECK:       for.inc:
-// CHECK-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[__BEGIN1]], align 8, !tbaa [[TBAA16]]
-// CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 1
-// CHECK-NEXT:    store ptr [[INCDEC_PTR]], ptr [[__BEGIN1]], align 8, !tbaa [[TBAA16]]
-// CHECK-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]]
+// CHECK-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[__BEGIN1]], align 8, !tbaa [[TBAA14]]
+// CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP7]], i32 1
+// CHECK-NEXT:    store ptr [[INCDEC_PTR]], ptr [[__BEGIN1]], align 8, !tbaa [[TBAA14]]
+// CHECK-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
 // CHECK:       for.end:
 // CHECK-NEXT:    ret void
 //
@@ -204,16 +204,16 @@ void frl(int (&&e) [4])
 // CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 8, ptr [[__BEGIN1]]) #[[ATTR3]]
 // CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[__RANGE1]], align 8, !tbaa [[TBAA14]]
 // CHECK-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i32], ptr [[TMP1]], i64 0, i64 0
-// CHECK-NEXT:    store ptr [[ARRAYDECAY]], ptr [[__BEGIN1]], align 8, !tbaa [[TBAA16]]
+// CHECK-NEXT:    store ptr [[ARRAYDECAY]], ptr [[__BEGIN1]], align 8, !tbaa [[TBAA14]]
 // CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 8, ptr [[__END1]]) #[[ATTR3]]
 // CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[__RANGE1]], align 8, !tbaa [[TBAA14]]
 // CHECK-NEXT:    [[ARRAYDECAY1:%.*]] = getelementptr inbounds [4 x i32], ptr [[TMP2]], i64 0, i64 0
 // CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAYDECAY1]], i64 4
-// CHECK-NEXT:    store ptr [[ADD_PTR]], ptr [[__END1]], align 8, !tbaa [[TBAA16]]
+// CHECK-NEXT:    store ptr [[ADD_PTR]], ptr [[__END1]], align 8, !tbaa [[TBAA14]]
 // CHECK-NEXT:    br label [[FOR_COND:%.*]]
 // CHECK:       for.cond:
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[__BEGIN1]], align 8, !tbaa [[TBAA16]]
-// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[__END1]], align 8, !tbaa [[TBAA16]]
+// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[__BEGIN1]], align 8, !tbaa [[TBAA14]]
+// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[__END1]], align 8, !tbaa [[TBAA14]]
 // CHECK-NEXT:    [[CMP:%.*]] = icmp ne ptr [[TMP3]], [[TMP4]]
 // CHECK-NEXT:    [[CMP_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[CMP]], i1 false)
 // CHECK-NEXT:    br i1 [[CMP_EXPVAL]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]]
@@ -224,16 +224,16 @@ void frl(int (&&e) [4])
 // CHECK-NEXT:    br label [[FOR_END:%.*]]
 // CHECK:       for.body:
 // CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 4, ptr [[I]]) #[[ATTR3]]
-// CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[__BEGIN1]], align 8, !tbaa [[TBAA16]]
+// CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[__BEGIN1]], align 8, !tbaa [[TBAA14]]
 // CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA2]]
 // CHECK-NEXT:    store i32 [[TMP6]], ptr [[I]], align 4, !tbaa [[TBAA2]]
 // CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 4, ptr [[I]]) #[[ATTR3]]
 // CHECK-NEXT:    br label [[FOR_INC:%.*]]
 // CHECK:       for.inc:
-// CHECK-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[__BEGIN1]], align 8, !tbaa [[TBAA16]]
-// CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 1
-// CHECK-NEXT:    store ptr [[INCDEC_PTR]], ptr [[__BEGIN1]], align 8, !tbaa [[TBAA16]]
-// CHECK-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
+// CHECK-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[__BEGIN1]], align 8, !tbaa [[TBAA14]]
+// CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP7]], i32 1
+// CHECK-NEXT:    store ptr [[INCDEC_PTR]], ptr [[__BEGIN1]], align 8, !tbaa [[TBAA14]]
+// CHECK-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]]
 // CHECK:       for.end:
 // CHECK-NEXT:    ret void
 //
diff --git clang/test/CodeGenCXX/const-base-cast.cpp clang/test/CodeGenCXX/const-base-cast.cpp
index bb08b9d21fcf..7f2c66e6ca08 100644
--- clang/test/CodeGenCXX/const-base-cast.cpp
+++ clang/test/CodeGenCXX/const-base-cast.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm %s -o - -fexperimental-new-constant-interpreter | FileCheck %s
 
 // Check that the following construct, which is similar to one which occurs
 // in Firefox, is folded correctly.
diff --git clang/test/CodeGenCXX/debug-info-lambda-this.cpp clang/test/CodeGenCXX/debug-info-lambda-this.cpp
index 0a2f08ea4aa6..e5acab126d72 100644
--- clang/test/CodeGenCXX/debug-info-lambda-this.cpp
+++ clang/test/CodeGenCXX/debug-info-lambda-this.cpp
@@ -21,7 +21,8 @@ int main() {
   return 0;
 }
 
-// CHECK: !{![[FOO_THIS:[0-9]+]], ![[FOO_AA:[0-9]+]], ![[FOO_OPERATOR:[0-9]+]]}
+// CHECK: distinct !DICompositeType(tag: DW_TAG_class_type, name: "<lambda_1>", {{.*}}, elements: ![[ELEMENT_TAG:[0-9]+]]
+// CHECK: ![[ELEMENT_TAG]] = !{![[FOO_THIS:[0-9]+]], ![[FOO_AA:[0-9]+]], ![[FOO_OPERATOR:[0-9]+]]}
 // CHECK-NEXT: ![[FOO_THIS]] = !DIDerivedType(tag: DW_TAG_member, name: "__this", scope: ![[#]], file: ![[#]], line: [[#]], baseType: ![[#]], size: [[#]])
 // CHECK-NEXT: ![[FOO_AA]] = !DIDerivedType(tag: DW_TAG_member, name: "aa", scope: ![[#]], file: ![[#]], line: [[#]], baseType: ![[#]], size: [[#]], offset: [[#]])
 // CHECK-NEXT: ![[FOO_OPERATOR]] = !DISubprogram(name: "operator()", scope: ![[#]], file: ![[#]], line: [[#]], type: ![[#]], scopeLine: [[#]], flags: DIFlagPublic | DIFlagPrototyped, spFlags: 0)
diff --git clang/test/CodeGenCXX/for-range.cpp clang/test/CodeGenCXX/for-range.cpp
index 10d27206d12e..088a34647c37 100644
--- clang/test/CodeGenCXX/for-range.cpp
+++ clang/test/CodeGenCXX/for-range.cpp
@@ -33,7 +33,7 @@ B *end(C&);
 
 extern B array[5];
 
-// CHECK-LABEL: define {{[^@]+}}@_Z9for_arrayv(
+// CHECK-LABEL: @_Z9for_arrayv(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_A:%.*]], align 1
 // CHECK-NEXT:    [[__RANGE1:%.*]] = alloca ptr, align 8
@@ -57,7 +57,7 @@ extern B array[5];
 // CHECK-NEXT:    br label [[FOR_INC:%.*]]
 // CHECK:       for.inc:
 // CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[__BEGIN1]], align 8
-// CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds [[STRUCT_B]], ptr [[TMP3]], i32 1
+// CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw [[STRUCT_B]], ptr [[TMP3]], i32 1
 // CHECK-NEXT:    store ptr [[INCDEC_PTR]], ptr [[__BEGIN1]], align 8
 // CHECK-NEXT:    br label [[FOR_COND]]
 // CHECK:       for.end:
@@ -70,7 +70,7 @@ void for_array() {
   }
 }
 
-// CHECK-LABEL: define {{[^@]+}}@_Z9for_rangev(
+// CHECK-LABEL: @_Z9for_rangev(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_A:%.*]], align 1
 // CHECK-NEXT:    [[__RANGE1:%.*]] = alloca ptr, align 8
@@ -103,7 +103,7 @@ void for_array() {
 // CHECK-NEXT:    br label [[FOR_INC:%.*]]
 // CHECK:       for.inc:
 // CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[__BEGIN1]], align 8
-// CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds [[STRUCT_B]], ptr [[TMP5]], i32 1
+// CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw [[STRUCT_B]], ptr [[TMP5]], i32 1
 // CHECK-NEXT:    store ptr [[INCDEC_PTR]], ptr [[__BEGIN1]], align 8
 // CHECK-NEXT:    br label [[FOR_COND]]
 // CHECK:       for.end:
@@ -116,7 +116,7 @@ void for_range() {
   }
 }
 
-// CHECK-LABEL: define {{[^@]+}}@_Z16for_member_rangev(
+// CHECK-LABEL: @_Z16for_member_rangev(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_A:%.*]], align 1
 // CHECK-NEXT:    [[__RANGE1:%.*]] = alloca ptr, align 8
@@ -149,7 +149,7 @@ void for_range() {
 // CHECK-NEXT:    br label [[FOR_INC:%.*]]
 // CHECK:       for.inc:
 // CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[__BEGIN1]], align 8
-// CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds [[STRUCT_B]], ptr [[TMP5]], i32 1
+// CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw [[STRUCT_B]], ptr [[TMP5]], i32 1
 // CHECK-NEXT:    store ptr [[INCDEC_PTR]], ptr [[__BEGIN1]], align 8
 // CHECK-NEXT:    br label [[FOR_COND]]
 // CHECK:       for.end:
diff --git clang/test/CodeGenCXX/microsoft-abi-template-static-init.cpp clang/test/CodeGenCXX/microsoft-abi-template-static-init.cpp
index 60b48abca2f8..871551240deb 100644
--- clang/test/CodeGenCXX/microsoft-abi-template-static-init.cpp
+++ clang/test/CodeGenCXX/microsoft-abi-template-static-init.cpp
@@ -49,8 +49,6 @@ struct X {
   static T ioo;
   static T init();
 };
-// template specialized static data don't need in llvm.used,
-// the static init routine get call from _GLOBAL__sub_I_ routines.
 template <> int X<int>::ioo = X<int>::init();
 template struct X<int>;
 class a {
@@ -87,5 +85,6 @@ struct S1
 int foo();
 inline int zoo = foo();
 inline static int boo = foo();
+inline __declspec(dllexport) A exported_inline{};
 
-// CHECK: @llvm.used = appending global [8 x ptr] [ptr @"?x@selectany_init@@3HA", ptr @"?x1@selectany_init@@3HA", ptr @"?x@?$A@H@explicit_template_instantiation@@2HA", ptr @"?ioo@?$X_@H@@2HA", ptr @"?aoo@S1@@2UA@@A", ptr @"?zoo@@3HA", ptr @"?s@?$ExportedTemplate@H@@2US@@A", ptr @"?x@?$A@H@implicit_template_instantiation@@2HA"], section "llvm.metadata"
+// CHECK: @llvm.used = appending global [10 x ptr] [ptr @"?x@selectany_init@@3HA", ptr @"?x1@selectany_init@@3HA", ptr @"?x@?$A@H@explicit_template_instantiation@@2HA", ptr @"?ioo@?$X_@H@@2HA", ptr @"?ioo@?$X@H@@2HA", ptr @"?aoo@S1@@2UA@@A", ptr @"?zoo@@3HA", ptr @"?exported_inline@@3UA@@A", ptr @"?s@?$ExportedTemplate@H@@2US@@A", ptr @"?x@?$A@H@implicit_template_instantiation@@2HA"], section "llvm.metadata"
diff --git clang/test/CodeGenCXX/pr45964-decomp-transform.cpp clang/test/CodeGenCXX/pr45964-decomp-transform.cpp
index f7df110ec012..bcb2d875dce6 100644
--- clang/test/CodeGenCXX/pr45964-decomp-transform.cpp
+++ clang/test/CodeGenCXX/pr45964-decomp-transform.cpp
@@ -16,7 +16,7 @@ void (*d)(){test_transform<0>};
 // CHECK-NEXT:  [[BODY]]:
 // CHECK-NEXT:  [[CUR:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[NEXT:%.*]], %[[BODY]] ]
 // CHECK-NEXT:  [[DEST:%.*]] = getelementptr inbounds i32, ptr [[BEGIN]], i64 [[CUR]]
-// CHECK-NEXT:  [[SRC:%.*]] = getelementptr inbounds [1 x i32], ptr @a, i64 0, i64 [[CUR]]
+// CHECK-NEXT:  [[SRC:%.*]] = getelementptr inbounds nuw [1 x i32], ptr @a, i64 0, i64 [[CUR]]
 // CHECK-NEXT:  [[X:%.*]] = load i32, ptr [[SRC]]
 // CHECK-NEXT:  store i32 [[X]], ptr [[DEST]]
 // CHECK-NEXT:  [[NEXT]] = add nuw i64 [[CUR]], 1
diff --git clang/test/CodeGenCXX/vla.cpp clang/test/CodeGenCXX/vla.cpp
index 4cf2b3b445b4..aadf51fce3a4 100644
--- clang/test/CodeGenCXX/vla.cpp
+++ clang/test/CodeGenCXX/vla.cpp
@@ -83,7 +83,7 @@ void test2(int b) {
   
   //CHECK: [[VLA_SIZEOF:%.*]] = mul nuw i64 4, [[VLA_NUM_ELEMENTS_PRE]]
   //CHECK-NEXT: [[VLA_NUM_ELEMENTS_POST:%.*]] = udiv i64 [[VLA_SIZEOF]], 4
-  //CHECK-NEXT: [[VLA_END_PTR:%.*]] = getelementptr inbounds i32, ptr {{%.*}}, i64 [[VLA_NUM_ELEMENTS_POST]]
+  //CHECK-NEXT: [[VLA_END_PTR:%.*]] = getelementptr inbounds nuw i32, ptr {{%.*}}, i64 [[VLA_NUM_ELEMENTS_POST]]
   //X64-NEXT: store ptr [[VLA_END_PTR]], ptr %__end1
   //AMDGCN-NEXT: store ptr [[VLA_END_PTR]], ptr [[END]]
   for (int d : varr) 0;
@@ -116,7 +116,7 @@ void test3(int b, int c) {
   //CHECK-NEXT: [[VLA_SIZEOF_DIM2:%.*]] = mul nuw i64 4, [[VLA_DIM2_PRE]]
   //CHECK-NEXT: [[VLA_NUM_ELEMENTS:%.*]] = udiv i64 [[VLA_SIZEOF]], [[VLA_SIZEOF_DIM2]]
   //CHECK-NEXT: [[VLA_END_INDEX:%.*]] = mul nsw i64 [[VLA_NUM_ELEMENTS]], [[VLA_DIM2_PRE]]
-  //CHECK-NEXT: [[VLA_END_PTR:%.*]] = getelementptr inbounds i32, ptr {{%.*}}, i64 [[VLA_END_INDEX]]
+  //CHECK-NEXT: [[VLA_END_PTR:%.*]] = getelementptr inbounds nuw i32, ptr {{%.*}}, i64 [[VLA_END_INDEX]]
   //X64-NEXT: store ptr [[VLA_END_PTR]], ptr %__end
   //AMDGCN-NEXT: store ptr [[VLA_END_PTR]], ptr [[END]]
  
diff --git clang/test/CodeGenCXX/vtable-available-externally.cpp clang/test/CodeGenCXX/vtable-available-externally.cpp
index a57eb39edfe1..ab105260bc75 100644
--- clang/test/CodeGenCXX/vtable-available-externally.cpp
+++ clang/test/CodeGenCXX/vtable-available-externally.cpp
@@ -250,28 +250,39 @@ struct C : A {
   virtual void car();
 };
 
+// The virtual function is defined inline outside the class body, so we can't
+// emit the vtable available_externally (see the previous case above).
+// CHECK-TEST10-DAG: @_ZTVN6Test101FE = external unnamed_addr constant
+struct F : A {
+  void foo();
+  virtual void cat();         // inline outside body
+};
+inline void F::cat() {}
+
 // no key function, vtable will be generated everywhere it will be used
 // CHECK-TEST10-DAG: @_ZTVN6Test101EE = linkonce_odr unnamed_addr constant
 // CHECK-FORCE-EMIT-DAG: @_ZTVN6Test101EE = linkonce_odr unnamed_addr constant
 
 struct E : A {};
 
-void g(A& a) {
+void h(A& a) {
   a.foo();
   a.bar();
 }
 
-void f() {
+void g() {
   A a;
-  g(a);
+  h(a);
   B b;
-  g(b);
+  h(b);
   C c;
-  g(c);
+  h(c);
   D d;
-  g(d);
+  h(d);
   E e;
-  g(e);
+  h(e);
+  F f;
+  h(f);
 }
 
 }  // Test10
diff --git clang/test/CodeGenCoroutines/Inputs/utility.h clang/test/CodeGenCoroutines/Inputs/utility.h
new file mode 100644
index 000000000000..43c6d27823bd
--- /dev/null
+++ clang/test/CodeGenCoroutines/Inputs/utility.h
@@ -0,0 +1,13 @@
+// This is a mock file for <utility>
+
+namespace std {
+
+template <typename T> struct remove_reference { using type = T; };
+template <typename T> struct remove_reference<T &> { using type = T; };
+template <typename T> struct remove_reference<T &&> { using type = T; };
+
+template <typename T>
+constexpr typename std::remove_reference<T>::type&& move(T &&t) noexcept {
+  return static_cast<typename std::remove_reference<T>::type &&>(t);
+}
+}
diff --git clang/test/CodeGenCoroutines/coro-await-elidable.cpp clang/test/CodeGenCoroutines/coro-await-elidable.cpp
new file mode 100644
index 000000000000..8512995dfad4
--- /dev/null
+++ clang/test/CodeGenCoroutines/coro-await-elidable.cpp
@@ -0,0 +1,87 @@
+// This file tests the coro_await_elidable attribute semantics.
+// RUN: %clang_cc1 -triple=x86_64-unknown-linux-gnu -std=c++20 -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s
+
+#include "Inputs/coroutine.h"
+#include "Inputs/utility.h"
+
+template <typename T>
+struct [[clang::coro_await_elidable]] Task {
+  struct promise_type {
+    struct FinalAwaiter {
+      bool await_ready() const noexcept { return false; }
+
+      template <typename P>
+      std::coroutine_handle<> await_suspend(std::coroutine_handle<P> coro) noexcept {
+        if (!coro)
+          return std::noop_coroutine();
+        return coro.promise().continuation;
+      }
+      void await_resume() noexcept {}
+    };
+
+    Task get_return_object() noexcept {
+      return std::coroutine_handle<promise_type>::from_promise(*this);
+    }
+
+    std::suspend_always initial_suspend() noexcept { return {}; }
+    FinalAwaiter final_suspend() noexcept { return {}; }
+    void unhandled_exception() noexcept {}
+    void return_value(T x) noexcept {
+      value = x;
+    }
+
+    std::coroutine_handle<> continuation;
+    T value;
+  };
+
+  Task(std::coroutine_handle<promise_type> handle) : handle(handle) {}
+  ~Task() {
+    if (handle)
+      handle.destroy();
+  }
+
+  struct Awaiter {
+    Awaiter(Task *t) : task(t) {}
+    bool await_ready() const noexcept { return false; }
+    void await_suspend(std::coroutine_handle<void> continuation) noexcept {}
+    T await_resume() noexcept {
+      return task->handle.promise().value;
+    }
+
+    Task *task;
+  };
+
+  auto operator co_await() {
+    return Awaiter{this};
+  }
+
+private:
+  std::coroutine_handle<promise_type> handle;
+};
+
+// CHECK-LABEL: define{{.*}} @_Z6calleev{{.*}} {
+Task<int> callee() {
+  co_return 1;
+}
+
+// CHECK-LABEL: define{{.*}} @_Z8elidablev{{.*}} {
+Task<int> elidable() {
+  // CHECK: %[[TASK_OBJ:.+]] = alloca %struct.Task
+  // CHECK: call void @_Z6calleev(ptr dead_on_unwind writable sret(%struct.Task) align 8 %[[TASK_OBJ]]) #[[ELIDE_SAFE:.+]]
+  co_return co_await callee();
+}
+
+// CHECK-LABEL: define{{.*}} @_Z11nonelidablev{{.*}} {
+Task<int> nonelidable() {
+  // CHECK: %[[TASK_OBJ:.+]] = alloca %struct.Task
+  auto t = callee();
+  // Because we aren't co_awaiting a prvalue, we cannot elide here.
+  // CHECK: call void @_Z6calleev(ptr dead_on_unwind writable sret(%struct.Task) align 8 %[[TASK_OBJ]])
+  // CHECK-NOT: #[[ELIDE_SAFE]]
+  co_await t;
+  co_await std::move(t);
+
+  co_return 1;
+}
+
+// CHECK: attributes #[[ELIDE_SAFE]] = { coro_elide_safe }
diff --git clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl
index 5d751be6dae0..6478ea67e32a 100644
--- clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl
+++ clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl
@@ -117,3 +117,27 @@ void d4_to_b2() {
   vector<double,4> d4 = 9.0;
   vector<bool, 2> b2 = d4;
 }
+
+// CHECK-LABEL: d4_to_d1
+// CHECK: [[d4:%.*]] = alloca <4 x double>
+// CHECK: [[d1:%.*]] = alloca <1 x double>
+// CHECK: store <4 x double> <double 9.000000e+00, double 9.000000e+00, double 9.000000e+00, double 9.000000e+00>, ptr [[d4]]
+// CHECK: [[vecd4:%.*]] = load <4 x double>, ptr [[d4]]
+// CHECK: [[vecd1:%.*]] = shufflevector <4 x double> [[vecd4]], <4 x double> poison, <1 x i32> zeroinitializer
+// CHECK: store <1 x double> [[vecd1]], ptr [[d1:%.*]], align 8
+void d4_to_d1() {
+  vector<double,4> d4 = 9.0;
+  vector<double,1> d1 = d4;
+}
+
+// CHECK-LABEL: d4_to_dScalar
+// CHECK: [[d4:%.*]] = alloca <4 x double>
+// CHECK: [[d:%.*]] = alloca double
+// CHECK: store <4 x double> <double 9.000000e+00, double 9.000000e+00, double 9.000000e+00, double 9.000000e+00>, ptr [[d4]]
+// CHECK: [[vecd4:%.*]] = load <4 x double>, ptr [[d4]]
+// CHECK: [[d4x:%.*]] = extractelement <4 x double> [[vecd4]], i32 0
+// CHECK: store double [[d4x]], ptr [[d]]
+void d4_to_dScalar() {
+  vector<double,4> d4 = 9.0;
+  double d = d4;
+}
diff --git clang/test/CodeGenHLSL/buffer-array-operator.hlsl clang/test/CodeGenHLSL/buffer-array-operator.hlsl
index f5556df30871..02e570ebdcb4 100644
--- clang/test/CodeGenHLSL/buffer-array-operator.hlsl
+++ clang/test/CodeGenHLSL/buffer-array-operator.hlsl
@@ -17,7 +17,7 @@ void fn(int Idx) {
 // CHECK-NEXT: %h = getelementptr inbounds nuw %"class.hlsl::RWBuffer", ptr %this1, i32 0, i32 0
 // CHECK-NEXT: %0 = load ptr, ptr %h, align 4
 // CHECK-NEXT: %1 = load i32, ptr %Idx.addr, align 4
-// CHECK-NEXT: %arrayidx = getelementptr inbounds float, ptr %0, i32 %1
+// CHECK-NEXT: %arrayidx = getelementptr inbounds nuw float, ptr %0, i32 %1
 // CHECK-NEXT: ret ptr %arrayidx
 
 // Const comes next, and returns the pointer instead of the value.
@@ -26,5 +26,5 @@ void fn(int Idx) {
 // CHECK-NEXT: %h = getelementptr inbounds nuw %"class.hlsl::RWBuffer", ptr %this1, i32 0, i32 0
 // CHECK-NEXT: %0 = load ptr, ptr %h, align 4
 // CHECK-NEXT: %1 = load i32, ptr %Idx.addr, align 4
-// CHECK-NEXT: %arrayidx = getelementptr inbounds float, ptr %0, i32 %1
+// CHECK-NEXT: %arrayidx = getelementptr inbounds nuw float, ptr %0, i32 %1
 // CHECK-NEXT: ret ptr %arrayidx
diff --git clang/test/CodeGenHLSL/builtins/StructuredBuffer-annotations.hlsl clang/test/CodeGenHLSL/builtins/StructuredBuffer-annotations.hlsl
new file mode 100644
index 000000000000..16b7295c985f
--- /dev/null
+++ clang/test/CodeGenHLSL/builtins/StructuredBuffer-annotations.hlsl
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
+
+StructuredBuffer<float> Buffer1;
+StructuredBuffer<vector<float, 4> > BufferArray[4];
+
+StructuredBuffer<float> Buffer2 : register(u3);
+StructuredBuffer<vector<float, 4> > BufferArray2[4] : register(u4);
+
+StructuredBuffer<float> Buffer3 : register(u3, space1);
+StructuredBuffer<vector<float, 4> > BufferArray3[4] : register(u4, space1);
+
+[numthreads(1,1,1)]
+void main() {
+}
+
+// CHECK: !hlsl.uavs = !{![[Single:[0-9]+]], ![[Array:[0-9]+]], ![[SingleAllocated:[0-9]+]], ![[ArrayAllocated:[0-9]+]], ![[SingleSpace:[0-9]+]], ![[ArraySpace:[0-9]+]]}
+// CHECK-DAG: ![[Single]] = !{ptr @"?Buffer1@@3V?$StructuredBuffer@M@hlsl@@A", i32 10, i32 9, i1 false, i32 -1, i32 0}
+// CHECK-DAG: ![[Array]] = !{ptr @"?BufferArray@@3PAV?$StructuredBuffer@T?$__vector@M$03@__clang@@@hlsl@@A", i32 10, i32 9, i1 false, i32 -1, i32 0}
+// CHECK-DAG: ![[SingleAllocated]] = !{ptr @"?Buffer2@@3V?$StructuredBuffer@M@hlsl@@A", i32 10, i32 9, i1 false, i32 3, i32 0}
+// CHECK-DAG: ![[ArrayAllocated]] = !{ptr @"?BufferArray2@@3PAV?$StructuredBuffer@T?$__vector@M$03@__clang@@@hlsl@@A", i32 10, i32 9, i1 false, i32 4, i32 0}
+// CHECK-DAG: ![[SingleSpace]] = !{ptr @"?Buffer3@@3V?$StructuredBuffer@M@hlsl@@A", i32 10, i32 9, i1 false, i32 3, i32 1}
+// CHECK-DAG: ![[ArraySpace]] = !{ptr @"?BufferArray3@@3PAV?$StructuredBuffer@T?$__vector@M$03@__clang@@@hlsl@@A", i32 10, i32 9, i1 false, i32 4, i32 1}
diff --git clang/test/CodeGenHLSL/builtins/StructuredBuffer-constructor.hlsl clang/test/CodeGenHLSL/builtins/StructuredBuffer-constructor.hlsl
new file mode 100644
index 000000000000..34019e5b1869
--- /dev/null
+++ clang/test/CodeGenHLSL/builtins/StructuredBuffer-constructor.hlsl
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -triple spirv-vulkan-library -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s --check-prefix=CHECK-SPIRV
+
+StructuredBuffer<float> Buf;
+
+// CHECK: define linkonce_odr noundef ptr @"??0?$StructuredBuffer@M@hlsl@@QAA@XZ"
+// CHECK-NEXT: entry:
+
+// CHECK: %[[HandleRes:[0-9]+]] = call ptr @llvm.dx.create.handle(i8 1)
+// CHECK: store ptr %[[HandleRes]], ptr %h, align 4
+
+// CHECK-SPIRV: %[[HandleRes:[0-9]+]] = call ptr @llvm.spv.create.handle(i8 1)
+// CHECK-SPIRV: store ptr %[[HandleRes]], ptr %h, align 8
diff --git clang/test/CodeGenHLSL/builtins/StructuredBuffer-elementtype.hlsl clang/test/CodeGenHLSL/builtins/StructuredBuffer-elementtype.hlsl
new file mode 100644
index 000000000000..8ddf8a600440
--- /dev/null
+++ clang/test/CodeGenHLSL/builtins/StructuredBuffer-elementtype.hlsl
@@ -0,0 +1,52 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.2-compute -finclude-default-header -fnative-half-type -emit-llvm -o - %s | FileCheck %s
+
+StructuredBuffer<int16_t> BufI16;
+StructuredBuffer<uint16_t> BufU16;
+StructuredBuffer<int> BufI32;
+StructuredBuffer<uint> BufU32;
+StructuredBuffer<int64_t> BufI64;
+StructuredBuffer<uint64_t> BufU64;
+StructuredBuffer<half> BufF16;
+StructuredBuffer<float> BufF32;
+StructuredBuffer<double> BufF64;
+StructuredBuffer< vector<int16_t, 4> > BufI16x4;
+StructuredBuffer< vector<uint, 3> > BufU32x3;
+StructuredBuffer<half2> BufF16x2;
+StructuredBuffer<float3> BufF32x3;
+// TODO: StructuredBuffer<snorm half> BufSNormF16; -> 11
+// TODO: StructuredBuffer<unorm half> BufUNormF16; -> 12
+// TODO: StructuredBuffer<snorm float> BufSNormF32; -> 13
+// TODO: StructuredBuffer<unorm float> BufUNormF32; -> 14
+// TODO: StructuredBuffer<snorm double> BufSNormF64; -> 15
+// TODO: StructuredBuffer<unorm double> BufUNormF64; -> 16
+
+[numthreads(1,1,1)]
+void main(int GI : SV_GroupIndex) {
+  BufI16[GI] = 0;
+  BufU16[GI] = 0;
+  BufI32[GI] = 0;
+  BufU32[GI] = 0;
+  BufI64[GI] = 0;
+  BufU64[GI] = 0;
+  BufF16[GI] = 0;
+  BufF32[GI] = 0;
+  BufF64[GI] = 0;
+  BufI16x4[GI] = 0;
+  BufU32x3[GI] = 0;
+  BufF16x2[GI] = 0;
+  BufF32x3[GI] = 0;
+}
+
+// CHECK: !{{[0-9]+}} = !{ptr @"?BufI16@@3V?$StructuredBuffer@F@hlsl@@A", i32 10, i32 2,
+// CHECK: !{{[0-9]+}} = !{ptr @"?BufU16@@3V?$StructuredBuffer@G@hlsl@@A", i32 10, i32 3,
+// CHECK: !{{[0-9]+}} = !{ptr @"?BufI32@@3V?$StructuredBuffer@H@hlsl@@A", i32 10, i32 4,
+// CHECK: !{{[0-9]+}} = !{ptr @"?BufU32@@3V?$StructuredBuffer@I@hlsl@@A", i32 10, i32 5,
+// CHECK: !{{[0-9]+}} = !{ptr @"?BufI64@@3V?$StructuredBuffer@J@hlsl@@A", i32 10, i32 6,
+// CHECK: !{{[0-9]+}} = !{ptr @"?BufU64@@3V?$StructuredBuffer@K@hlsl@@A", i32 10, i32 7,
+// CHECK: !{{[0-9]+}} = !{ptr @"?BufF16@@3V?$StructuredBuffer@$f16@@hlsl@@A", i32 10, i32 8,
+// CHECK: !{{[0-9]+}} = !{ptr @"?BufF32@@3V?$StructuredBuffer@M@hlsl@@A", i32 10, i32 9,
+// CHECK: !{{[0-9]+}} = !{ptr @"?BufF64@@3V?$StructuredBuffer@N@hlsl@@A", i32 10, i32 10,
+// CHECK: !{{[0-9]+}} = !{ptr @"?BufI16x4@@3V?$StructuredBuffer@T?$__vector@F$03@__clang@@@hlsl@@A", i32 10, i32 2,
+// CHECK: !{{[0-9]+}} = !{ptr @"?BufU32x3@@3V?$StructuredBuffer@T?$__vector@I$02@__clang@@@hlsl@@A", i32 10, i32 5,
+// CHECK: !{{[0-9]+}} = !{ptr @"?BufF16x2@@3V?$StructuredBuffer@T?$__vector@$f16@$01@__clang@@@hlsl@@A", i32 10, i32 8,
+// CHECK: !{{[0-9]+}} = !{ptr @"?BufF32x3@@3V?$StructuredBuffer@T?$__vector@M$02@__clang@@@hlsl@@A", i32 10, i32 9,
diff --git clang/test/CodeGenHLSL/builtins/StructuredBuffer-subscript.hlsl clang/test/CodeGenHLSL/builtins/StructuredBuffer-subscript.hlsl
new file mode 100644
index 000000000000..9bd885d94d7e
--- /dev/null
+++ clang/test/CodeGenHLSL/builtins/StructuredBuffer-subscript.hlsl
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -o - -O0 %s | FileCheck %s
+
+StructuredBuffer<int> In;
+StructuredBuffer<int> Out;
+
+[numthreads(1,1,1)]
+void main(unsigned GI : SV_GroupIndex) {
+  Out[GI] = In[GI];
+}
+
+// Even at -O0 the subscript operators get inlined. The -O0 IR is a bit messy
+// and confusing to follow, so the match here is fairly loose.
+
+// CHECK: define internal void @"?main@@YAXI@Z"
+// CHECK-NOT: call
+// CHECK: ret void
diff --git clang/test/CodeGenHLSL/builtins/dot.hlsl clang/test/CodeGenHLSL/builtins/dot.hlsl
index 2b76fae61147..3f6be04a595e 100644
--- clang/test/CodeGenHLSL/builtins/dot.hlsl
+++ clang/test/CodeGenHLSL/builtins/dot.hlsl
@@ -155,18 +155,6 @@ float test_dot_float3(float3 p0, float3 p1) { return dot(p0, p1); }
 // CHECK: ret float %hlsl.dot
 float test_dot_float4(float4 p0, float4 p1) { return dot(p0, p1); }
 
-// CHECK:  %hlsl.dot = call float @llvm.[[ICF]].fdot.v2f32(<2 x float> %splat.splat, <2 x float>
-// CHECK: ret float %hlsl.dot
-float test_dot_float2_splat(float p0, float2 p1) { return dot(p0, p1); }
-
-// CHECK:  %hlsl.dot = call float @llvm.[[ICF]].fdot.v3f32(<3 x float> %splat.splat, <3 x float>
-// CHECK: ret float %hlsl.dot
-float test_dot_float3_splat(float p0, float3 p1) { return dot(p0, p1); }
-
-// CHECK:  %hlsl.dot = call float @llvm.[[ICF]].fdot.v4f32(<4 x float> %splat.splat, <4 x float>
-// CHECK: ret float %hlsl.dot
-float test_dot_float4_splat(float p0, float4 p1) { return dot(p0, p1); }
-
 // CHECK: %hlsl.dot = fmul double
 // CHECK: ret double %hlsl.dot
 double test_dot_double(double p0, double p1) { return dot(p0, p1); }
diff --git clang/test/CodeGenHLSL/builtins/hlsl_resource_t.hlsl clang/test/CodeGenHLSL/builtins/hlsl_resource_t.hlsl
new file mode 100644
index 000000000000..ce9733090347
--- /dev/null
+++ clang/test/CodeGenHLSL/builtins/hlsl_resource_t.hlsl
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -O1 -o - %s | FileCheck %s
+
+void foo(__hlsl_resource_t res);
+
+// CHECK: define void @"?bar@@YAXU__hlsl_resource_t@@@Z"(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %[[PARAM:[a-zA-Z0-9]+]])
+// CHECK: call void @"?foo@@YAXU__hlsl_resource_t@@@Z"(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %[[PARAM]])
+void bar(__hlsl_resource_t a) {
+    foo(a);
+}
diff --git clang/test/CodeGenHLSL/builtins/lerp.hlsl clang/test/CodeGenHLSL/builtins/lerp.hlsl
index 298d157da00a..b11046894bd8 100644
--- clang/test/CodeGenHLSL/builtins/lerp.hlsl
+++ clang/test/CodeGenHLSL/builtins/lerp.hlsl
@@ -56,21 +56,3 @@ float3 test_lerp_float3(float3 p0) { return lerp(p0, p0, p0); }
 // CHECK: %hlsl.lerp = call <4 x float> @llvm.[[TARGET]].lerp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
 // CHECK: ret <4 x float> %hlsl.lerp
 float4 test_lerp_float4(float4 p0) { return lerp(p0, p0, p0); }
-
-// CHECK: %[[b:.*]] = load <2 x float>, ptr %p1.addr, align 8
-// CHECK: %[[c:.*]] = load <2 x float>, ptr %p1.addr, align 8
-// CHECK: %hlsl.lerp = call <2 x float> @llvm.[[TARGET]].lerp.v2f32(<2 x float> %splat.splat, <2 x float> %[[b]], <2 x float> %[[c]])
-// CHECK: ret <2 x float> %hlsl.lerp
-float2 test_lerp_float2_splat(float p0, float2 p1) { return lerp(p0, p1, p1); }
-
-// CHECK: %[[b:.*]] = load <3 x float>, ptr %p1.addr, align 16
-// CHECK: %[[c:.*]] = load <3 x float>, ptr %p1.addr, align 16
-// CHECK: %hlsl.lerp = call <3 x float> @llvm.[[TARGET]].lerp.v3f32(<3 x float> %splat.splat, <3 x float> %[[b]], <3 x float> %[[c]])
-// CHECK: ret <3 x float> %hlsl.lerp
-float3 test_lerp_float3_splat(float p0, float3 p1) { return lerp(p0, p1, p1); }
-
-// CHECK: %[[b:.*]] = load <4 x float>, ptr %p1.addr, align 16
-// CHECK: %[[c:.*]] = load <4 x float>, ptr %p1.addr, align 16
-// CHECK: %hlsl.lerp = call <4 x float> @llvm.[[TARGET]].lerp.v4f32(<4 x float> %splat.splat, <4 x float> %[[b]], <4 x float> %[[c]])
-// CHECK:  ret <4 x float> %hlsl.lerp
-float4 test_lerp_float4_splat(float p0, float4 p1) { return lerp(p0, p1, p1); }
diff --git clang/test/CodeGenHLSL/builtins/mad.hlsl clang/test/CodeGenHLSL/builtins/mad.hlsl
index 449a793caf93..265a2552c80f 100644
--- clang/test/CodeGenHLSL/builtins/mad.hlsl
+++ clang/test/CodeGenHLSL/builtins/mad.hlsl
@@ -263,21 +263,3 @@ uint64_t3 test_mad_uint64_t3(uint64_t3 p0, uint64_t3 p1, uint64_t3 p2) { return
 // SPIR_CHECK: mul nuw <4 x i64>  %{{.*}}, %{{.*}}
 // SPIR_CHECK: add nuw <4 x i64>  %{{.*}}, %{{.*}}
 uint64_t4 test_mad_uint64_t4(uint64_t4 p0, uint64_t4 p1, uint64_t4 p2) { return mad(p0, p1, p2); }
-
-// CHECK: %[[p1:.*]] = load <2 x float>, ptr %p1.addr, align 8
-// CHECK: %[[p2:.*]] = load <2 x float>, ptr %p2.addr, align 8
-// CHECK: %hlsl.fmad = call <2 x float>  @llvm.fmuladd.v2f32(<2 x float> %splat.splat, <2 x float> %[[p1]], <2 x float> %[[p2]])
-// CHECK: ret <2 x float> %hlsl.fmad
-float2 test_mad_float2_splat(float p0, float2 p1, float2 p2) { return mad(p0, p1, p2); }
-
-// CHECK: %[[p1:.*]] = load <3 x float>, ptr %p1.addr, align 16
-// CHECK: %[[p2:.*]] = load <3 x float>, ptr %p2.addr, align 16
-// CHECK: %hlsl.fmad = call <3 x float>  @llvm.fmuladd.v3f32(<3 x float> %splat.splat, <3 x float> %[[p1]], <3 x float> %[[p2]])
-// CHECK: ret <3 x float> %hlsl.fmad
-float3 test_mad_float3_splat(float p0, float3 p1, float3 p2) { return mad(p0, p1, p2); }
-
-// CHECK: %[[p1:.*]] = load <4 x float>, ptr %p1.addr, align 16
-// CHECK: %[[p2:.*]] = load <4 x float>, ptr %p2.addr, align 16
-// CHECK:  %hlsl.fmad = call <4 x float>  @llvm.fmuladd.v4f32(<4 x float> %splat.splat, <4 x float> %[[p1]], <4 x float> %[[p2]])
-// CHECK:  ret <4 x float> %hlsl.fmad
-float4 test_mad_float4_splat(float p0, float4 p1, float4 p2) { return mad(p0, p1, p2); }
diff --git clang/test/CodeGenHLSL/builtins/select.hlsl clang/test/CodeGenHLSL/builtins/select.hlsl
new file mode 100644
index 000000000000..cade938b71a2
--- /dev/null
+++ clang/test/CodeGenHLSL/builtins/select.hlsl
@@ -0,0 +1,54 @@
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
+// RUN:   -o - | FileCheck %s --check-prefixes=CHECK
+
+// CHECK-LABEL: test_select_bool_int
+// CHECK: [[SELECT:%.*]] = select i1 {{%.*}}, i32 {{%.*}}, i32 {{%.*}}
+// CHECK: ret i32 [[SELECT]]
+int test_select_bool_int(bool cond0, int tVal, int fVal) {
+  return select<int>(cond0, tVal, fVal);
+}
+
+struct S { int a; };
+// CHECK-LABEL: test_select_infer
+// CHECK: [[SELECT:%.*]] = select i1 {{%.*}}, ptr {{%.*}}, ptr {{%.*}}
+// CHECK: store ptr [[SELECT]]
+// CHECK: ret void
+struct S test_select_infer(bool cond0, struct S tVal, struct S fVal) {
+  return select(cond0, tVal, fVal);
+}
+
+// CHECK-LABEL: test_select_bool_vector
+// CHECK: [[SELECT:%.*]] = select i1 {{%.*}}, <2 x i32> {{%.*}}, <2 x i32> {{%.*}}
+// CHECK: ret <2 x i32> [[SELECT]]
+int2 test_select_bool_vector(bool cond0, int2 tVal, int2 fVal) {
+  return select<int2>(cond0, tVal, fVal);
+}
+
+// CHECK-LABEL: test_select_vector_1
+// CHECK: [[SELECT:%.*]] = select <1 x i1> {{%.*}}, <1 x i32> {{%.*}}, <1 x i32> {{%.*}}
+// CHECK: ret <1 x i32> [[SELECT]]
+int1 test_select_vector_1(bool1 cond0, int1 tVals, int1 fVals) {
+  return select<int,1>(cond0, tVals, fVals);
+}
+
+// CHECK-LABEL: test_select_vector_2
+// CHECK: [[SELECT:%.*]] = select <2 x i1> {{%.*}}, <2 x i32> {{%.*}}, <2 x i32> {{%.*}}
+// CHECK: ret <2 x i32> [[SELECT]]
+int2 test_select_vector_2(bool2 cond0, int2 tVals, int2 fVals) {
+  return select<int,2>(cond0, tVals, fVals);
+}
+
+// CHECK-LABEL: test_select_vector_3
+// CHECK: [[SELECT:%.*]] = select <3 x i1> {{%.*}}, <3 x i32> {{%.*}}, <3 x i32> {{%.*}}
+// CHECK: ret <3 x i32> [[SELECT]]
+int3 test_select_vector_3(bool3 cond0, int3 tVals, int3 fVals) {
+  return select<int,3>(cond0, tVals, fVals);
+}
+
+// CHECK-LABEL: test_select_vector_4
+// CHECK: [[SELECT:%.*]] = select <4 x i1> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> {{%.*}}
+// CHECK: ret <4 x i32> [[SELECT]]
+int4 test_select_vector_4(bool4 cond0, int4 tVals, int4 fVals) {
+  return select(cond0, tVals, fVals);
+}
diff --git clang/test/CodeGenHLSL/builtins/sign.hlsl clang/test/CodeGenHLSL/builtins/sign.hlsl
new file mode 100644
index 000000000000..4bb239fb009e
--- /dev/null
+++ clang/test/CodeGenHLSL/builtins/sign.hlsl
@@ -0,0 +1,157 @@
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
+// RUN:   --check-prefixes=CHECK,NATIVE_HALF \
+// RUN:   -DTARGET=dx -DFNATTRS=noundef
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
+// RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \
+// RUN:   -DTARGET=dx -DFNATTRS=noundef
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   spirv-unknown-vulkan-compute %s -fnative-half-type \
+// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
+// RUN:   --check-prefixes=CHECK,NATIVE_HALF \
+// RUN:   -DTARGET=spv -DFNATTRS="spir_func noundef"
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \
+// RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \
+// RUN:   -DTARGET=spv -DFNATTRS="spir_func noundef"
+
+// NATIVE_HALF: define [[FNATTRS]] i32 @
+// NATIVE_HALF: %hlsl.sign = call i32 @llvm.[[TARGET]].sign.f16(
+// NATIVE_HALF: ret i32 %hlsl.sign
+// NO_HALF: define [[FNATTRS]] i32 @
+// NO_HALF: %hlsl.sign = call i32 @llvm.[[TARGET]].sign.f32(
+// NO_HALF: ret i32 %hlsl.sign
+int test_sign_half(half p0) { return sign(p0); }
+
+// NATIVE_HALF: define [[FNATTRS]] <2 x i32> @
+// NATIVE_HALF: %hlsl.sign = call <2 x i32> @llvm.[[TARGET]].sign.v2f16(
+// NATIVE_HALF: ret <2 x i32> %hlsl.sign
+// NO_HALF: define [[FNATTRS]] <2 x i32> @
+// NO_HALF: %hlsl.sign = call <2 x i32> @llvm.[[TARGET]].sign.v2f32(
+// NO_HALF: ret <2 x i32> %hlsl.sign
+int2 test_sign_half2(half2 p0) { return sign(p0); }
+
+// NATIVE_HALF: define [[FNATTRS]] <3 x i32> @
+// NATIVE_HALF: %hlsl.sign = call <3 x i32> @llvm.[[TARGET]].sign.v3f16(
+// NATIVE_HALF: ret <3 x i32> %hlsl.sign
+// NO_HALF: define [[FNATTRS]] <3 x i32> @
+// NO_HALF: %hlsl.sign = call <3 x i32> @llvm.[[TARGET]].sign.v3f32(
+// NO_HALF: ret <3 x i32> %hlsl.sign
+int3 test_sign_half3(half3 p0) { return sign(p0); }
+
+// NATIVE_HALF: define [[FNATTRS]] <4 x i32> @
+// NATIVE_HALF: %hlsl.sign = call <4 x i32> @llvm.[[TARGET]].sign.v4f16(
+// NATIVE_HALF: ret <4 x i32> %hlsl.sign
+// NO_HALF: define [[FNATTRS]] <4 x i32> @
+// NO_HALF: %hlsl.sign = call <4 x i32> @llvm.[[TARGET]].sign.v4f32(
+// NO_HALF: ret <4 x i32> %hlsl.sign
+int4 test_sign_half4(half4 p0) { return sign(p0); }
+
+
+// CHECK: define [[FNATTRS]] i32 @
+// CHECK: %hlsl.sign = call i32 @llvm.[[TARGET]].sign.f32(
+// CHECK: ret i32 %hlsl.sign
+int test_sign_float(float p0) { return sign(p0); }
+
+// CHECK: define [[FNATTRS]] <2 x i32> @
+// CHECK: %hlsl.sign = call <2 x i32> @llvm.[[TARGET]].sign.v2f32(
+// CHECK: ret <2 x i32> %hlsl.sign
+int2 test_sign_float2(float2 p0) { return sign(p0); }
+
+// CHECK: define [[FNATTRS]] <3 x i32> @
+// CHECK: %hlsl.sign = call <3 x i32> @llvm.[[TARGET]].sign.v3f32(
+// CHECK: ret <3 x i32> %hlsl.sign
+int3 test_sign_float3(float3 p0) { return sign(p0); }
+
+// CHECK: define [[FNATTRS]] <4 x i32> @
+// CHECK: %hlsl.sign = call <4 x i32> @llvm.[[TARGET]].sign.v4f32(
+// CHECK: ret <4 x i32> %hlsl.sign
+int4 test_sign_float4(float4 p0) { return sign(p0); }
+
+
+// CHECK: define [[FNATTRS]] i32 @
+// CHECK: %hlsl.sign = call i32 @llvm.[[TARGET]].sign.f64(
+// CHECK: ret i32 %hlsl.sign
+int test_sign_double(double p0) { return sign(p0); }
+
+// CHECK: define [[FNATTRS]] <2 x i32> @
+// CHECK: %hlsl.sign = call <2 x i32> @llvm.[[TARGET]].sign.v2f64(
+// CHECK: ret <2 x i32> %hlsl.sign
+int2 test_sign_double2(double2 p0) { return sign(p0); }
+
+// CHECK: define [[FNATTRS]] <3 x i32> @
+// CHECK: %hlsl.sign = call <3 x i32> @llvm.[[TARGET]].sign.v3f64(
+// CHECK: ret <3 x i32> %hlsl.sign
+int3 test_sign_double3(double3 p0) { return sign(p0); }
+
+// CHECK: define [[FNATTRS]] <4 x i32> @
+// CHECK: %hlsl.sign = call <4 x i32> @llvm.[[TARGET]].sign.v4f64(
+// CHECK: ret <4 x i32> %hlsl.sign
+int4 test_sign_double4(double4 p0) { return sign(p0); }
+
+
+#ifdef __HLSL_ENABLE_16_BIT
+// NATIVE_HALF: define [[FNATTRS]] i32 @
+// NATIVE_HALF: %hlsl.sign = call i32 @llvm.[[TARGET]].sign.i16(
+// NATIVE_HALF: ret i32 %hlsl.sign
+int test_sign_int16_t(int16_t p0) { return sign(p0); }
+
+// NATIVE_HALF: define [[FNATTRS]] <2 x i32> @
+// NATIVE_HALF: %hlsl.sign = call <2 x i32> @llvm.[[TARGET]].sign.v2i16(
+// NATIVE_HALF: ret <2 x i32> %hlsl.sign
+int2 test_sign_int16_t2(int16_t2 p0) { return sign(p0); }
+
+// NATIVE_HALF: define [[FNATTRS]] <3 x i32> @
+// NATIVE_HALF: %hlsl.sign = call <3 x i32> @llvm.[[TARGET]].sign.v3i16(
+// NATIVE_HALF: ret <3 x i32> %hlsl.sign
+int3 test_sign_int16_t3(int16_t3 p0) { return sign(p0); }
+
+// NATIVE_HALF: define [[FNATTRS]] <4 x i32> @
+// NATIVE_HALF: %hlsl.sign = call <4 x i32> @llvm.[[TARGET]].sign.v4i16(
+// NATIVE_HALF: ret <4 x i32> %hlsl.sign
+int4 test_sign_int16_t4(int16_t4 p0) { return sign(p0); }
+#endif // __HLSL_ENABLE_16_BIT
+
+
+// CHECK: define [[FNATTRS]] i32 @
+// CHECK: %hlsl.sign = call i32 @llvm.[[TARGET]].sign.i32(
+// CHECK: ret i32 %hlsl.sign
+int test_sign_int(int p0) { return sign(p0); }
+
+// CHECK: define [[FNATTRS]] <2 x i32> @
+// CHECK: %hlsl.sign = call <2 x i32> @llvm.[[TARGET]].sign.v2i32(
+// CHECK: ret <2 x i32> %hlsl.sign
+int2 test_sign_int2(int2 p0) { return sign(p0); }
+
+// CHECK: define [[FNATTRS]] <3 x i32> @
+// CHECK: %hlsl.sign = call <3 x i32> @llvm.[[TARGET]].sign.v3i32(
+// CHECK: ret <3 x i32> %hlsl.sign
+int3 test_sign_int3(int3 p0) { return sign(p0); }
+
+// CHECK: define [[FNATTRS]] <4 x i32> @
+// CHECK: %hlsl.sign = call <4 x i32> @llvm.[[TARGET]].sign.v4i32(
+// CHECK: ret <4 x i32> %hlsl.sign
+int4 test_sign_int4(int4 p0) { return sign(p0); }
+
+
+// CHECK: define [[FNATTRS]] i32 @
+// CHECK: %hlsl.sign = call i32 @llvm.[[TARGET]].sign.i64(
+// CHECK: ret i32 %hlsl.sign
+int test_sign_int64_t(int64_t p0) { return sign(p0); }
+
+// CHECK: define [[FNATTRS]] <2 x i32> @
+// CHECK: %hlsl.sign = call <2 x i32> @llvm.[[TARGET]].sign.v2i64(
+// CHECK: ret <2 x i32> %hlsl.sign
+int2 test_sign_int64_t2(int64_t2 p0) { return sign(p0); }
+
+// CHECK: define [[FNATTRS]] <3 x i32> @
+// CHECK: %hlsl.sign = call <3 x i32> @llvm.[[TARGET]].sign.v3i64(
+// CHECK: ret <3 x i32> %hlsl.sign
+int3 test_sign_int64_t3(int64_t3 p0) { return sign(p0); }
+
+// CHECK: define [[FNATTRS]] <4 x i32> @
+// CHECK: %hlsl.sign = call <4 x i32> @llvm.[[TARGET]].sign.v4i64(
+// CHECK: ret <4 x i32> %hlsl.sign
+int4 test_sign_int64_t4(int64_t4 p0) { return sign(p0); }
diff --git clang/test/CodeGenHLSL/builtins/wave_is_first_lane.hlsl clang/test/CodeGenHLSL/builtins/wave_is_first_lane.hlsl
new file mode 100644
index 000000000000..18860c321eb9
--- /dev/null
+++ clang/test/CodeGenHLSL/builtins/wave_is_first_lane.hlsl
@@ -0,0 +1,34 @@
+// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple   \
+// RUN:   spirv-pc-vulkan-compute %s -emit-llvm -disable-llvm-passes -o - | \
+// RUN:   FileCheck %s --check-prefixes=CHECK,CHECK-SPIRV
+// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple   \
+// RUN:   dxil-pc-shadermodel6.3-compute %s -emit-llvm -disable-llvm-passes -o - | \
+// RUN:   FileCheck %s --check-prefixes=CHECK,CHECK-DXIL
+
+[numthreads(1, 1, 1)]
+void main() {
+// CHECK-SPIRV: %[[#entry_tok:]] = call token @llvm.experimental.convergence.entry()
+
+// CHECK-SPIRV: %[[#loop_tok:]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %[[#entry_tok]]) ]
+  while (true) {
+
+// CHECK-DXIL:  %[[#]] = call i1 @llvm.dx.wave.is.first.lane()
+// CHECK-SPIRV: %[[#]] = call i1 @llvm.spv.wave.is.first.lane()
+// CHECK-SPIRV-SAME: [ "convergencectrl"(token %[[#loop_tok]]) ]
+    if (WaveIsFirstLane()) {
+      break;
+    }
+  }
+
+// CHECK-DXIL:  %[[#]] = call i1 @llvm.dx.wave.is.first.lane()
+// CHECK-SPIRV: %[[#]] = call i1 @llvm.spv.wave.is.first.lane()
+// CHECK-SPIRV-SAME: [ "convergencectrl"(token %[[#entry_tok]]) ]
+  if (WaveIsFirstLane()) {
+    return;
+  }
+}
+
+// CHECK-DXIL:  i1 @llvm.dx.wave.is.first.lane() #[[#attr:]]
+// CHECK-SPIRV: i1 @llvm.spv.wave.is.first.lane() #[[#attr:]]
+
+// CHECK: attributes #[[#attr]] = {{{.*}} convergent {{.*}}}
diff --git clang/test/CodeGenHLSL/loops/unroll.hlsl clang/test/CodeGenHLSL/loops/unroll.hlsl
index 7389f21dd347..efca0747805d 100644
--- clang/test/CodeGenHLSL/loops/unroll.hlsl
+++ clang/test/CodeGenHLSL/loops/unroll.hlsl
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \
+// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header -x hlsl -triple \
 // RUN: dxil-pc-shadermodel6.3-library -disable-llvm-passes %s -emit-llvm -o - | FileCheck %s
 
 /*** for ***/
@@ -35,8 +35,8 @@ void for_nested_one_unroll_enable()
         for( int j = 0; j < 10; ++j)
             s += i + j;
     }
-// CHECK: br label %{{.*}}, !llvm.loop ![[FOR_NESTED_ENABLE:.*]]
-// CHECK-NOT: br label %{{.*}}, !llvm.loop ![[FOR_NESTED_1_ENABLE:.*]]
+// CHECK: br label %{{.*}}, !llvm.loop ![[FOR_NESTED_ENABLE_INNER:.*]]
+// CHECK: br label %{{.*}}, !llvm.loop ![[FOR_NESTED_ENABLE_OUTER:.*]]
 }
 
 void for_nested_two_unroll_enable()
@@ -111,20 +111,26 @@ void do_enable()
 }
 
 
-// CHECK: ![[FOR_DISTINCT]]     =  distinct !{![[FOR_DISTINCT]],  ![[FOR_COUNT:.*]]}
-// CHECK: ![[FOR_COUNT]]         =  !{!"llvm.loop.unroll.count", i32 8}
-// CHECK: ![[FOR_DISABLE]]   =  distinct !{![[FOR_DISABLE]],  ![[DISABLE:.*]]}
-// CHECK: ![[DISABLE]]       =  !{!"llvm.loop.unroll.disable"}
-// CHECK: ![[FOR_ENABLE]]      =  distinct !{![[FOR_ENABLE]],  ![[ENABLE:.*]]}
-// CHECK: ![[ENABLE]]          =  !{!"llvm.loop.unroll.enable"}
-// CHECK: ![[FOR_NESTED_ENABLE]] =  distinct !{![[FOR_NESTED_ENABLE]], ![[ENABLE]]}
-// CHECK: ![[FOR_NESTED2_ENABLE]] =  distinct !{![[FOR_NESTED2_ENABLE]], ![[ENABLE]]}
-// CHECK: ![[FOR_NESTED2_1_ENABLE]] =  distinct !{![[FOR_NESTED2_1_ENABLE]], ![[ENABLE]]}
-// CHECK: ![[WHILE_DISTINCT]]   =  distinct !{![[WHILE_DISTINCT]],    ![[WHILE_COUNT:.*]]}
-// CHECK: ![[WHILE_COUNT]]         =  !{!"llvm.loop.unroll.count", i32 4}
-// CHECK: ![[WHILE_DISABLE]] =  distinct !{![[WHILE_DISABLE]],  ![[DISABLE]]}
-// CHECK: ![[WHILE_ENABLE]]    =  distinct !{![[WHILE_ENABLE]],     ![[ENABLE]]}
-// CHECK: ![[DO_DISTINCT]]      =  distinct !{![[DO_DISTINCT]],       ![[DO_COUNT:.*]]}
-// CHECK: ![[DO_COUNT]]         =  !{!"llvm.loop.unroll.count", i32 16}
-// CHECK: ![[DO_DISABLE]]    =  distinct !{![[DO_DISABLE]],     ![[DISABLE]]}
-// CHECK: ![[DO_ENABLE]]       =  distinct !{![[DO_ENABLE]],        ![[ENABLE]]}
+// CHECK-DAG: [[MUST_PROGRESS:.*]] = !{!"llvm.loop.mustprogress"}
+// CHECK-DAG: [[DISABLE:.*]] = !{!"llvm.loop.unroll.disable"}
+// CHECK-DAG: [[FOR_COUNT:.*]] =  !{!"llvm.loop.unroll.count", i32 8}
+// CHECK-DAG: [[ENABLE:.*]] = !{!"llvm.loop.unroll.enable"}
+// CHECK-DAG: [[WHILE_COUNT:.*]] = !{!"llvm.loop.unroll.count", i32 4}
+// CHECK-DAG: [[DO_COUNT:.*]] = !{!"llvm.loop.unroll.count", i32 16}
+
+// CHECK-DAG: ![[FOR_DISTINCT]] =  distinct !{![[FOR_DISTINCT]], [[MUST_PROGRESS]], [[FOR_COUNT]]}
+// CHECK-DAG: ![[FOR_DISABLE]] =  distinct !{![[FOR_DISABLE]], [[MUST_PROGRESS]], [[DISABLE]]}
+// CHECK-DAG: ![[FOR_ENABLE]] =  distinct !{![[FOR_ENABLE]], [[MUST_PROGRESS]], [[ENABLE]]}
+
+// CHECK-DAG: ![[FOR_NESTED_ENABLE_INNER]] = distinct !{![[FOR_NESTED_ENABLE_INNER]], [[MUST_PROGRESS]]}
+// CHECK-DAG: ![[FOR_NESTED_ENABLE_OUTER]] = distinct !{![[FOR_NESTED_ENABLE_OUTER]], [[MUST_PROGRESS]], [[ENABLE]]}
+// CHECK-DAG: ![[FOR_NESTED2_ENABLE]] =  distinct !{![[FOR_NESTED2_ENABLE]], [[MUST_PROGRESS]], [[ENABLE]]}
+// CHECK-DAG: ![[FOR_NESTED2_1_ENABLE]] =  distinct !{![[FOR_NESTED2_1_ENABLE]], [[MUST_PROGRESS]], [[ENABLE]]}
+// CHECK-DAG: ![[WHILE_DISTINCT]]   =  distinct !{![[WHILE_DISTINCT]], [[MUST_PROGRESS]], [[WHILE_COUNT]]}
+
+// CHECK-DAG: ![[WHILE_DISABLE]] =  distinct !{![[WHILE_DISABLE]], [[MUST_PROGRESS]], [[DISABLE]]}
+// CHECK-DAG: ![[WHILE_ENABLE]] =  distinct !{![[WHILE_ENABLE]], [[MUST_PROGRESS]], [[ENABLE]]}
+// CHECK-DAG: ![[DO_DISTINCT]]  =  distinct !{![[DO_DISTINCT]], [[MUST_PROGRESS]], [[DO_COUNT]]}
+
+// CHECK-DAG: ![[DO_DISABLE]] =  distinct !{![[DO_DISABLE]], [[MUST_PROGRESS]], [[DISABLE]]}
+// CHECK-DAG: ![[DO_ENABLE]] =  distinct !{![[DO_ENABLE]], [[MUST_PROGRESS]], [[ENABLE]]}
diff --git clang/test/CodeGenObjC/boxing.m clang/test/CodeGenObjC/boxing.m
index 3f857e089ded..c124f172fd37 100644
--- clang/test/CodeGenObjC/boxing.m
+++ clang/test/CodeGenObjC/boxing.m
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 -triple x86_64-apple-darwin10 -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-apple-darwin10 -emit-llvm -o - %s -fexperimental-new-constant-interpreter | FileCheck %s
 
 typedef long NSInteger;
 typedef unsigned long NSUInteger;
diff --git clang/test/CodeGenObjC/dllstorage.m clang/test/CodeGenObjC/dllstorage.m
index c94f4c9b5804..a6c591b2d793 100644
--- clang/test/CodeGenObjC/dllstorage.m
+++ clang/test/CodeGenObjC/dllstorage.m
@@ -112,7 +112,7 @@ __declspec(dllimport)
 // CHECK-IR-DAG: @"OBJC_IVAR_$_M._ivar" = external dllimport global i32
 
 // CHECK-NF-DAG: @"$_OBJC_REF_CLASS_M" = external dllimport global ptr
-// CHECK-NF-DAG: @"__objc_ivar_offset_M._ivar.@" = external global i32
+// CHECK-NF-DAG: @"__objc_ivar_offset_M._ivar.@" = external dllimport global i32
 
 __declspec(dllexport)
 __attribute__((__objc_exception__))
@@ -151,7 +151,7 @@ id f(Q *q) {
 
 // CHECK-IR-DAG: @"OBJC_IVAR_$_M._ivar" = external dllimport global i32
 
-// CHECK-NF-DAG: @"__objc_ivar_offset_M._ivar.@" = external global i32
+// CHECK-NF-DAG: @"__objc_ivar_offset_M._ivar.@" = external dllimport global i32
 
 int g(void) {
   @autoreleasepool {
diff --git clang/test/CodeGenOpenCL/atomics-cas-remarks-gfx90a.cl clang/test/CodeGenOpenCL/atomics-cas-remarks-gfx90a.cl
index d23005e018f3..72027eda4571 100644
--- clang/test/CodeGenOpenCL/atomics-cas-remarks-gfx90a.cl
+++ clang/test/CodeGenOpenCL/atomics-cas-remarks-gfx90a.cl
@@ -26,10 +26,11 @@ typedef enum memory_scope {
 #endif
 } memory_scope;
 
-// REMARK: remark: A compare and swap loop was generated for an atomic fadd operation at workgroup-one-as memory scope [-Rpass=atomic-expand]
-// REMARK: remark: A compare and swap loop was generated for an atomic fadd operation at agent-one-as memory scope [-Rpass=atomic-expand]
-// REMARK: remark: A compare and swap loop was generated for an atomic fadd operation at one-as memory scope [-Rpass=atomic-expand]
 // REMARK: remark: A compare and swap loop was generated for an atomic fadd operation at wavefront-one-as memory scope [-Rpass=atomic-expand]
+// REMARK: remark: A compare and swap loop was generated for an atomic fadd operation at one-as memory scope [-Rpass=atomic-expand]
+// REMARK: remark: A compare and swap loop was generated for an atomic fadd operation at agent-one-as memory scope [-Rpass=atomic-expand]
+// REMARK: remark: A compare and swap loop was generated for an atomic fadd operation at workgroup-one-as memory scope [-Rpass=atomic-expand]
+
 // GFX90A-CAS-LABEL: @atomic_cas
 // GFX90A-CAS: atomicrmw fadd ptr addrspace(1) {{.*}} syncscope("workgroup-one-as") monotonic
 // GFX90A-CAS: atomicrmw fadd ptr addrspace(1) {{.*}} syncscope("agent-one-as") monotonic
diff --git clang/test/CodeGenOpenCL/atomics-unsafe-hw-remarks-gfx90a.cl clang/test/CodeGenOpenCL/atomics-unsafe-hw-remarks-gfx90a.cl
index 80ad9b4df8f6..7d684bc185a5 100644
--- clang/test/CodeGenOpenCL/atomics-unsafe-hw-remarks-gfx90a.cl
+++ clang/test/CodeGenOpenCL/atomics-unsafe-hw-remarks-gfx90a.cl
@@ -27,9 +27,10 @@ typedef enum memory_scope {
 #endif
 } memory_scope;
 
-// GFX90A-HW-REMARK: Hardware instruction generated for atomic fadd operation at memory scope workgroup-one-as due to an unsafe request. [-Rpass=si-lower]
-// GFX90A-HW-REMARK: Hardware instruction generated for atomic fadd operation at memory scope agent-one-as due to an unsafe request. [-Rpass=si-lower]
 // GFX90A-HW-REMARK: Hardware instruction generated for atomic fadd operation at memory scope wavefront-one-as due to an unsafe request. [-Rpass=si-lower]
+// GFX90A-HW-REMARK: Hardware instruction generated for atomic fadd operation at memory scope agent-one-as due to an unsafe request. [-Rpass=si-lower]
+// GFX90A-HW-REMARK: Hardware instruction generated for atomic fadd operation at memory scope workgroup-one-as due to an unsafe request. [-Rpass=si-lower]
+
 // GFX90A-HW-REMARK: global_atomic_add_f32 v0, v[0:1], v2, off glc
 // GFX90A-HW-REMARK: global_atomic_add_f32 v0, v[0:1], v2, off glc
 // GFX90A-HW-REMARK: global_atomic_add_f32 v0, v[0:1], v2, off glc
diff --git clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-param-err.cl clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-param-err.cl
index cd6bfbe647ff..5d86a9b36942 100644
--- clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-param-err.cl
+++ clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-param-err.cl
@@ -22,3 +22,8 @@ kernel void builtins_amdgcn_s_barrier_signal_isfirst_err(global int* in, global
   __builtin_amdgcn_s_barrier_wait(-1);
   *out = *in;
 }
+
+void test_s_buffer_prefetch_data(__amdgpu_buffer_rsrc_t rsrc, unsigned int off)
+{
+  __builtin_amdgcn_s_buffer_prefetch_data(rsrc, off, 31); // expected-error {{'__builtin_amdgcn_s_buffer_prefetch_data' must be a constant integer}}
+}
diff --git clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl
index d9ec258e644c..3d74667b62b8 100644
--- clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl
+++ clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl
@@ -256,4 +256,47 @@ void test_s_ttracedata_imm()
   __builtin_amdgcn_s_ttracedata_imm(1);
 }
 
+// CHECK-LABEL: @test_s_prefetch_data(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[FP_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT:    [[GP_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT:    [[CP_ADDR:%.*]] = alloca ptr addrspace(4), align 8, addrspace(5)
+// CHECK-NEXT:    [[LEN_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT:    store ptr [[FP:%.*]], ptr addrspace(5) [[FP_ADDR]], align 8
+// CHECK-NEXT:    store ptr addrspace(1) [[GP:%.*]], ptr addrspace(5) [[GP_ADDR]], align 8
+// CHECK-NEXT:    store ptr addrspace(4) [[CP:%.*]], ptr addrspace(5) [[CP_ADDR]], align 8
+// CHECK-NEXT:    store i32 [[LEN:%.*]], ptr addrspace(5) [[LEN_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr addrspace(5) [[FP_ADDR]], align 8
+// CHECK-NEXT:    call void @llvm.amdgcn.s.prefetch.data.p0(ptr [[TMP0]], i32 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[GP_ADDR]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(5) [[LEN_ADDR]], align 4
+// CHECK-NEXT:    call void @llvm.amdgcn.s.prefetch.data.p1(ptr addrspace(1) [[TMP1]], i32 [[TMP2]])
+// CHECK-NEXT:    [[TMP3:%.*]] = load ptr addrspace(4), ptr addrspace(5) [[CP_ADDR]], align 8
+// CHECK-NEXT:    call void @llvm.amdgcn.s.prefetch.data.p4(ptr addrspace(4) [[TMP3]], i32 31)
+// CHECK-NEXT:    ret void
+//
+void test_s_prefetch_data(int *fp, global float *gp, constant char *cp, unsigned int len)
+{
+  __builtin_amdgcn_s_prefetch_data(fp, 0);
+  __builtin_amdgcn_s_prefetch_data(gp, len);
+  __builtin_amdgcn_s_prefetch_data(cp, 31);
+}
 
+// CHECK-LABEL: @test_s_buffer_prefetch_data(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RSRC_ADDR:%.*]] = alloca ptr addrspace(8), align 16, addrspace(5)
+// CHECK-NEXT:    [[LEN_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT:    store ptr addrspace(8) [[RSRC:%.*]], ptr addrspace(5) [[RSRC_ADDR]], align 16
+// CHECK-NEXT:    store i32 [[LEN:%.*]], ptr addrspace(5) [[LEN_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr addrspace(8), ptr addrspace(5) [[RSRC_ADDR]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(5) [[LEN_ADDR]], align 4
+// CHECK-NEXT:    call void @llvm.amdgcn.s.buffer.prefetch.data(ptr addrspace(8) [[TMP0]], i32 128, i32 [[TMP1]])
+// CHECK-NEXT:    [[TMP2:%.*]] = load ptr addrspace(8), ptr addrspace(5) [[RSRC_ADDR]], align 16
+// CHECK-NEXT:    call void @llvm.amdgcn.s.buffer.prefetch.data(ptr addrspace(8) [[TMP2]], i32 0, i32 31)
+// CHECK-NEXT:    ret void
+//
+void test_s_buffer_prefetch_data(__amdgpu_buffer_rsrc_t rsrc, unsigned int len)
+{
+  __builtin_amdgcn_s_buffer_prefetch_data(rsrc, 128, len);
+  __builtin_amdgcn_s_buffer_prefetch_data(rsrc, 0, 31);
+}
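Aside for readers of the gfx12 hunks above: taken together with the param-err test earlier in this patch, the argument roles are pointer (flat, global or constant) plus a length for __builtin_amdgcn_s_prefetch_data, and resource, constant byte offset, length for __builtin_amdgcn_s_buffer_prefetch_data. A minimal OpenCL-style sketch on a gfx12 target, inferred from these tests rather than from separate documentation:

// Sketch only; argument roles are inferred from the tests in this patch.
void prefetch_sketch(global float *gp, constant char *cp,
                     __amdgpu_buffer_rsrc_t rsrc, unsigned int len) {
  __builtin_amdgcn_s_prefetch_data(gp, len);              // length may be a runtime value
  __builtin_amdgcn_s_prefetch_data(cp, 31);               // or a literal
  __builtin_amdgcn_s_buffer_prefetch_data(rsrc, 0, len);  // 2nd arg (offset) must be a constant integer
}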
diff --git clang/test/CodeGenSYCL/address-space-deduction.cpp clang/test/CodeGenSYCL/address-space-deduction.cpp
index 96075a47343f..5910ec3bfc30 100644
--- clang/test/CodeGenSYCL/address-space-deduction.cpp
+++ clang/test/CodeGenSYCL/address-space-deduction.cpp
@@ -33,55 +33,55 @@
 // CHECK-NEXT:    store ptr addrspace(4) [[I_ASCAST]], ptr addrspace(4) [[PPTR_ASCAST]], align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PPTR_ASCAST]], align 8
 // CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr addrspace(4) [[TMP0]], [[I_ASCAST]]
-// CHECK-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[CMP]] to i8
-// CHECK-NEXT:    store i8 [[FROMBOOL]], ptr addrspace(4) [[IS_I_PTR_ASCAST]], align 1
+// CHECK-NEXT:    [[STOREDV:%.*]] = zext i1 [[CMP]] to i8
+// CHECK-NEXT:    store i8 [[STOREDV]], ptr addrspace(4) [[IS_I_PTR_ASCAST]], align 1
 // CHECK-NEXT:    [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PPTR_ASCAST]], align 8
 // CHECK-NEXT:    store i32 66, ptr addrspace(4) [[TMP1]], align 4
 // CHECK-NEXT:    store i32 23, ptr addrspace(4) [[VAR23_ASCAST]], align 4
 // CHECK-NEXT:    store ptr addrspace(4) [[VAR23_ASCAST]], ptr addrspace(4) [[CP_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[CP_ASCAST]], align 8
-// CHECK-NEXT:    store i8 41, ptr addrspace(4) [[TMP3]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[CP_ASCAST]], align 8
+// CHECK-NEXT:    store i8 41, ptr addrspace(4) [[TMP2]], align 1
 // CHECK-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [42 x i32], ptr addrspace(4) [[ARR_ASCAST]], i64 0, i64 0
 // CHECK-NEXT:    store ptr addrspace(4) [[ARRAYDECAY]], ptr addrspace(4) [[CPP_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP5:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[CPP_ASCAST]], align 8
-// CHECK-NEXT:    store i8 43, ptr addrspace(4) [[TMP5]], align 1
+// CHECK-NEXT:    [[TMP3:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[CPP_ASCAST]], align 8
+// CHECK-NEXT:    store i8 43, ptr addrspace(4) [[TMP3]], align 1
 // CHECK-NEXT:    [[ARRAYDECAY1:%.*]] = getelementptr inbounds [42 x i32], ptr addrspace(4) [[ARR_ASCAST]], i64 0, i64 0
 // CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[ARRAYDECAY1]], i64 10
 // CHECK-NEXT:    store ptr addrspace(4) [[ADD_PTR]], ptr addrspace(4) [[APTR_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP6:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[APTR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[APTR_ASCAST]], align 8
 // CHECK-NEXT:    [[ARRAYDECAY2:%.*]] = getelementptr inbounds [42 x i32], ptr addrspace(4) [[ARR_ASCAST]], i64 0, i64 0
-// CHECK-NEXT:    [[ADD_PTR3:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[ARRAYDECAY2]], i64 168
-// CHECK-NEXT:    [[CMP4:%.*]] = icmp ult ptr addrspace(4) [[TMP6]], [[ADD_PTR3]]
+// CHECK-NEXT:    [[ADD_PTR3:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[ARRAYDECAY2]], i64 168
+// CHECK-NEXT:    [[CMP4:%.*]] = icmp ult ptr addrspace(4) [[TMP4]], [[ADD_PTR3]]
 // CHECK-NEXT:    br i1 [[CMP4]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
 // CHECK:       if.then:
-// CHECK-NEXT:    [[TMP7:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[APTR_ASCAST]], align 8
-// CHECK-NEXT:    store i32 44, ptr addrspace(4) [[TMP7]], align 4
+// CHECK-NEXT:    [[TMP5:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[APTR_ASCAST]], align 8
+// CHECK-NEXT:    store i32 44, ptr addrspace(4) [[TMP5]], align 4
 // CHECK-NEXT:    br label [[IF_END]]
 // CHECK:       if.end:
 // CHECK-NEXT:    store ptr addrspace(4) addrspacecast (ptr addrspace(1) @.str to ptr addrspace(4)), ptr addrspace(4) [[STR_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP8:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[STR_ASCAST]], align 8
-// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP8]], i64 0
-// CHECK-NEXT:    [[TMP9:%.*]] = load i8, ptr addrspace(4) [[ARRAYIDX]], align 1
-// CHECK-NEXT:    [[CONV:%.*]] = sext i8 [[TMP9]] to i32
+// CHECK-NEXT:    [[TMP6:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[STR_ASCAST]], align 8
+// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP6]], i64 0
+// CHECK-NEXT:    [[TMP7:%.*]] = load i8, ptr addrspace(4) [[ARRAYIDX]], align 1
+// CHECK-NEXT:    [[CONV:%.*]] = sext i8 [[TMP7]] to i32
 // CHECK-NEXT:    store i32 [[CONV]], ptr addrspace(4) [[I_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr addrspace(4) [[I_ASCAST]], align 4
-// CHECK-NEXT:    [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], 2
+// CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr addrspace(4) [[I_ASCAST]], align 4
+// CHECK-NEXT:    [[CMP5:%.*]] = icmp sgt i32 [[TMP8]], 2
 // CHECK-NEXT:    br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
 // CHECK:       cond.true:
-// CHECK-NEXT:    [[TMP11:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[STR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP9:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[STR_ASCAST]], align 8
 // CHECK-NEXT:    br label [[COND_END:%.*]]
 // CHECK:       cond.false:
 // CHECK-NEXT:    br label [[COND_END]]
 // CHECK:       cond.end:
-// CHECK-NEXT:    [[COND:%.*]] = phi ptr addrspace(4) [ [[TMP11]], [[COND_TRUE]] ], [ addrspacecast (ptr addrspace(1) @.str.1 to ptr addrspace(4)), [[COND_FALSE]] ]
+// CHECK-NEXT:    [[COND:%.*]] = phi ptr addrspace(4) [ [[TMP9]], [[COND_TRUE]] ], [ addrspacecast (ptr addrspace(1) @.str.1 to ptr addrspace(4)), [[COND_FALSE]] ]
 // CHECK-NEXT:    store ptr addrspace(4) [[COND]], ptr addrspace(4) [[PHI_STR_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr addrspace(4) [[I_ASCAST]], align 4
-// CHECK-NEXT:    [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], 2
-// CHECK-NEXT:    [[TMP13:%.*]] = zext i1 [[CMP6]] to i64
+// CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr addrspace(4) [[I_ASCAST]], align 4
+// CHECK-NEXT:    [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], 2
+// CHECK-NEXT:    [[TMP11:%.*]] = zext i1 [[CMP6]] to i64
 // CHECK-NEXT:    [[COND7:%.*]] = select i1 [[CMP6]], ptr addrspace(4) addrspacecast (ptr addrspace(1) @.str.2 to ptr addrspace(4)), ptr addrspace(4) null
 // CHECK-NEXT:    store ptr addrspace(4) [[COND7]], ptr addrspace(4) [[SELECT_NULL_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP14:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[STR_ASCAST]], align 8
-// CHECK-NEXT:    store ptr addrspace(4) [[TMP14]], ptr addrspace(4) [[SELECT_STR_TRIVIAL1_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP12:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[STR_ASCAST]], align 8
+// CHECK-NEXT:    store ptr addrspace(4) [[TMP12]], ptr addrspace(4) [[SELECT_STR_TRIVIAL1_ASCAST]], align 8
 // CHECK-NEXT:    store ptr addrspace(4) addrspacecast (ptr addrspace(1) @.str.1 to ptr addrspace(4)), ptr addrspace(4) [[SELECT_STR_TRIVIAL2_ASCAST]], align 8
 // CHECK-NEXT:    ret void
 //
diff --git clang/test/Driver/Ofast.c clang/test/Driver/Ofast.c
index 91de296a4c3f..b5189e951cc6 100644
--- clang/test/Driver/Ofast.c
+++ clang/test/Driver/Ofast.c
@@ -1,14 +1,14 @@
-// RUN: %clang -Ofast -### %s 2>&1 | FileCheck -check-prefix=CHECK-OFAST %s
-// RUN: %clang -O2 -Ofast -### %s 2>&1 | FileCheck -check-prefix=CHECK-OFAST %s
-// RUN: %clang -fno-fast-math -Ofast -### %s 2>&1 | FileCheck -check-prefix=CHECK-OFAST %s
-// RUN: %clang -fno-strict-aliasing -Ofast -### %s 2>&1 | FileCheck -check-prefix=CHECK-OFAST %s
-// RUN: %clang -fno-vectorize -Ofast -### %s 2>&1 | FileCheck -check-prefix=CHECK-OFAST %s
-// RUN: %clang -Ofast -O2 -### -Werror %s 2>&1 | FileCheck -check-prefix=CHECK-OFAST-O2 \
+// RUN: %clang -c -Ofast -### %s 2>&1 | FileCheck -check-prefix=CHECK-OFAST %s
+// RUN: %clang -c -O2 -Ofast -### %s 2>&1 | FileCheck -check-prefix=CHECK-OFAST %s
+// RUN: %clang -c -fno-fast-math -Ofast -### %s 2>&1 | FileCheck -check-prefix=CHECK-OFAST %s
+// RUN: %clang -c -fno-strict-aliasing -Ofast -### %s 2>&1 | FileCheck -check-prefix=CHECK-OFAST %s
+// RUN: %clang -c -fno-vectorize -Ofast -### %s 2>&1 | FileCheck -check-prefix=CHECK-OFAST %s
+// RUN: %clang -c -Ofast -O2 -### -Werror %s 2>&1 | FileCheck -check-prefix=CHECK-OFAST-O2 \
 // RUN:  %if target={{.*-windows-msvc.*}} %{ --check-prefix=CHECK-OFAST-O2-ALIASING-MSVC %} \
 // RUN:  %else %{ --check-prefix=CHECK-OFAST-O2-ALIASING %} %s
-// RUN: %clang -Ofast -fno-fast-math -### %s 2>&1 | FileCheck -check-prefix=CHECK-OFAST-NO-FAST-MATH %s
-// RUN: %clang -Ofast -fno-strict-aliasing -### %s 2>&1 | FileCheck -check-prefix=CHECK-OFAST-NO-STRICT-ALIASING %s
-// RUN: %clang -Ofast -fno-vectorize -### %s 2>&1 | FileCheck -check-prefix=CHECK-OFAST-NO-VECTORIZE %s
+// RUN: %clang -c -Ofast -fno-fast-math -### %s 2>&1 | FileCheck -check-prefix=CHECK-OFAST-NO-FAST-MATH %s
+// RUN: %clang -c -Ofast -fno-strict-aliasing -### %s 2>&1 | FileCheck -check-prefix=CHECK-OFAST-NO-STRICT-ALIASING %s
+// RUN: %clang -c -Ofast -fno-vectorize -### %s 2>&1 | FileCheck -check-prefix=CHECK-OFAST-NO-VECTORIZE %s
 
 // CHECK-OFAST: use '-O3 -ffast-math' for the same behavior, or '-O3' to enable only conforming optimizations
 // CHECK-OFAST: -cc1
diff --git clang/test/Driver/cl-link.c clang/test/Driver/cl-link.c
index ffd0b5ac4bad..f52604427604 100644
--- clang/test/Driver/cl-link.c
+++ clang/test/Driver/cl-link.c
@@ -13,10 +13,8 @@
 // ASAN: link.exe
 // ASAN: "-debug"
 // ASAN: "-incremental:no"
-// ASAN: "{{[^"]*}}clang_rt.asan.lib"
-// ASAN: "-wholearchive:{{.*}}clang_rt.asan.lib"
-// ASAN: "{{[^"]*}}clang_rt.asan_cxx.lib"
-// ASAN: "-wholearchive:{{.*}}clang_rt.asan_cxx.lib"
+// ASAN: "{{[^"]*}}clang_rt.asan_dynamic.lib"
+// ASAN: "-wholearchive:{{.*}}clang_rt.asan_static_runtime_thunk.lib"
 // ASAN: "{{.*}}cl-link{{.*}}.obj"
 
 // RUN: %clang_cl -m32 -arch:IA32 --target=i386-pc-win32 /MD /Tc%s -fuse-ld=link -### -fsanitize=address 2>&1 | FileCheck --check-prefix=ASAN-MD %s
@@ -24,7 +22,6 @@
 // ASAN-MD: "-debug"
 // ASAN-MD: "-incremental:no"
 // ASAN-MD: "{{.*}}clang_rt.asan_dynamic.lib"
-// ASAN-MD: "{{[^"]*}}clang_rt.asan_dynamic_runtime_thunk.lib"
 // ASAN-MD: "-include:___asan_seh_interceptor"
 // ASAN-MD: "-wholearchive:{{.*}}clang_rt.asan_dynamic_runtime_thunk.lib"
 // ASAN-MD: "{{.*}}cl-link{{.*}}.obj"
@@ -40,7 +37,8 @@
 // ASAN-DLL: "-dll"
 // ASAN-DLL: "-debug"
 // ASAN-DLL: "-incremental:no"
-// ASAN-DLL: "{{.*}}clang_rt.asan_dll_thunk.lib"
+// ASAN-DLL: "{{.*}}clang_rt.asan_dynamic.lib"
+// ASAN-DLL: "-wholearchive:{{.*}}clang_rt.asan_static_runtime_thunk.lib"
 // ASAN-DLL: "{{.*}}cl-link{{.*}}.obj"
 
 // RUN: %clang_cl /Zi /Tc%s -fuse-ld=link -### 2>&1 | FileCheck --check-prefix=DEBUG %s
diff --git clang/test/Driver/clang_f_opts.c clang/test/Driver/clang_f_opts.c
index d69cd199ac61..335fa546a138 100644
--- clang/test/Driver/clang_f_opts.c
+++ clang/test/Driver/clang_f_opts.c
@@ -600,10 +600,12 @@
 // CHECK_NO_DISABLE_DIRECT-NOT: -fobjc-disable-direct-methods-for-testing
 
 // RUN: %clang -### -S -fjmc -target x86_64-unknown-linux %s 2>&1 | FileCheck -check-prefixes=CHECK_JMC_WARN,CHECK_NOJMC %s
-// RUN: %clang -### -S -fjmc -target x86_64-pc-windows-msvc %s 2>&1 | FileCheck -check-prefixes=CHECK_JMC_WARN_NOT_ELF,CHECK_NOJMC %s
+// RUN: %clang -### -S -fjmc -target x86_64-pc-windows-msvc %s 2>&1 | FileCheck -check-prefixes=CHECK_JMC_WARN,CHECK_NOJMC %s
+// RUN: %clang -### -S -fjmc -g -target x86_64-pc-windows-msvc %s 2>&1 | FileCheck -check-prefix=CHECK_JMC %s
+// RUN: %clang -### -S -fjmc -g -fno-jmc -target x86_64-pc-windows-msvc %s 2>&1 | FileCheck -check-prefix=CHECK_NOJMC %s
 // RUN: %clang -### -S -fjmc -g -target x86_64-unknown-linux %s 2>&1 | FileCheck -check-prefix=CHECK_JMC %s
 // RUN: %clang -### -S -fjmc -g -fno-jmc -target x86_64-unknown-linux %s 2>&1 | FileCheck -check-prefix=CHECK_NOJMC %s
-// RUN: %clang -### -fjmc -g -flto -target x86_64-pc-windows-msvc %s 2>&1 | FileCheck -check-prefixes=CHECK_JMC_WARN_NOT_ELF,CHECK_NOJMC_LTO %s
+// RUN: %clang -### -fjmc -g -flto -target x86_64-pc-windows-msvc %s 2>&1 | FileCheck -check-prefix=CHECK_NOJMC_LTO %s
 // RUN: %clang -### -fjmc -g -flto -target x86_64-unknown-linux %s 2>&1 | FileCheck -check-prefix=CHECK_JMC_LTO %s
 // RUN: %clang -### -fjmc -g -flto -fno-jmc -target x86_64-unknown-linux %s 2>&1 | FileCheck -check-prefix=CHECK_NOJMC_LTO %s
 // CHECK_JMC_WARN: -fjmc requires debug info. Use -g or debug options that enable debugger's stepping function; option ignored
diff --git clang/test/Driver/fsanitize.c clang/test/Driver/fsanitize.c
index f86c978f221c..6ecf0b57bee5 100644
--- clang/test/Driver/fsanitize.c
+++ clang/test/Driver/fsanitize.c
@@ -197,6 +197,8 @@
 // CHECK-SANMT-MT: "-target-feature" "+mte"
 // CHECK-SANMT-MT-SAME: "-fsanitize=memtag-stack,memtag-heap,memtag-globals"
 
+// RUN: not %clang --target=aarch64-linux -fsanitize=memtag -Xclang -target-feature -Xclang +mte %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANMT-MT
+
 // RUN: not %clang --target=aarch64-linux -fsanitize=memtag %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANMT-NOMT-0
 // CHECK-SANMT-NOMT-0: '-fsanitize=memtag-stack' requires hardware support (+memtag)
 
@@ -726,8 +728,8 @@
 // NO-SP-NOT: stack-protector
 // NO-SP: "-fsanitize=safe-stack"
 // SP-ASAN: error: invalid argument '-fsanitize=safe-stack' not allowed with '-fsanitize=address'
-// SP: "-fsanitize=safe-stack"
 // SP: -stack-protector
+// SP: "-fsanitize=safe-stack"
 // NO-SP-NOT: stack-protector
 
 // RUN: %clang --target=powerpc64-unknown-linux-gnu -fsanitize=memory %s -### 2>&1 | FileCheck %s -check-prefix=CHECK-SANM
diff --git clang/test/Driver/fuchsia.c clang/test/Driver/fuchsia.c
index c67f7f8c005b..83dee1698169 100644
--- clang/test/Driver/fuchsia.c
+++ clang/test/Driver/fuchsia.c
@@ -30,10 +30,10 @@
 // CHECK: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
 // CHECK: "-isysroot" "[[SYSROOT:[^"]+]]"
 // CHECK: "-internal-externc-isystem" "[[SYSROOT]]{{/|\\\\}}include"
+// CHECK: "-stack-protector" "2"
 // CHECK-AARCH64: "-fsanitize=shadow-call-stack"
 // CHECK-RISCV64: "-fsanitize=shadow-call-stack"
 // CHECK-X86_64: "-fsanitize=safe-stack"
-// CHECK: "-stack-protector" "2"
 // CHECK-AARCH64: "-target-feature" "+outline-atomics"
 // CHECK-NOT: "-fcommon"
 // CHECK: {{.*}}ld.lld{{.*}}" "-z" "max-page-size=4096" "-z" "now" "-z" "start-stop-visibility=hidden" "-z" "rodynamic" "-z" "separate-loadable-segments" "-z" "rel" "--pack-dyn-relocs=relr"
diff --git clang/test/Driver/hip-cuid.hip clang/test/Driver/hip-cuid.hip
index ed7de782bba5..2e38c59ccf5e 100644
--- clang/test/Driver/hip-cuid.hip
+++ clang/test/Driver/hip-cuid.hip
@@ -58,6 +58,28 @@
 // RUN:   %S/Inputs/hip_multiple_inputs/b.hip \
 // RUN: 2>&1 | FileCheck -check-prefixes=COMMON,HEX %s
 
+// Check that cuid is propagated to the host-only compilation.
+// RUN: %clang -### -x hip \
+// RUN:   --target=x86_64-unknown-linux-gnu \
+// RUN:   --no-offload-new-driver \
+// RUN:   --offload-arch=gfx900 \
+// RUN:   --offload-host-only \
+// RUN:   -c -nogpuinc -nogpulib -cuid=xyz_123 \
+// RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
+// RUN:   %S/Inputs/hip_multiple_inputs/b.hip \
+// RUN: 2>&1 | FileCheck -check-prefixes=HOST %s
+
+// Check that cuid is propagated to the device-only compilation.
+// RUN: %clang -### -x hip \
+// RUN:   --target=x86_64-unknown-linux-gnu \
+// RUN:   --no-offload-new-driver \
+// RUN:   --offload-arch=gfx900 \
+// RUN:   --offload-device-only \
+// RUN:   -c -nogpuinc -nogpulib -cuid=xyz_123 \
+// RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
+// RUN:   %S/Inputs/hip_multiple_inputs/b.hip \
+// RUN: 2>&1 | FileCheck -check-prefixes=DEVICE %s
+
 // INVALID: invalid value 'invalid' in '-fuse-cuid=invalid'
 
 // COMMON: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa"
@@ -92,3 +114,21 @@
 // HEX-NOT: "-cuid=[[CUID]]"
 // COMMON-SAME: "-cuid=[[CUID2]]"
 // COMMON-SAME: "{{.*}}b.hip"
+
+// HOST: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu"
+// HOST-SAME: "-cuid=[[CUID:xyz_123]]"
+// HOST-SAME: "{{.*}}a.cu"
+
+// HOST: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu"
+// HOST-SAME: "-cuid=[[CUID]]"
+// HOST-SAME: "{{.*}}b.hip"
+
+// DEVICE: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa"
+// DEVICE-SAME: "-target-cpu" "gfx900"
+// DEVICE-SAME: "-cuid=[[CUID:xyz_123]]"
+// DEVICE-SAME: "{{.*}}a.cu"
+
+// DEVICE: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa"
+// DEVICE-SAME: "-target-cpu" "gfx900"
+// DEVICE-SAME: "-cuid=[[CUID]]"
+// DEVICE-SAME: "{{.*}}b.hip"
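A note on the hip-cuid additions: -cuid= assigns each translation unit a stable compilation-unit ID (used when externalizing TU-local device variables so the host and device sides of the same TU agree), and the new HOST/DEVICE prefixes assert that a user-supplied value survives unchanged into every -cc1 job of --offload-host-only and --offload-device-only compiles. Paraphrasing the new RUN lines:

// Paraphrase of the new RUN lines; the paths and the gfx900 arch are just the test's choices.
//   clang -### -x hip --offload-arch=gfx900 --offload-host-only   -c -nogpuinc -nogpulib -cuid=xyz_123 a.cu b.hip
//   clang -### -x hip --offload-arch=gfx900 --offload-device-only -c -nogpuinc -nogpulib -cuid=xyz_123 a.cu b.hip
// Every resulting -cc1 line is expected to carry "-cuid=xyz_123".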
diff --git clang/test/Driver/mcmodel.c clang/test/Driver/mcmodel.c
index 9681c32579d7..c6c8b5433d23 100644
--- clang/test/Driver/mcmodel.c
+++ clang/test/Driver/mcmodel.c
@@ -43,5 +43,4 @@
 // AARCH64-PIC-LARGE: error: invalid argument '-mcmodel=large' only allowed with '-fno-pic'
 // ERR-AARCH64_32: error: unsupported argument 'small' to option '-mcmodel=' for target 'aarch64_32-unknown-linux'
 
-// ERR-LOONGARCH64-PLT-LARGE: error: invalid argument '-mcmodel=large' not allowed with '-fplt'
 // ERR-LOONGARCH64-PLT-EXTREME: error: invalid argument '-mcmodel=extreme' not allowed with '-fplt'
diff --git clang/test/Driver/ps4-ps5-toolchain.c clang/test/Driver/ps4-ps5-toolchain.c
index 444e9df24714..c9987c2b5758 100644
--- clang/test/Driver/ps4-ps5-toolchain.c
+++ clang/test/Driver/ps4-ps5-toolchain.c
@@ -11,3 +11,8 @@
 // RUN: %clang %s -### -target x86_64-sie-ps5 -flto 2>&1 | FileCheck %s --check-prefix=LTO
 // LTO-NOT: error:
 // LTO-NOT: unable to pass LLVM bit-code
+
+// Verify that the jump table sizes section is enabled.
+// RUN: %clang %s -target x86_64-sie-ps5 -### 2>&1 | FileCheck -check-prefix=JUMPTABLESIZES %s
+// JUMPTABLESIZES: "-mllvm" "-emit-jump-table-sizes-section"
+// JUMPTABLESIZES: "-plugin-opt=-emit-jump-table-sizes-section"
diff --git clang/test/Driver/ps4-sdk-root.c clang/test/Driver/ps4-sdk-root.c
index e1a04522030c..3e02fa9fc3bc 100644
--- clang/test/Driver/ps4-sdk-root.c
+++ clang/test/Driver/ps4-sdk-root.c
@@ -6,9 +6,8 @@
 
 // Check that PS4 clang doesn't report a warning message when locating
 // system libraries (either by looking at the value of SCE_ORBIS_SDK_DIR
-// or relative to the location of the compiler driver), if "-c", "-S", "-E",
-// "--sysroot", "-nostdlib" or "-nodefaultlibs" option is specified on
-// the command line.
+// or relative to the location of the compiler driver), if "-c", "-S", "-E"
+// or "--sysroot" option is specified on the command line.
 // Otherwise, check that PS4 clang reports a warning.
 
 // Setting up SCE_ORBIS_SDK_DIR to existing location, which is not a PS4 SDK.
@@ -36,9 +35,6 @@
 // RUN: env SCE_ORBIS_SDK_DIR=.. %clang -Winvalid-or-nonexistent-directory -### -emit-ast -isysroot foo -target x86_64-scei-ps4 %s 2>&1 | FileCheck -check-prefix=WARN-ISYSROOT -check-prefix=NO-WARN %s
 // RUN: env SCE_ORBIS_SDK_DIR=.. %clang -Winvalid-or-nonexistent-directory -### --sysroot=foo/ -isysroot foo -target x86_64-scei-ps4 %s 2>&1 | FileCheck -check-prefix=WARN-ISYSROOT -check-prefix=NO-WARN %s
 
-// RUN: env SCE_ORBIS_SDK_DIR=.. %clang -Winvalid-or-nonexistent-directory -### -nostdlib -target x86_64-scei-ps4 %s 2>&1 | FileCheck -check-prefix=WARN-SYS-HEADERS -check-prefix=NO-WARN %s
-// RUN: env SCE_ORBIS_SDK_DIR=.. %clang -Winvalid-or-nonexistent-directory -### -nodefaultlibs -target x86_64-scei-ps4 %s 2>&1 | FileCheck -check-prefix=WARN-SYS-HEADERS -check-prefix=NO-WARN %s
-
 // NO-WARN-NOT: {{warning:|error:}}
 // WARN-SYS-HEADERS: warning: unable to find PS4 system headers directory
 // WARN-ISYSROOT: warning: no such sysroot directory: 'foo'
diff --git clang/test/Driver/ps5-sdk-root.c clang/test/Driver/ps5-sdk-root.c
index c3672aef9dc0..2a82d8e72283 100644
--- clang/test/Driver/ps5-sdk-root.c
+++ clang/test/Driver/ps5-sdk-root.c
@@ -8,12 +8,11 @@
 
 // Check that PS5 clang doesn't report a warning message when locating
 // system libraries (either by looking at the value of SCE_PROSPERO_SDK_DIR
-// or relative to the location of the compiler driver), if "-c", "-S", "-E",
-// "--sysroot", "-nostdlib" or "-nodefaultlibs" option is specified on
-// the command line.
+// or relative to the location of the compiler driver), if "-c", "-S", "-E"
+// or "--sysroot" option is specified on the command line.
 // Otherwise, check that PS5 clang reports a warning.
 
-// Setting up SCE_PROSPERO_SDK_DIR to existing location, which is not a PS4 SDK.
+// Setting up SCE_PROSPERO_SDK_DIR to existing location, which is not a PS5 SDK.
 // RUN: env SCE_PROSPERO_SDK_DIR=.. %clang -Winvalid-or-nonexistent-directory -### -target x86_64-sie-ps5 %s 2>&1 | FileCheck -check-prefix=WARN-SYS-HEADERS -check-prefix=WARN-SYS-LIBS -check-prefix=NO-WARN %s
 
 // RUN: env SCE_PROSPERO_SDK_DIR=.. %clang -Winvalid-or-nonexistent-directory -### -c -target x86_64-sie-ps5 %s 2>&1 | FileCheck -check-prefix=WARN-SYS-HEADERS -check-prefix=NO-WARN %s
@@ -38,9 +37,6 @@
 // RUN: env SCE_PROSPERO_SDK_DIR=.. %clang -Winvalid-or-nonexistent-directory -### -emit-ast -isysroot foo -target x86_64-sie-ps5 %s 2>&1 | FileCheck -check-prefix=WARN-ISYSROOT -check-prefix=NO-WARN %s
 // RUN: env SCE_PROSPERO_SDK_DIR=.. %clang -Winvalid-or-nonexistent-directory -### --sysroot=foo/ -isysroot foo -target x86_64-sie-ps5 %s 2>&1 | FileCheck -check-prefix=WARN-ISYSROOT -check-prefix=NO-WARN %s
 
-// RUN: env SCE_PROSPERO_SDK_DIR=.. %clang -Winvalid-or-nonexistent-directory -### -nostdlib -target x86_64-sie-ps5 %s 2>&1 | FileCheck -check-prefix=WARN-SYS-HEADERS -check-prefix=NO-WARN %s
-// RUN: env SCE_PROSPERO_SDK_DIR=.. %clang -Winvalid-or-nonexistent-directory -### -nodefaultlibs -target x86_64-sie-ps5 %s 2>&1 | FileCheck -check-prefix=WARN-SYS-HEADERS -check-prefix=NO-WARN %s
-
 // NO-WARN-NOT: {{warning:|error:}}
 // WARN-SYS-HEADERS: warning: unable to find PS5 system headers directory
 // WARN-ISYSROOT: warning: no such sysroot directory: 'foo'
diff --git clang/test/Driver/riscv-mcmodel.c clang/test/Driver/riscv-mcmodel.c
new file mode 100644
index 000000000000..c27d7c63a75a
--- /dev/null
+++ clang/test/Driver/riscv-mcmodel.c
@@ -0,0 +1,23 @@
+// RUN: %clang --target=riscv32 -### -c -mcmodel=small %s 2>&1 | FileCheck --check-prefix=SMALL %s
+// RUN: %clang --target=riscv64 -### -c -mcmodel=small %s 2>&1 | FileCheck --check-prefix=SMALL %s
+
+// RUN: %clang --target=riscv32 -### -c -mcmodel=medlow %s 2>&1 | FileCheck --check-prefix=SMALL %s
+// RUN: %clang --target=riscv64 -### -c -mcmodel=medlow %s 2>&1 | FileCheck --check-prefix=SMALL %s
+
+// RUN: %clang --target=riscv32 -### -c -mcmodel=medium %s 2>&1 | FileCheck --check-prefix=MEDIUM %s
+// RUN: %clang --target=riscv64 -### -c -mcmodel=medium %s 2>&1 | FileCheck --check-prefix=MEDIUM %s
+
+// RUN: %clang --target=riscv32 -### -c -mcmodel=medany %s 2>&1 | FileCheck --check-prefix=MEDIUM %s
+// RUN: %clang --target=riscv64 -### -c -mcmodel=medany %s 2>&1 | FileCheck --check-prefix=MEDIUM %s
+
+// RUN: not %clang --target=riscv32 -### -c -mcmodel=large %s 2>&1 | FileCheck --check-prefix=ERR-LARGE %s
+// RUN: %clang --target=riscv64 -### -c -mcmodel=large %s 2>&1 | FileCheck --check-prefix=LARGE %s
+
+// RUN: not %clang --target=riscv64 -### -c -mcmodel=large -fpic %s 2>&1 | FileCheck --check-prefix=ERR-PIC-LARGE %s
+
+// SMALL: "-mcmodel=small"
+// MEDIUM: "-mcmodel=medium"
+// LARGE: "-mcmodel=large"
+
+// ERR-LARGE:  error: unsupported argument 'large' to option '-mcmodel=' for target 'riscv32'
+// ERR-PIC-LARGE:  error: invalid argument '-mcmodel=large' not allowed with '-fpic'
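The new RISC-V test pins down the accepted -mcmodel spellings: medlow is folded to small and medany to medium, while large is riscv64-only and incompatible with PIC. Restating the RUN/CHECK pairs above as plain commands:

//   clang --target=riscv64 -c -mcmodel=medlow foo.c       (cc1 gets "-mcmodel=small")
//   clang --target=riscv64 -c -mcmodel=medany foo.c       (cc1 gets "-mcmodel=medium")
//   clang --target=riscv32 -c -mcmodel=large foo.c        (error: unsupported for riscv32)
//   clang --target=riscv64 -c -mcmodel=large -fpic foo.c  (error: not allowed with '-fpic')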
diff --git clang/test/ExtractAPI/attributed-typedef.m clang/test/ExtractAPI/attributed-typedef.m
new file mode 100644
index 000000000000..c948c873ab75
--- /dev/null
+++ clang/test/ExtractAPI/attributed-typedef.m
@@ -0,0 +1,24 @@
+// RUN: rm -rf %t
+// RUN: %clang_cc1 -extract-api --pretty-sgf --emit-sgf-symbol-labels-for-testing \
+// RUN:   -triple arm64-apple-macosx -x objective-c-header %s -o %t/output.symbols.json
+
+_Pragma("clang assume_nonnull begin")
+
+struct Foo { int a; };
+typedef struct Foo *Bar;
+// RUN: FileCheck %s -input-file %t/output.symbols.json --check-prefix FUNC
+void func(Bar b);
+// FUNC-LABEL: "!testLabel": "c:@F@func",
+// CHECK-NOT: Foo
+// CHECK: "pathComponents"
+
+// RUN: FileCheck %s --input-file %t/output.symbols.json --check-prefix THING
+#define SWIFT_NAME(_name) __attribute__((swift_name(#_name)))
+extern Bar const thing SWIFT_NAME(swiftThing);
+// THING-LABEL: "!testLabel": "c:@thing"
+// THING-NOT: Foo
+// THING: "pathComponents"
+
+_Pragma("clang assume_nonnull end")
+
+// expected-no-diagnostics
diff --git clang/test/Headers/__clang_hip_math.hip clang/test/Headers/__clang_hip_math.hip
index 9d202e0d0468..e4254d1e64be 100644
--- clang/test/Headers/__clang_hip_math.hip
+++ clang/test/Headers/__clang_hip_math.hip
@@ -47,7 +47,7 @@ typedef unsigned long long uint64_t;
 // CHECK-NEXT:    [[CONV5_I:%.*]] = zext nneg i8 [[TMP0]] to i64
 // CHECK-NEXT:    [[ADD_I:%.*]] = add i64 [[MUL_I]], -48
 // CHECK-NEXT:    [[SUB_I:%.*]] = add i64 [[ADD_I]], [[CONV5_I]]
-// CHECK-NEXT:    [[INCDEC_PTR_I:%.*]] = getelementptr inbounds i8, ptr [[__TAGP_ADDR_0_I]], i64 1
+// CHECK-NEXT:    [[INCDEC_PTR_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I]], i64 1
 // CHECK-NEXT:    br label [[CLEANUP_I]]
 // CHECK:       cleanup.i:
 // CHECK-NEXT:    [[__TAGP_ADDR_1_I]] = phi ptr [ [[INCDEC_PTR_I]], [[IF_THEN_I]] ], [ [[__TAGP_ADDR_0_I]], [[WHILE_BODY_I]] ]
@@ -79,7 +79,7 @@ extern "C" __device__ uint64_t test___make_mantissa_base8(const char *p) {
 // CHECK-NEXT:    [[CONV5_I:%.*]] = zext nneg i8 [[TMP0]] to i64
 // CHECK-NEXT:    [[ADD_I:%.*]] = add i64 [[MUL_I]], -48
 // CHECK-NEXT:    [[SUB_I:%.*]] = add i64 [[ADD_I]], [[CONV5_I]]
-// CHECK-NEXT:    [[INCDEC_PTR_I:%.*]] = getelementptr inbounds i8, ptr [[__TAGP_ADDR_0_I]], i64 1
+// CHECK-NEXT:    [[INCDEC_PTR_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I]], i64 1
 // CHECK-NEXT:    br label [[CLEANUP_I]]
 // CHECK:       cleanup.i:
 // CHECK-NEXT:    [[__TAGP_ADDR_1_I]] = phi ptr [ [[INCDEC_PTR_I]], [[IF_THEN_I]] ], [ [[__TAGP_ADDR_0_I]], [[WHILE_BODY_I]] ]
@@ -120,7 +120,7 @@ extern "C" __device__ uint64_t test___make_mantissa_base10(const char *p) {
 // CHECK-NEXT:    [[CONV25_I:%.*]] = zext nneg i8 [[TMP0]] to i64
 // CHECK-NEXT:    [[ADD26_I:%.*]] = add i64 [[MUL24_I]], [[DOTSINK]]
 // CHECK-NEXT:    [[ADD28_I:%.*]] = add i64 [[ADD26_I]], [[CONV25_I]]
-// CHECK-NEXT:    [[INCDEC_PTR_I:%.*]] = getelementptr inbounds i8, ptr [[__TAGP_ADDR_0_I]], i64 1
+// CHECK-NEXT:    [[INCDEC_PTR_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I]], i64 1
 // CHECK-NEXT:    br label [[CLEANUP_I]]
 // CHECK:       cleanup.i:
 // CHECK-NEXT:    [[__TAGP_ADDR_1_I]] = phi ptr [ [[INCDEC_PTR_I]], [[IF_END31_I]] ], [ [[__TAGP_ADDR_0_I]], [[IF_ELSE17_I]] ]
@@ -141,7 +141,7 @@ extern "C" __device__ uint64_t test___make_mantissa_base16(const char *p) {
 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp eq i8 [[TMP0]], 48
 // CHECK-NEXT:    br i1 [[CMP_I]], label [[IF_THEN_I:%.*]], label [[WHILE_COND_I14_I:%.*]]
 // CHECK:       if.then.i:
-// CHECK-NEXT:    [[INCDEC_PTR_I:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 1
+// CHECK-NEXT:    [[INCDEC_PTR_I:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 1
 // CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr [[INCDEC_PTR_I]], align 1, !tbaa [[TBAA4]]
 // CHECK-NEXT:    switch i8 [[TMP1]], label [[WHILE_COND_I_I:%.*]] [
 // CHECK-NEXT:      i8 120, label [[WHILE_COND_I30_I_PREHEADER:%.*]]
@@ -173,7 +173,7 @@ extern "C" __device__ uint64_t test___make_mantissa_base16(const char *p) {
 // CHECK-NEXT:    [[CONV25_I_I:%.*]] = zext nneg i8 [[TMP2]] to i64
 // CHECK-NEXT:    [[ADD26_I_I:%.*]] = add i64 [[MUL24_I_I]], [[DOTSINK]]
 // CHECK-NEXT:    [[ADD28_I_I:%.*]] = add i64 [[ADD26_I_I]], [[CONV25_I_I]]
-// CHECK-NEXT:    [[INCDEC_PTR_I40_I:%.*]] = getelementptr inbounds i8, ptr [[__TAGP_ADDR_0_I31_I]], i64 1
+// CHECK-NEXT:    [[INCDEC_PTR_I40_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I31_I]], i64 1
 // CHECK-NEXT:    br label [[CLEANUP_I36_I]]
 // CHECK:       cleanup.i36.i:
 // CHECK-NEXT:    [[__TAGP_ADDR_1_I37_I]] = phi ptr [ [[INCDEC_PTR_I40_I]], [[IF_END31_I_I]] ], [ [[__TAGP_ADDR_0_I31_I]], [[IF_ELSE17_I_I]] ]
@@ -195,7 +195,7 @@ extern "C" __device__ uint64_t test___make_mantissa_base16(const char *p) {
 // CHECK-NEXT:    [[CONV5_I_I:%.*]] = zext nneg i8 [[TMP6]] to i64
 // CHECK-NEXT:    [[ADD_I_I:%.*]] = add i64 [[MUL_I_I]], -48
 // CHECK-NEXT:    [[SUB_I_I:%.*]] = add i64 [[ADD_I_I]], [[CONV5_I_I]]
-// CHECK-NEXT:    [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds i8, ptr [[__TAGP_ADDR_0_I_I]], i64 1
+// CHECK-NEXT:    [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I_I]], i64 1
 // CHECK-NEXT:    br label [[CLEANUP_I_I]]
 // CHECK:       cleanup.i.i:
 // CHECK-NEXT:    [[__TAGP_ADDR_1_I_I]] = phi ptr [ [[INCDEC_PTR_I_I]], [[IF_THEN_I_I]] ], [ [[__TAGP_ADDR_0_I_I]], [[WHILE_BODY_I_I]] ]
@@ -216,7 +216,7 @@ extern "C" __device__ uint64_t test___make_mantissa_base16(const char *p) {
 // CHECK-NEXT:    [[CONV5_I26_I:%.*]] = zext nneg i8 [[TMP8]] to i64
 // CHECK-NEXT:    [[ADD_I27_I:%.*]] = add i64 [[MUL_I25_I]], -48
 // CHECK-NEXT:    [[SUB_I28_I:%.*]] = add i64 [[ADD_I27_I]], [[CONV5_I26_I]]
-// CHECK-NEXT:    [[INCDEC_PTR_I29_I:%.*]] = getelementptr inbounds i8, ptr [[__TAGP_ADDR_0_I15_I]], i64 1
+// CHECK-NEXT:    [[INCDEC_PTR_I29_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I15_I]], i64 1
 // CHECK-NEXT:    br label [[CLEANUP_I20_I]]
 // CHECK:       cleanup.i20.i:
 // CHECK-NEXT:    [[__TAGP_ADDR_1_I21_I]] = phi ptr [ [[INCDEC_PTR_I29_I]], [[IF_THEN_I24_I]] ], [ [[__TAGP_ADDR_0_I15_I]], [[WHILE_BODY_I18_I]] ]
@@ -2367,7 +2367,7 @@ extern "C" __device__ double test_modf(double x, double* y) {
 // DEFAULT-NEXT:    [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 48
 // DEFAULT-NEXT:    br i1 [[CMP_I_I]], label [[IF_THEN_I_I:%.*]], label [[WHILE_COND_I14_I_I:%.*]]
 // DEFAULT:       if.then.i.i:
-// DEFAULT-NEXT:    [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds i8, ptr [[TAG]], i64 1
+// DEFAULT-NEXT:    [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TAG]], i64 1
 // DEFAULT-NEXT:    [[TMP1:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]]
 // DEFAULT-NEXT:    switch i8 [[TMP1]], label [[WHILE_COND_I_I_I:%.*]] [
 // DEFAULT-NEXT:      i8 120, label [[WHILE_COND_I30_I_I_PREHEADER:%.*]]
@@ -2399,7 +2399,7 @@ extern "C" __device__ double test_modf(double x, double* y) {
 // DEFAULT-NEXT:    [[CONV25_I_I_I:%.*]] = zext nneg i8 [[TMP2]] to i64
 // DEFAULT-NEXT:    [[ADD26_I_I_I:%.*]] = add i64 [[MUL24_I_I_I]], [[DOTSINK]]
 // DEFAULT-NEXT:    [[ADD28_I_I_I:%.*]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]]
-// DEFAULT-NEXT:    [[INCDEC_PTR_I40_I_I:%.*]] = getelementptr inbounds i8, ptr [[__TAGP_ADDR_0_I31_I_I]], i64 1
+// DEFAULT-NEXT:    [[INCDEC_PTR_I40_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I31_I_I]], i64 1
 // DEFAULT-NEXT:    br label [[CLEANUP_I36_I_I]]
 // DEFAULT:       cleanup.i36.i.i:
 // DEFAULT-NEXT:    [[__TAGP_ADDR_1_I37_I_I]] = phi ptr [ [[INCDEC_PTR_I40_I_I]], [[IF_END31_I_I_I]] ], [ [[__TAGP_ADDR_0_I31_I_I]], [[IF_ELSE17_I_I_I]] ]
@@ -2421,7 +2421,7 @@ extern "C" __device__ double test_modf(double x, double* y) {
 // DEFAULT-NEXT:    [[CONV5_I_I_I:%.*]] = zext nneg i8 [[TMP6]] to i64
 // DEFAULT-NEXT:    [[ADD_I_I_I:%.*]] = add i64 [[MUL_I_I_I]], -48
 // DEFAULT-NEXT:    [[SUB_I_I_I:%.*]] = add i64 [[ADD_I_I_I]], [[CONV5_I_I_I]]
-// DEFAULT-NEXT:    [[INCDEC_PTR_I_I_I:%.*]] = getelementptr inbounds i8, ptr [[__TAGP_ADDR_0_I_I_I]], i64 1
+// DEFAULT-NEXT:    [[INCDEC_PTR_I_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I_I_I]], i64 1
 // DEFAULT-NEXT:    br label [[CLEANUP_I_I_I]]
 // DEFAULT:       cleanup.i.i.i:
 // DEFAULT-NEXT:    [[__TAGP_ADDR_1_I_I_I]] = phi ptr [ [[INCDEC_PTR_I_I_I]], [[IF_THEN_I_I_I]] ], [ [[__TAGP_ADDR_0_I_I_I]], [[WHILE_BODY_I_I_I]] ]
@@ -2442,7 +2442,7 @@ extern "C" __device__ double test_modf(double x, double* y) {
 // DEFAULT-NEXT:    [[CONV5_I26_I_I:%.*]] = zext nneg i8 [[TMP8]] to i64
 // DEFAULT-NEXT:    [[ADD_I27_I_I:%.*]] = add i64 [[MUL_I25_I_I]], -48
 // DEFAULT-NEXT:    [[SUB_I28_I_I:%.*]] = add i64 [[ADD_I27_I_I]], [[CONV5_I26_I_I]]
-// DEFAULT-NEXT:    [[INCDEC_PTR_I29_I_I:%.*]] = getelementptr inbounds i8, ptr [[__TAGP_ADDR_0_I15_I_I]], i64 1
+// DEFAULT-NEXT:    [[INCDEC_PTR_I29_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I15_I_I]], i64 1
 // DEFAULT-NEXT:    br label [[CLEANUP_I20_I_I]]
 // DEFAULT:       cleanup.i20.i.i:
 // DEFAULT-NEXT:    [[__TAGP_ADDR_1_I21_I_I]] = phi ptr [ [[INCDEC_PTR_I29_I_I]], [[IF_THEN_I24_I_I]] ], [ [[__TAGP_ADDR_0_I15_I_I]], [[WHILE_BODY_I18_I_I]] ]
@@ -2466,7 +2466,7 @@ extern "C" __device__ double test_modf(double x, double* y) {
 // APPROX-NEXT:    [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 48
 // APPROX-NEXT:    br i1 [[CMP_I_I]], label [[IF_THEN_I_I:%.*]], label [[WHILE_COND_I14_I_I:%.*]]
 // APPROX:       if.then.i.i:
-// APPROX-NEXT:    [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds i8, ptr [[TAG]], i64 1
+// APPROX-NEXT:    [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TAG]], i64 1
 // APPROX-NEXT:    [[TMP1:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]]
 // APPROX-NEXT:    switch i8 [[TMP1]], label [[WHILE_COND_I_I_I:%.*]] [
 // APPROX-NEXT:      i8 120, label [[WHILE_COND_I30_I_I_PREHEADER:%.*]]
@@ -2498,7 +2498,7 @@ extern "C" __device__ double test_modf(double x, double* y) {
 // APPROX-NEXT:    [[CONV25_I_I_I:%.*]] = zext nneg i8 [[TMP2]] to i64
 // APPROX-NEXT:    [[ADD26_I_I_I:%.*]] = add i64 [[MUL24_I_I_I]], [[DOTSINK]]
 // APPROX-NEXT:    [[ADD28_I_I_I:%.*]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]]
-// APPROX-NEXT:    [[INCDEC_PTR_I40_I_I:%.*]] = getelementptr inbounds i8, ptr [[__TAGP_ADDR_0_I31_I_I]], i64 1
+// APPROX-NEXT:    [[INCDEC_PTR_I40_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I31_I_I]], i64 1
 // APPROX-NEXT:    br label [[CLEANUP_I36_I_I]]
 // APPROX:       cleanup.i36.i.i:
 // APPROX-NEXT:    [[__TAGP_ADDR_1_I37_I_I]] = phi ptr [ [[INCDEC_PTR_I40_I_I]], [[IF_END31_I_I_I]] ], [ [[__TAGP_ADDR_0_I31_I_I]], [[IF_ELSE17_I_I_I]] ]
@@ -2520,7 +2520,7 @@ extern "C" __device__ double test_modf(double x, double* y) {
 // APPROX-NEXT:    [[CONV5_I_I_I:%.*]] = zext nneg i8 [[TMP6]] to i64
 // APPROX-NEXT:    [[ADD_I_I_I:%.*]] = add i64 [[MUL_I_I_I]], -48
 // APPROX-NEXT:    [[SUB_I_I_I:%.*]] = add i64 [[ADD_I_I_I]], [[CONV5_I_I_I]]
-// APPROX-NEXT:    [[INCDEC_PTR_I_I_I:%.*]] = getelementptr inbounds i8, ptr [[__TAGP_ADDR_0_I_I_I]], i64 1
+// APPROX-NEXT:    [[INCDEC_PTR_I_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I_I_I]], i64 1
 // APPROX-NEXT:    br label [[CLEANUP_I_I_I]]
 // APPROX:       cleanup.i.i.i:
 // APPROX-NEXT:    [[__TAGP_ADDR_1_I_I_I]] = phi ptr [ [[INCDEC_PTR_I_I_I]], [[IF_THEN_I_I_I]] ], [ [[__TAGP_ADDR_0_I_I_I]], [[WHILE_BODY_I_I_I]] ]
@@ -2541,7 +2541,7 @@ extern "C" __device__ double test_modf(double x, double* y) {
 // APPROX-NEXT:    [[CONV5_I26_I_I:%.*]] = zext nneg i8 [[TMP8]] to i64
 // APPROX-NEXT:    [[ADD_I27_I_I:%.*]] = add i64 [[MUL_I25_I_I]], -48
 // APPROX-NEXT:    [[SUB_I28_I_I:%.*]] = add i64 [[ADD_I27_I_I]], [[CONV5_I26_I_I]]
-// APPROX-NEXT:    [[INCDEC_PTR_I29_I_I:%.*]] = getelementptr inbounds i8, ptr [[__TAGP_ADDR_0_I15_I_I]], i64 1
+// APPROX-NEXT:    [[INCDEC_PTR_I29_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I15_I_I]], i64 1
 // APPROX-NEXT:    br label [[CLEANUP_I20_I_I]]
 // APPROX:       cleanup.i20.i.i:
 // APPROX-NEXT:    [[__TAGP_ADDR_1_I21_I_I]] = phi ptr [ [[INCDEC_PTR_I29_I_I]], [[IF_THEN_I24_I_I]] ], [ [[__TAGP_ADDR_0_I15_I_I]], [[WHILE_BODY_I18_I_I]] ]
@@ -2565,7 +2565,7 @@ extern "C" __device__ float test_nanf(const char *tag) {
 // DEFAULT-NEXT:    [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 48
 // DEFAULT-NEXT:    br i1 [[CMP_I_I]], label [[IF_THEN_I_I:%.*]], label [[WHILE_COND_I14_I_I:%.*]]
 // DEFAULT:       if.then.i.i:
-// DEFAULT-NEXT:    [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds i8, ptr [[TAG]], i64 1
+// DEFAULT-NEXT:    [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TAG]], i64 1
 // DEFAULT-NEXT:    [[TMP1:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]]
 // DEFAULT-NEXT:    switch i8 [[TMP1]], label [[WHILE_COND_I_I_I:%.*]] [
 // DEFAULT-NEXT:      i8 120, label [[WHILE_COND_I30_I_I_PREHEADER:%.*]]
@@ -2597,7 +2597,7 @@ extern "C" __device__ float test_nanf(const char *tag) {
 // DEFAULT-NEXT:    [[CONV25_I_I_I:%.*]] = zext nneg i8 [[TMP2]] to i64
 // DEFAULT-NEXT:    [[ADD26_I_I_I:%.*]] = add i64 [[MUL24_I_I_I]], [[DOTSINK]]
 // DEFAULT-NEXT:    [[ADD28_I_I_I:%.*]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]]
-// DEFAULT-NEXT:    [[INCDEC_PTR_I40_I_I:%.*]] = getelementptr inbounds i8, ptr [[__TAGP_ADDR_0_I31_I_I]], i64 1
+// DEFAULT-NEXT:    [[INCDEC_PTR_I40_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I31_I_I]], i64 1
 // DEFAULT-NEXT:    br label [[CLEANUP_I36_I_I]]
 // DEFAULT:       cleanup.i36.i.i:
 // DEFAULT-NEXT:    [[__TAGP_ADDR_1_I37_I_I]] = phi ptr [ [[INCDEC_PTR_I40_I_I]], [[IF_END31_I_I_I]] ], [ [[__TAGP_ADDR_0_I31_I_I]], [[IF_ELSE17_I_I_I]] ]
@@ -2619,7 +2619,7 @@ extern "C" __device__ float test_nanf(const char *tag) {
 // DEFAULT-NEXT:    [[CONV5_I_I_I:%.*]] = zext nneg i8 [[TMP6]] to i64
 // DEFAULT-NEXT:    [[ADD_I_I_I:%.*]] = add i64 [[MUL_I_I_I]], -48
 // DEFAULT-NEXT:    [[SUB_I_I_I:%.*]] = add i64 [[ADD_I_I_I]], [[CONV5_I_I_I]]
-// DEFAULT-NEXT:    [[INCDEC_PTR_I_I_I:%.*]] = getelementptr inbounds i8, ptr [[__TAGP_ADDR_0_I_I_I]], i64 1
+// DEFAULT-NEXT:    [[INCDEC_PTR_I_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I_I_I]], i64 1
 // DEFAULT-NEXT:    br label [[CLEANUP_I_I_I]]
 // DEFAULT:       cleanup.i.i.i:
 // DEFAULT-NEXT:    [[__TAGP_ADDR_1_I_I_I]] = phi ptr [ [[INCDEC_PTR_I_I_I]], [[IF_THEN_I_I_I]] ], [ [[__TAGP_ADDR_0_I_I_I]], [[WHILE_BODY_I_I_I]] ]
@@ -2640,7 +2640,7 @@ extern "C" __device__ float test_nanf(const char *tag) {
 // DEFAULT-NEXT:    [[CONV5_I26_I_I:%.*]] = zext nneg i8 [[TMP8]] to i64
 // DEFAULT-NEXT:    [[ADD_I27_I_I:%.*]] = add i64 [[MUL_I25_I_I]], -48
 // DEFAULT-NEXT:    [[SUB_I28_I_I:%.*]] = add i64 [[ADD_I27_I_I]], [[CONV5_I26_I_I]]
-// DEFAULT-NEXT:    [[INCDEC_PTR_I29_I_I:%.*]] = getelementptr inbounds i8, ptr [[__TAGP_ADDR_0_I15_I_I]], i64 1
+// DEFAULT-NEXT:    [[INCDEC_PTR_I29_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I15_I_I]], i64 1
 // DEFAULT-NEXT:    br label [[CLEANUP_I20_I_I]]
 // DEFAULT:       cleanup.i20.i.i:
 // DEFAULT-NEXT:    [[__TAGP_ADDR_1_I21_I_I]] = phi ptr [ [[INCDEC_PTR_I29_I_I]], [[IF_THEN_I24_I_I]] ], [ [[__TAGP_ADDR_0_I15_I_I]], [[WHILE_BODY_I18_I_I]] ]
@@ -2663,7 +2663,7 @@ extern "C" __device__ float test_nanf(const char *tag) {
 // APPROX-NEXT:    [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 48
 // APPROX-NEXT:    br i1 [[CMP_I_I]], label [[IF_THEN_I_I:%.*]], label [[WHILE_COND_I14_I_I:%.*]]
 // APPROX:       if.then.i.i:
-// APPROX-NEXT:    [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds i8, ptr [[TAG]], i64 1
+// APPROX-NEXT:    [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TAG]], i64 1
 // APPROX-NEXT:    [[TMP1:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]]
 // APPROX-NEXT:    switch i8 [[TMP1]], label [[WHILE_COND_I_I_I:%.*]] [
 // APPROX-NEXT:      i8 120, label [[WHILE_COND_I30_I_I_PREHEADER:%.*]]
@@ -2695,7 +2695,7 @@ extern "C" __device__ float test_nanf(const char *tag) {
 // APPROX-NEXT:    [[CONV25_I_I_I:%.*]] = zext nneg i8 [[TMP2]] to i64
 // APPROX-NEXT:    [[ADD26_I_I_I:%.*]] = add i64 [[MUL24_I_I_I]], [[DOTSINK]]
 // APPROX-NEXT:    [[ADD28_I_I_I:%.*]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]]
-// APPROX-NEXT:    [[INCDEC_PTR_I40_I_I:%.*]] = getelementptr inbounds i8, ptr [[__TAGP_ADDR_0_I31_I_I]], i64 1
+// APPROX-NEXT:    [[INCDEC_PTR_I40_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I31_I_I]], i64 1
 // APPROX-NEXT:    br label [[CLEANUP_I36_I_I]]
 // APPROX:       cleanup.i36.i.i:
 // APPROX-NEXT:    [[__TAGP_ADDR_1_I37_I_I]] = phi ptr [ [[INCDEC_PTR_I40_I_I]], [[IF_END31_I_I_I]] ], [ [[__TAGP_ADDR_0_I31_I_I]], [[IF_ELSE17_I_I_I]] ]
@@ -2717,7 +2717,7 @@ extern "C" __device__ float test_nanf(const char *tag) {
 // APPROX-NEXT:    [[CONV5_I_I_I:%.*]] = zext nneg i8 [[TMP6]] to i64
 // APPROX-NEXT:    [[ADD_I_I_I:%.*]] = add i64 [[MUL_I_I_I]], -48
 // APPROX-NEXT:    [[SUB_I_I_I:%.*]] = add i64 [[ADD_I_I_I]], [[CONV5_I_I_I]]
-// APPROX-NEXT:    [[INCDEC_PTR_I_I_I:%.*]] = getelementptr inbounds i8, ptr [[__TAGP_ADDR_0_I_I_I]], i64 1
+// APPROX-NEXT:    [[INCDEC_PTR_I_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I_I_I]], i64 1
 // APPROX-NEXT:    br label [[CLEANUP_I_I_I]]
 // APPROX:       cleanup.i.i.i:
 // APPROX-NEXT:    [[__TAGP_ADDR_1_I_I_I]] = phi ptr [ [[INCDEC_PTR_I_I_I]], [[IF_THEN_I_I_I]] ], [ [[__TAGP_ADDR_0_I_I_I]], [[WHILE_BODY_I_I_I]] ]
@@ -2738,7 +2738,7 @@ extern "C" __device__ float test_nanf(const char *tag) {
 // APPROX-NEXT:    [[CONV5_I26_I_I:%.*]] = zext nneg i8 [[TMP8]] to i64
 // APPROX-NEXT:    [[ADD_I27_I_I:%.*]] = add i64 [[MUL_I25_I_I]], -48
 // APPROX-NEXT:    [[SUB_I28_I_I:%.*]] = add i64 [[ADD_I27_I_I]], [[CONV5_I26_I_I]]
-// APPROX-NEXT:    [[INCDEC_PTR_I29_I_I:%.*]] = getelementptr inbounds i8, ptr [[__TAGP_ADDR_0_I15_I_I]], i64 1
+// APPROX-NEXT:    [[INCDEC_PTR_I29_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I15_I_I]], i64 1
 // APPROX-NEXT:    br label [[CLEANUP_I20_I_I]]
 // APPROX:       cleanup.i20.i.i:
 // APPROX-NEXT:    [[__TAGP_ADDR_1_I21_I_I]] = phi ptr [ [[INCDEC_PTR_I29_I_I]], [[IF_THEN_I24_I_I]] ], [ [[__TAGP_ADDR_0_I15_I_I]], [[WHILE_BODY_I18_I_I]] ]
@@ -3059,7 +3059,7 @@ extern "C" __device__ double test_normcdfinv(double x) {
 // DEFAULT-NEXT:    [[TMP0:%.*]] = load float, ptr [[__A_ADDR_0_I3]], align 4, !tbaa [[TBAA16]]
 // DEFAULT-NEXT:    [[MUL_I:%.*]] = fmul contract float [[TMP0]], [[TMP0]]
 // DEFAULT-NEXT:    [[ADD_I]] = fadd contract float [[__R_0_I4]], [[MUL_I]]
-// DEFAULT-NEXT:    [[INCDEC_PTR_I]] = getelementptr inbounds i8, ptr [[__A_ADDR_0_I3]], i64 4
+// DEFAULT-NEXT:    [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 4
 // DEFAULT-NEXT:    [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0
 // DEFAULT-NEXT:    br i1 [[TOBOOL_NOT_I]], label [[_ZL5NORMFIPKF_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP20:![0-9]+]]
 // DEFAULT:       _ZL5normfiPKf.exit:
@@ -3079,7 +3079,7 @@ extern "C" __device__ double test_normcdfinv(double x) {
 // FINITEONLY-NEXT:    [[TMP0:%.*]] = load float, ptr [[__A_ADDR_0_I3]], align 4, !tbaa [[TBAA16]]
 // FINITEONLY-NEXT:    [[MUL_I:%.*]] = fmul nnan ninf contract float [[TMP0]], [[TMP0]]
 // FINITEONLY-NEXT:    [[ADD_I]] = fadd nnan ninf contract float [[__R_0_I4]], [[MUL_I]]
-// FINITEONLY-NEXT:    [[INCDEC_PTR_I]] = getelementptr inbounds i8, ptr [[__A_ADDR_0_I3]], i64 4
+// FINITEONLY-NEXT:    [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 4
 // FINITEONLY-NEXT:    [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0
 // FINITEONLY-NEXT:    br i1 [[TOBOOL_NOT_I]], label [[_ZL5NORMFIPKF_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP20:![0-9]+]]
 // FINITEONLY:       _ZL5normfiPKf.exit:
@@ -3099,7 +3099,7 @@ extern "C" __device__ double test_normcdfinv(double x) {
 // APPROX-NEXT:    [[TMP0:%.*]] = load float, ptr [[__A_ADDR_0_I3]], align 4, !tbaa [[TBAA16]]
 // APPROX-NEXT:    [[MUL_I:%.*]] = fmul contract float [[TMP0]], [[TMP0]]
 // APPROX-NEXT:    [[ADD_I]] = fadd contract float [[__R_0_I4]], [[MUL_I]]
-// APPROX-NEXT:    [[INCDEC_PTR_I]] = getelementptr inbounds i8, ptr [[__A_ADDR_0_I3]], i64 4
+// APPROX-NEXT:    [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 4
 // APPROX-NEXT:    [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0
 // APPROX-NEXT:    br i1 [[TOBOOL_NOT_I]], label [[_ZL5NORMFIPKF_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP20:![0-9]+]]
 // APPROX:       _ZL5normfiPKf.exit:
@@ -3123,7 +3123,7 @@ extern "C" __device__ float test_normf(int x, const float *y) {
 // DEFAULT-NEXT:    [[TMP0:%.*]] = load double, ptr [[__A_ADDR_0_I3]], align 8, !tbaa [[TBAA18]]
 // DEFAULT-NEXT:    [[MUL_I:%.*]] = fmul contract double [[TMP0]], [[TMP0]]
 // DEFAULT-NEXT:    [[ADD_I]] = fadd contract double [[__R_0_I4]], [[MUL_I]]
-// DEFAULT-NEXT:    [[INCDEC_PTR_I]] = getelementptr inbounds i8, ptr [[__A_ADDR_0_I3]], i64 8
+// DEFAULT-NEXT:    [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 8
 // DEFAULT-NEXT:    [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0
 // DEFAULT-NEXT:    br i1 [[TOBOOL_NOT_I]], label [[_ZL4NORMIPKD_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP21:![0-9]+]]
 // DEFAULT:       _ZL4normiPKd.exit:
@@ -3143,7 +3143,7 @@ extern "C" __device__ float test_normf(int x, const float *y) {
 // FINITEONLY-NEXT:    [[TMP0:%.*]] = load double, ptr [[__A_ADDR_0_I3]], align 8, !tbaa [[TBAA18]]
 // FINITEONLY-NEXT:    [[MUL_I:%.*]] = fmul nnan ninf contract double [[TMP0]], [[TMP0]]
 // FINITEONLY-NEXT:    [[ADD_I]] = fadd nnan ninf contract double [[__R_0_I4]], [[MUL_I]]
-// FINITEONLY-NEXT:    [[INCDEC_PTR_I]] = getelementptr inbounds i8, ptr [[__A_ADDR_0_I3]], i64 8
+// FINITEONLY-NEXT:    [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 8
 // FINITEONLY-NEXT:    [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0
 // FINITEONLY-NEXT:    br i1 [[TOBOOL_NOT_I]], label [[_ZL4NORMIPKD_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP21:![0-9]+]]
 // FINITEONLY:       _ZL4normiPKd.exit:
@@ -3163,7 +3163,7 @@ extern "C" __device__ float test_normf(int x, const float *y) {
 // APPROX-NEXT:    [[TMP0:%.*]] = load double, ptr [[__A_ADDR_0_I3]], align 8, !tbaa [[TBAA18]]
 // APPROX-NEXT:    [[MUL_I:%.*]] = fmul contract double [[TMP0]], [[TMP0]]
 // APPROX-NEXT:    [[ADD_I]] = fadd contract double [[__R_0_I4]], [[MUL_I]]
-// APPROX-NEXT:    [[INCDEC_PTR_I]] = getelementptr inbounds i8, ptr [[__A_ADDR_0_I3]], i64 8
+// APPROX-NEXT:    [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 8
 // APPROX-NEXT:    [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0
 // APPROX-NEXT:    br i1 [[TOBOOL_NOT_I]], label [[_ZL4NORMIPKD_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP21:![0-9]+]]
 // APPROX:       _ZL4normiPKd.exit:
@@ -3483,7 +3483,7 @@ extern "C" __device__ double test_rint(double x) {
 // DEFAULT-NEXT:    [[TMP0:%.*]] = load float, ptr [[__A_ADDR_0_I3]], align 4, !tbaa [[TBAA16]]
 // DEFAULT-NEXT:    [[MUL_I:%.*]] = fmul contract float [[TMP0]], [[TMP0]]
 // DEFAULT-NEXT:    [[ADD_I]] = fadd contract float [[__R_0_I4]], [[MUL_I]]
-// DEFAULT-NEXT:    [[INCDEC_PTR_I]] = getelementptr inbounds i8, ptr [[__A_ADDR_0_I3]], i64 4
+// DEFAULT-NEXT:    [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 4
 // DEFAULT-NEXT:    [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0
 // DEFAULT-NEXT:    br i1 [[TOBOOL_NOT_I]], label [[_ZL6RNORMFIPKF_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP22:![0-9]+]]
 // DEFAULT:       _ZL6rnormfiPKf.exit:
@@ -3503,7 +3503,7 @@ extern "C" __device__ double test_rint(double x) {
 // FINITEONLY-NEXT:    [[TMP0:%.*]] = load float, ptr [[__A_ADDR_0_I3]], align 4, !tbaa [[TBAA16]]
 // FINITEONLY-NEXT:    [[MUL_I:%.*]] = fmul nnan ninf contract float [[TMP0]], [[TMP0]]
 // FINITEONLY-NEXT:    [[ADD_I]] = fadd nnan ninf contract float [[__R_0_I4]], [[MUL_I]]
-// FINITEONLY-NEXT:    [[INCDEC_PTR_I]] = getelementptr inbounds i8, ptr [[__A_ADDR_0_I3]], i64 4
+// FINITEONLY-NEXT:    [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 4
 // FINITEONLY-NEXT:    [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0
 // FINITEONLY-NEXT:    br i1 [[TOBOOL_NOT_I]], label [[_ZL6RNORMFIPKF_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP22:![0-9]+]]
 // FINITEONLY:       _ZL6rnormfiPKf.exit:
@@ -3523,7 +3523,7 @@ extern "C" __device__ double test_rint(double x) {
 // APPROX-NEXT:    [[TMP0:%.*]] = load float, ptr [[__A_ADDR_0_I3]], align 4, !tbaa [[TBAA16]]
 // APPROX-NEXT:    [[MUL_I:%.*]] = fmul contract float [[TMP0]], [[TMP0]]
 // APPROX-NEXT:    [[ADD_I]] = fadd contract float [[__R_0_I4]], [[MUL_I]]
-// APPROX-NEXT:    [[INCDEC_PTR_I]] = getelementptr inbounds i8, ptr [[__A_ADDR_0_I3]], i64 4
+// APPROX-NEXT:    [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 4
 // APPROX-NEXT:    [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0
 // APPROX-NEXT:    br i1 [[TOBOOL_NOT_I]], label [[_ZL6RNORMFIPKF_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP22:![0-9]+]]
 // APPROX:       _ZL6rnormfiPKf.exit:
@@ -3547,7 +3547,7 @@ extern "C" __device__ float test_rnormf(int x, const float* y) {
 // DEFAULT-NEXT:    [[TMP0:%.*]] = load double, ptr [[__A_ADDR_0_I3]], align 8, !tbaa [[TBAA18]]
 // DEFAULT-NEXT:    [[MUL_I:%.*]] = fmul contract double [[TMP0]], [[TMP0]]
 // DEFAULT-NEXT:    [[ADD_I]] = fadd contract double [[__R_0_I4]], [[MUL_I]]
-// DEFAULT-NEXT:    [[INCDEC_PTR_I]] = getelementptr inbounds i8, ptr [[__A_ADDR_0_I3]], i64 8
+// DEFAULT-NEXT:    [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 8
 // DEFAULT-NEXT:    [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0
 // DEFAULT-NEXT:    br i1 [[TOBOOL_NOT_I]], label [[_ZL5RNORMIPKD_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP23:![0-9]+]]
 // DEFAULT:       _ZL5rnormiPKd.exit:
@@ -3567,7 +3567,7 @@ extern "C" __device__ float test_rnormf(int x, const float* y) {
 // FINITEONLY-NEXT:    [[TMP0:%.*]] = load double, ptr [[__A_ADDR_0_I3]], align 8, !tbaa [[TBAA18]]
 // FINITEONLY-NEXT:    [[MUL_I:%.*]] = fmul nnan ninf contract double [[TMP0]], [[TMP0]]
 // FINITEONLY-NEXT:    [[ADD_I]] = fadd nnan ninf contract double [[__R_0_I4]], [[MUL_I]]
-// FINITEONLY-NEXT:    [[INCDEC_PTR_I]] = getelementptr inbounds i8, ptr [[__A_ADDR_0_I3]], i64 8
+// FINITEONLY-NEXT:    [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 8
 // FINITEONLY-NEXT:    [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0
 // FINITEONLY-NEXT:    br i1 [[TOBOOL_NOT_I]], label [[_ZL5RNORMIPKD_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP23:![0-9]+]]
 // FINITEONLY:       _ZL5rnormiPKd.exit:
@@ -3587,7 +3587,7 @@ extern "C" __device__ float test_rnormf(int x, const float* y) {
 // APPROX-NEXT:    [[TMP0:%.*]] = load double, ptr [[__A_ADDR_0_I3]], align 8, !tbaa [[TBAA18]]
 // APPROX-NEXT:    [[MUL_I:%.*]] = fmul contract double [[TMP0]], [[TMP0]]
 // APPROX-NEXT:    [[ADD_I]] = fadd contract double [[__R_0_I4]], [[MUL_I]]
-// APPROX-NEXT:    [[INCDEC_PTR_I]] = getelementptr inbounds i8, ptr [[__A_ADDR_0_I3]], i64 8
+// APPROX-NEXT:    [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 8
 // APPROX-NEXT:    [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0
 // APPROX-NEXT:    br i1 [[TOBOOL_NOT_I]], label [[_ZL5RNORMIPKD_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP23:![0-9]+]]
 // APPROX:       _ZL5rnormiPKd.exit:
diff --git clang/test/Lexer/cxx-features.cpp clang/test/Lexer/cxx-features.cpp
index 4a06d29ae9db..5b88e00b7150 100644
--- clang/test/Lexer/cxx-features.cpp
+++ clang/test/Lexer/cxx-features.cpp
@@ -325,7 +325,7 @@
 #error "wrong value for __cpp_range_based_for"
 #endif
 
-#if check(static_assert, 0, 200410, 200410, 201411, 201411, 201411, 202306)
+#if check(static_assert, 0, 202306, 202306, 202306, 202306, 202306, 202306)
 #error "wrong value for __cpp_static_assert"
 #endif
 
diff --git clang/test/Lexer/has_feature_realtime_sanitizer.cpp clang/test/Lexer/has_feature_realtime_sanitizer.cpp
new file mode 100644
index 000000000000..76febeb6473a
--- /dev/null
+++ clang/test/Lexer/has_feature_realtime_sanitizer.cpp
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -E -fsanitize=realtime %s -o - | FileCheck --check-prefix=CHECK-RTSAN %s
+// RUN: %clang_cc1 -E  %s -o - | FileCheck --check-prefix=CHECK-NO-RTSAN %s
+
+#if __has_feature(realtime_sanitizer)
+int RealtimeSanitizerEnabled();
+#else
+int RealtimeSanitizerDisabled();
+#endif
+
+// CHECK-RTSAN: RealtimeSanitizerEnabled
+
+// CHECK-NO-RTSAN: RealtimeSanitizerDisabled
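The new __has_feature(realtime_sanitizer) probe follows the usual feature-test pattern; a small, purely illustrative guard (the RTSAN_ENABLED macro name is invented for this sketch, not part of the patch):

// Illustrative only; tests __has_feature defensively before using it.
#if defined(__has_feature)
#  if __has_feature(realtime_sanitizer)
#    define RTSAN_ENABLED 1
#  endif
#endif
#ifndef RTSAN_ENABLED
#  define RTSAN_ENABLED 0
#endif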
diff --git clang/test/Misc/pragma-attribute-supported-attributes-list.test clang/test/Misc/pragma-attribute-supported-attributes-list.test
index 5ebbd29b316b..baa1816358b1 100644
--- clang/test/Misc/pragma-attribute-supported-attributes-list.test
+++ clang/test/Misc/pragma-attribute-supported-attributes-list.test
@@ -59,6 +59,7 @@
 // CHECK-NEXT: ConsumableAutoCast (SubjectMatchRule_record)
 // CHECK-NEXT: ConsumableSetOnRead (SubjectMatchRule_record)
 // CHECK-NEXT: Convergent (SubjectMatchRule_function)
+// CHECK-NEXT: CoroAwaitElidable (SubjectMatchRule_record)
 // CHECK-NEXT: CoroDisableLifetimeBound (SubjectMatchRule_function)
 // CHECK-NEXT: CoroLifetimeBound (SubjectMatchRule_record)
 // CHECK-NEXT: CoroOnlyDestroyWhenComplete (SubjectMatchRule_record)
@@ -82,8 +83,6 @@
 // CHECK-NEXT: FunctionReturnThunks (SubjectMatchRule_function)
 // CHECK-NEXT: GNUInline (SubjectMatchRule_function)
 // CHECK-NEXT: HIPManaged (SubjectMatchRule_variable)
-// CHECK-NEXT: HLSLROV (SubjectMatchRule_record_not_is_union)
-// CHECK-NEXT: HLSLResourceClass (SubjectMatchRule_field)
 // CHECK-NEXT: Hot (SubjectMatchRule_function)
 // CHECK-NEXT: HybridPatchable (SubjectMatchRule_function)
 // CHECK-NEXT: IBAction (SubjectMatchRule_objc_method_is_instance)
diff --git clang/test/Misc/print-stats-vfs.test clang/test/Misc/print-stats-vfs.test
new file mode 100644
index 000000000000..65446cb7a507
--- /dev/null
+++ clang/test/Misc/print-stats-vfs.test
@@ -0,0 +1,17 @@
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+
+// RUN: %clang_cc1 -fsyntax-only %t/tu.c -I %t/dir1 -I %t/dir2 -print-stats 2>&1 | FileCheck %s
+
+//--- tu.c
+#include "header.h"
+//--- dir1/other.h
+//--- dir2/header.h
+
+// CHECK:      *** Virtual File System Stats:
+// CHECK-NEXT: {{[[:digit:]]+}} status() calls
+// CHECK-NEXT: {{[[:digit:]]+}} openFileForRead() calls
+// CHECK-NEXT: {{[[:digit:]]+}} dir_begin() calls
+// CHECK-NEXT: {{[[:digit:]]+}} getRealPath() calls
+// CHECK-NEXT: {{[[:digit:]]+}} exists() calls
+// CHECK-NEXT: {{[[:digit:]]+}} isLocal() calls
diff --git clang/test/Modules/pr107673.cppm clang/test/Modules/pr107673.cppm
new file mode 100644
index 000000000000..dc66c9ac2245
--- /dev/null
+++ clang/test/Modules/pr107673.cppm
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -std=c++20 %s -ast-dump | FileCheck %s
+export module a;
+export class f {
+public:
+    void non_inline_func() {}
+    constexpr void constexpr_func() {}
+    consteval void consteval_func() {}
+};
+
+// CHECK-NOT: non_inline_func {{.*}}implicit-inline
+// CHECK: constexpr_func {{.*}}implicit-inline
+// CHECK: consteval_func {{.*}}implicit-inline
diff --git clang/test/OpenMP/bug60602.cpp clang/test/OpenMP/bug60602.cpp
index cb2e4e5b11e3..0789ef958e52 100644
--- clang/test/OpenMP/bug60602.cpp
+++ clang/test/OpenMP/bug60602.cpp
@@ -58,13 +58,13 @@ int kernel_within_loop(int *a, int *b, int N, int num_iters) {
 // CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[B_ADDR]], align 8
 // CHECK-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[A_ADDR]], align 8
 // CHECK-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 0
+// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP7]], i64 0
 // CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[N_ADDR]], align 4
 // CHECK-NEXT:    [[CONV:%.*]] = sext i32 [[TMP8]] to i64
 // CHECK-NEXT:    [[TMP9:%.*]] = mul nuw i64 [[CONV]], 4
 // CHECK-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[B_ADDR]], align 8
 // CHECK-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 0
+// CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP11]], i64 0
 // CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr [[N_ADDR]], align 4
 // CHECK-NEXT:    [[CONV2:%.*]] = sext i32 [[TMP12]] to i64
 // CHECK-NEXT:    [[TMP13:%.*]] = mul nuw i64 [[CONV2]], 4
@@ -134,13 +134,13 @@ int kernel_within_loop(int *a, int *b, int N, int num_iters) {
 // CHECK-NEXT:    [[TMP46:%.*]] = load ptr, ptr [[B_ADDR]], align 8
 // CHECK-NEXT:    [[TMP47:%.*]] = load ptr, ptr [[A_ADDR]], align 8
 // CHECK-NEXT:    [[TMP48:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[TMP48]], i64 0
+// CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP48]], i64 0
 // CHECK-NEXT:    [[TMP49:%.*]] = load i32, ptr [[N_ADDR]], align 4
 // CHECK-NEXT:    [[CONV5:%.*]] = sext i32 [[TMP49]] to i64
 // CHECK-NEXT:    [[TMP50:%.*]] = mul nuw i64 [[CONV5]], 4
 // CHECK-NEXT:    [[TMP51:%.*]] = load ptr, ptr [[B_ADDR]], align 8
 // CHECK-NEXT:    [[TMP52:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP52]], i64 0
+// CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP52]], i64 0
 // CHECK-NEXT:    [[TMP53:%.*]] = load i32, ptr [[N_ADDR]], align 4
 // CHECK-NEXT:    [[CONV7:%.*]] = sext i32 [[TMP53]] to i64
 // CHECK-NEXT:    [[TMP54:%.*]] = mul nuw i64 [[CONV7]], 4
diff --git clang/test/OpenMP/declare_mapper_codegen.cpp clang/test/OpenMP/declare_mapper_codegen.cpp
index 52d5ceffa147..d2954b7a7482 100644
--- clang/test/OpenMP/declare_mapper_codegen.cpp
+++ clang/test/OpenMP/declare_mapper_codegen.cpp
@@ -129,7 +129,7 @@ public:
 // CK0-DAG: [[BBEGIN:%.+]] = getelementptr inbounds nuw %class.C, ptr [[PTR]], i32 0, i32 1
 // CK0-DAG: [[BBEGIN2:%.+]] = getelementptr inbounds nuw %class.C, ptr [[PTR]], i32 0, i32 1
 // CK0-DAG: [[BARRBEGIN:%.+]] = load ptr, ptr [[BBEGIN2]]
-// CK0-DAG: [[BARRBEGINGEP:%.+]] = getelementptr inbounds double, ptr [[BARRBEGIN]], i[[sz:64|32]] 0
+// CK0-DAG: [[BARRBEGINGEP:%.+]] = getelementptr inbounds nuw double, ptr [[BARRBEGIN]], i[[sz:64|32]] 0
 // CK0-DAG: [[BEND:%.+]] = getelementptr ptr, ptr [[BBEGIN]], i32 1
 // CK0-DAG: [[ABEGINI:%.+]] = ptrtoint ptr [[ABEGIN]] to i64
 // CK0-DAG: [[BENDI:%.+]] = ptrtoint ptr [[BEND]] to i64
@@ -965,7 +965,7 @@ public:
 // CK4-DAG: [[BBEGIN:%.+]] = getelementptr inbounds nuw %class.C, ptr [[PTR]], i32 0, i32 1
 // CK4-DAG: [[BBEGIN2:%.+]] = getelementptr inbounds nuw %class.C, ptr [[PTR]], i32 0, i32 1
 // CK4-DAG: [[BARRBEGIN:%.+]] = load ptr, ptr [[BBEGIN2]]
-// CK4-DAG: [[BARRBEGINGEP:%.+]] = getelementptr inbounds double, ptr [[BARRBEGIN]], i[[sz:64|32]] 0
+// CK4-DAG: [[BARRBEGINGEP:%.+]] = getelementptr inbounds nuw double, ptr [[BARRBEGIN]], i[[sz:64|32]] 0
 // CK4-DAG: [[BEND:%.+]] = getelementptr ptr, ptr [[BBEGIN]], i32 1
 // CK4-DAG: [[ABEGINI:%.+]] = ptrtoint ptr [[ABEGIN]] to i64
 // CK4-DAG: [[BENDI:%.+]] = ptrtoint ptr [[BEND]] to i64
diff --git clang/test/OpenMP/distribute_codegen.cpp clang/test/OpenMP/distribute_codegen.cpp
index ea619cb6e0f2..6c588ba25db3 100644
--- clang/test/OpenMP/distribute_codegen.cpp
+++ clang/test/OpenMP/distribute_codegen.cpp
@@ -662,24 +662,24 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK1-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP10]]
 // CHECK1-NEXT:    [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]]
 // CHECK1-NEXT:    [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[IDXPROM]]
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i64 [[IDXPROM]]
 // CHECK1-NEXT:    [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]]
 // CHECK1-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP10]]
 // CHECK1-NEXT:    [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]]
 // CHECK1-NEXT:    [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64
-// CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM3]]
+// CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i64 [[IDXPROM3]]
 // CHECK1-NEXT:    [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP10]]
 // CHECK1-NEXT:    [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]]
 // CHECK1-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP10]]
 // CHECK1-NEXT:    [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]]
 // CHECK1-NEXT:    [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64
-// CHECK1-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM6]]
+// CHECK1-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i64 [[IDXPROM6]]
 // CHECK1-NEXT:    [[TMP22:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP10]]
 // CHECK1-NEXT:    [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]]
 // CHECK1-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP10]]
 // CHECK1-NEXT:    [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]]
 // CHECK1-NEXT:    [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64
-// CHECK1-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM9]]
+// CHECK1-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw float, ptr [[TMP23]], i64 [[IDXPROM9]]
 // CHECK1-NEXT:    store float [[MUL8]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP10]]
 // CHECK1-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK1:       omp.body.continue:
@@ -1574,21 +1574,21 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK3-NEXT:    store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]]
 // CHECK3-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP11]]
 // CHECK3-NEXT:    [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]]
-// CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 [[TMP15]]
+// CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i32 [[TMP15]]
 // CHECK3-NEXT:    [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]]
 // CHECK3-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP11]]
 // CHECK3-NEXT:    [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]]
-// CHECK3-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 [[TMP18]]
+// CHECK3-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i32 [[TMP18]]
 // CHECK3-NEXT:    [[TMP19:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP11]]
 // CHECK3-NEXT:    [[MUL4:%.*]] = fmul float [[TMP16]], [[TMP19]]
 // CHECK3-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP11]]
 // CHECK3-NEXT:    [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]]
-// CHECK3-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i32 [[TMP21]]
+// CHECK3-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i32 [[TMP21]]
 // CHECK3-NEXT:    [[TMP22:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP11]]
 // CHECK3-NEXT:    [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP22]]
 // CHECK3-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 4, !llvm.access.group [[ACC_GRP11]]
 // CHECK3-NEXT:    [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]]
-// CHECK3-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i32 [[TMP24]]
+// CHECK3-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP23]], i32 [[TMP24]]
 // CHECK3-NEXT:    store float [[MUL6]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP11]]
 // CHECK3-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK3:       omp.body.continue:
@@ -2252,24 +2252,24 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK17-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP11]]
 // CHECK17-NEXT:    [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]]
 // CHECK17-NEXT:    [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64
-// CHECK17-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[IDXPROM]]
+// CHECK17-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i64 [[IDXPROM]]
 // CHECK17-NEXT:    [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]]
 // CHECK17-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP11]]
 // CHECK17-NEXT:    [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]]
 // CHECK17-NEXT:    [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64
-// CHECK17-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM3]]
+// CHECK17-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i64 [[IDXPROM3]]
 // CHECK17-NEXT:    [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP11]]
 // CHECK17-NEXT:    [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]]
 // CHECK17-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP11]]
 // CHECK17-NEXT:    [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]]
 // CHECK17-NEXT:    [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64
-// CHECK17-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM6]]
+// CHECK17-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i64 [[IDXPROM6]]
 // CHECK17-NEXT:    [[TMP22:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP11]]
 // CHECK17-NEXT:    [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]]
 // CHECK17-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP11]]
 // CHECK17-NEXT:    [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]]
 // CHECK17-NEXT:    [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64
-// CHECK17-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM9]]
+// CHECK17-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw float, ptr [[TMP23]], i64 [[IDXPROM9]]
 // CHECK17-NEXT:    store float [[MUL8]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP11]]
 // CHECK17-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK17:       omp.body.continue:
@@ -2790,21 +2790,21 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK19-NEXT:    store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]]
 // CHECK19-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP12]]
 // CHECK19-NEXT:    [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]]
-// CHECK19-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 [[TMP15]]
+// CHECK19-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i32 [[TMP15]]
 // CHECK19-NEXT:    [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]]
 // CHECK19-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP12]]
 // CHECK19-NEXT:    [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]]
-// CHECK19-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 [[TMP18]]
+// CHECK19-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i32 [[TMP18]]
 // CHECK19-NEXT:    [[TMP19:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP12]]
 // CHECK19-NEXT:    [[MUL4:%.*]] = fmul float [[TMP16]], [[TMP19]]
 // CHECK19-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP12]]
 // CHECK19-NEXT:    [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]]
-// CHECK19-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i32 [[TMP21]]
+// CHECK19-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i32 [[TMP21]]
 // CHECK19-NEXT:    [[TMP22:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP12]]
 // CHECK19-NEXT:    [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP22]]
 // CHECK19-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 4, !llvm.access.group [[ACC_GRP12]]
 // CHECK19-NEXT:    [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]]
-// CHECK19-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i32 [[TMP24]]
+// CHECK19-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP23]], i32 [[TMP24]]
 // CHECK19-NEXT:    store float [[MUL6]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP12]]
 // CHECK19-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK19:       omp.body.continue:
diff --git clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp
index b019b4ff92ad..93a6779ac02e 100644
--- clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp
+++ clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp
@@ -175,16 +175,16 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[ARGC1]], align 4
 // CHECK1-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP3]], i64 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 0
 // CHECK1-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
 // CHECK1-NEXT:    [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP6]]
 // CHECK1-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP7]], i64 9
+// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP7]], i64 9
 // CHECK1-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 [[LB_ADD_LEN]]
+// CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP8]], i64 [[LB_ADD_LEN]]
 // CHECK1-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64
 // CHECK1-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64
 // CHECK1-NEXT:    [[TMP11:%.*]] = sub i64 [[TMP9]], [[TMP10]]
@@ -214,7 +214,7 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP22]]
 // CHECK1-NEXT:    store ptr [[_TMP6]], ptr [[_TMP5]], align 8
 // CHECK1-NEXT:    store ptr [[TMP23]], ptr [[_TMP6]], align 8
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0
 // CHECK1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[ARGC1]], ptr [[TMP24]], align 8
 // CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1
@@ -229,19 +229,19 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    store ptr @.red_comb., ptr [[TMP29]], align 8
 // CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6
 // CHECK1-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP30]], i8 0, i64 4, i1 false)
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_7:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_7:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1
 // CHECK1-NEXT:    [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP32:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds ptr, ptr [[TMP32]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP32]], i64 0
 // CHECK1-NEXT:    [[TMP33:%.*]] = load ptr, ptr [[ARRAYIDX8]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, ptr [[TMP33]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP33]], i64 0
 // CHECK1-NEXT:    [[TMP34:%.*]] = load i32, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP35:%.*]] = sext i32 [[TMP34]] to i64
 // CHECK1-NEXT:    [[LB_ADD_LEN10:%.*]] = add nsw i64 -1, [[TMP35]]
 // CHECK1-NEXT:    [[TMP36:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds ptr, ptr [[TMP36]], i64 9
+// CHECK1-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP36]], i64 9
 // CHECK1-NEXT:    [[TMP37:%.*]] = load ptr, ptr [[ARRAYIDX11]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds i8, ptr [[TMP37]], i64 [[LB_ADD_LEN10]]
+// CHECK1-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP37]], i64 [[LB_ADD_LEN10]]
 // CHECK1-NEXT:    store ptr [[VLA]], ptr [[TMP31]], align 8
 // CHECK1-NEXT:    [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 1
 // CHECK1-NEXT:    store ptr [[ARRAYIDX9]], ptr [[TMP38]], align 8
@@ -562,9 +562,9 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]]
 // CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2
 // CHECK1-NEXT:    [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9
+// CHECK1-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP26]], i64 9
 // CHECK1-NEXT:    [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]]
+// CHECK1-NEXT:    [[ARRAYIDX3_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]]
 // CHECK1-NEXT:    [[TMP28:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64
 // CHECK1-NEXT:    [[TMP29:%.*]] = ptrtoint ptr [[TMP20]] to i64
 // CHECK1-NEXT:    [[TMP30:%.*]] = sub i64 [[TMP28]], [[TMP29]]
diff --git clang/test/OpenMP/distribute_simd_codegen.cpp clang/test/OpenMP/distribute_simd_codegen.cpp
index f7353172e235..ad93fd6030ac 100644
--- clang/test/OpenMP/distribute_simd_codegen.cpp
+++ clang/test/OpenMP/distribute_simd_codegen.cpp
@@ -706,24 +706,24 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK1-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP17]]
 // CHECK1-NEXT:    [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]]
 // CHECK1-NEXT:    [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[IDXPROM]]
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i64 [[IDXPROM]]
 // CHECK1-NEXT:    [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]]
 // CHECK1-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP17]]
 // CHECK1-NEXT:    [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]]
 // CHECK1-NEXT:    [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64
-// CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM3]]
+// CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i64 [[IDXPROM3]]
 // CHECK1-NEXT:    [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP17]]
 // CHECK1-NEXT:    [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]]
 // CHECK1-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP17]]
 // CHECK1-NEXT:    [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]]
 // CHECK1-NEXT:    [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64
-// CHECK1-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM6]]
+// CHECK1-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i64 [[IDXPROM6]]
 // CHECK1-NEXT:    [[TMP22:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP17]]
 // CHECK1-NEXT:    [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]]
 // CHECK1-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP17]]
 // CHECK1-NEXT:    [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]]
 // CHECK1-NEXT:    [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64
-// CHECK1-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM9]]
+// CHECK1-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw float, ptr [[TMP23]], i64 [[IDXPROM9]]
 // CHECK1-NEXT:    store float [[MUL8]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP17]]
 // CHECK1-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK1:       omp.body.continue:
@@ -1682,21 +1682,21 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK3-NEXT:    store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]]
 // CHECK3-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP18]]
 // CHECK3-NEXT:    [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 [[TMP15]]
+// CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i32 [[TMP15]]
 // CHECK3-NEXT:    [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]]
 // CHECK3-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP18]]
 // CHECK3-NEXT:    [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK3-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 [[TMP18]]
+// CHECK3-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i32 [[TMP18]]
 // CHECK3-NEXT:    [[TMP19:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP18]]
 // CHECK3-NEXT:    [[MUL4:%.*]] = fmul float [[TMP16]], [[TMP19]]
 // CHECK3-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP18]]
 // CHECK3-NEXT:    [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK3-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i32 [[TMP21]]
+// CHECK3-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i32 [[TMP21]]
 // CHECK3-NEXT:    [[TMP22:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP18]]
 // CHECK3-NEXT:    [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP22]]
 // CHECK3-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 4, !llvm.access.group [[ACC_GRP18]]
 // CHECK3-NEXT:    [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK3-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i32 [[TMP24]]
+// CHECK3-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP23]], i32 [[TMP24]]
 // CHECK3-NEXT:    store float [[MUL6]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP18]]
 // CHECK3-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK3:       omp.body.continue:
@@ -2664,24 +2664,24 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK5-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP18]]
 // CHECK5-NEXT:    [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]]
 // CHECK5-NEXT:    [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64
-// CHECK5-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[IDXPROM]]
+// CHECK5-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i64 [[IDXPROM]]
 // CHECK5-NEXT:    [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]]
 // CHECK5-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP18]]
 // CHECK5-NEXT:    [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]]
 // CHECK5-NEXT:    [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64
-// CHECK5-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM3]]
+// CHECK5-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i64 [[IDXPROM3]]
 // CHECK5-NEXT:    [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP18]]
 // CHECK5-NEXT:    [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]]
 // CHECK5-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP18]]
 // CHECK5-NEXT:    [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]]
 // CHECK5-NEXT:    [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64
-// CHECK5-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM6]]
+// CHECK5-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i64 [[IDXPROM6]]
 // CHECK5-NEXT:    [[TMP22:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP18]]
 // CHECK5-NEXT:    [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]]
 // CHECK5-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP18]]
 // CHECK5-NEXT:    [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]]
 // CHECK5-NEXT:    [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64
-// CHECK5-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM9]]
+// CHECK5-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw float, ptr [[TMP23]], i64 [[IDXPROM9]]
 // CHECK5-NEXT:    store float [[MUL8]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP18]]
 // CHECK5-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK5:       omp.body.continue:
@@ -3671,21 +3671,21 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK7-NEXT:    store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]]
 // CHECK7-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP19]]
 // CHECK7-NEXT:    [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]]
-// CHECK7-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 [[TMP15]]
+// CHECK7-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i32 [[TMP15]]
 // CHECK7-NEXT:    [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]]
 // CHECK7-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP19]]
 // CHECK7-NEXT:    [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]]
-// CHECK7-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 [[TMP18]]
+// CHECK7-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i32 [[TMP18]]
 // CHECK7-NEXT:    [[TMP19:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP19]]
 // CHECK7-NEXT:    [[MUL4:%.*]] = fmul float [[TMP16]], [[TMP19]]
 // CHECK7-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP19]]
 // CHECK7-NEXT:    [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]]
-// CHECK7-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i32 [[TMP21]]
+// CHECK7-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i32 [[TMP21]]
 // CHECK7-NEXT:    [[TMP22:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP19]]
 // CHECK7-NEXT:    [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP22]]
 // CHECK7-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 4, !llvm.access.group [[ACC_GRP19]]
 // CHECK7-NEXT:    [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]]
-// CHECK7-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i32 [[TMP24]]
+// CHECK7-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP23]], i32 [[TMP24]]
 // CHECK7-NEXT:    store float [[MUL6]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP19]]
 // CHECK7-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK7:       omp.body.continue:
@@ -4290,24 +4290,24 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK9-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !llvm.access.group [[ACC_GRP9]]
 // CHECK9-NEXT:    [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]]
 // CHECK9-NEXT:    [[IDXPROM:%.*]] = zext i32 [[TMP5]] to i64
-// CHECK9-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[IDXPROM]]
+// CHECK9-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP4]], i64 [[IDXPROM]]
 // CHECK9-NEXT:    [[TMP6:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]]
 // CHECK9-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !llvm.access.group [[ACC_GRP9]]
 // CHECK9-NEXT:    [[TMP8:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]]
 // CHECK9-NEXT:    [[IDXPROM1:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK9-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[IDXPROM1]]
+// CHECK9-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP7]], i64 [[IDXPROM1]]
 // CHECK9-NEXT:    [[TMP9:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP9]]
 // CHECK9-NEXT:    [[MUL3:%.*]] = fmul float [[TMP6]], [[TMP9]]
 // CHECK9-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !llvm.access.group [[ACC_GRP9]]
 // CHECK9-NEXT:    [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]]
 // CHECK9-NEXT:    [[IDXPROM4:%.*]] = zext i32 [[TMP11]] to i64
-// CHECK9-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i64 [[IDXPROM4]]
+// CHECK9-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP10]], i64 [[IDXPROM4]]
 // CHECK9-NEXT:    [[TMP12:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP9]]
 // CHECK9-NEXT:    [[MUL6:%.*]] = fmul float [[MUL3]], [[TMP12]]
 // CHECK9-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !llvm.access.group [[ACC_GRP9]]
 // CHECK9-NEXT:    [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]]
 // CHECK9-NEXT:    [[IDXPROM7:%.*]] = zext i32 [[TMP14]] to i64
-// CHECK9-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[IDXPROM7]]
+// CHECK9-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[IDXPROM7]]
 // CHECK9-NEXT:    store float [[MUL6]], ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP9]]
 // CHECK9-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK9:       omp.body.continue:
@@ -4606,21 +4606,21 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK11-NEXT:    store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]]
 // CHECK11-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP10]]
 // CHECK11-NEXT:    [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]]
-// CHECK11-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i32 [[TMP5]]
+// CHECK11-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP4]], i32 [[TMP5]]
 // CHECK11-NEXT:    [[TMP6:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]]
 // CHECK11-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !llvm.access.group [[ACC_GRP10]]
 // CHECK11-NEXT:    [[TMP8:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]]
-// CHECK11-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 [[TMP8]]
+// CHECK11-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw float, ptr [[TMP7]], i32 [[TMP8]]
 // CHECK11-NEXT:    [[TMP9:%.*]] = load float, ptr [[ARRAYIDX1]], align 4, !llvm.access.group [[ACC_GRP10]]
 // CHECK11-NEXT:    [[MUL2:%.*]] = fmul float [[TMP6]], [[TMP9]]
 // CHECK11-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[D_ADDR]], align 4, !llvm.access.group [[ACC_GRP10]]
 // CHECK11-NEXT:    [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]]
-// CHECK11-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i32 [[TMP11]]
+// CHECK11-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP10]], i32 [[TMP11]]
 // CHECK11-NEXT:    [[TMP12:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP10]]
 // CHECK11-NEXT:    [[MUL4:%.*]] = fmul float [[MUL2]], [[TMP12]]
 // CHECK11-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP10]]
 // CHECK11-NEXT:    [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]]
-// CHECK11-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i32 [[TMP14]]
+// CHECK11-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i32 [[TMP14]]
 // CHECK11-NEXT:    store float [[MUL4]], ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP10]]
 // CHECK11-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK11:       omp.body.continue:
@@ -4928,24 +4928,24 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK13-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !llvm.access.group [[ACC_GRP10]]
 // CHECK13-NEXT:    [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]]
 // CHECK13-NEXT:    [[IDXPROM:%.*]] = zext i32 [[TMP5]] to i64
-// CHECK13-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[IDXPROM]]
+// CHECK13-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP4]], i64 [[IDXPROM]]
 // CHECK13-NEXT:    [[TMP6:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]]
 // CHECK13-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !llvm.access.group [[ACC_GRP10]]
 // CHECK13-NEXT:    [[TMP8:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]]
 // CHECK13-NEXT:    [[IDXPROM1:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK13-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[IDXPROM1]]
+// CHECK13-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP7]], i64 [[IDXPROM1]]
 // CHECK13-NEXT:    [[TMP9:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP10]]
 // CHECK13-NEXT:    [[MUL3:%.*]] = fmul float [[TMP6]], [[TMP9]]
 // CHECK13-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !llvm.access.group [[ACC_GRP10]]
 // CHECK13-NEXT:    [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]]
 // CHECK13-NEXT:    [[IDXPROM4:%.*]] = zext i32 [[TMP11]] to i64
-// CHECK13-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i64 [[IDXPROM4]]
+// CHECK13-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP10]], i64 [[IDXPROM4]]
 // CHECK13-NEXT:    [[TMP12:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP10]]
 // CHECK13-NEXT:    [[MUL6:%.*]] = fmul float [[MUL3]], [[TMP12]]
 // CHECK13-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !llvm.access.group [[ACC_GRP10]]
 // CHECK13-NEXT:    [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]]
 // CHECK13-NEXT:    [[IDXPROM7:%.*]] = zext i32 [[TMP14]] to i64
-// CHECK13-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[IDXPROM7]]
+// CHECK13-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[IDXPROM7]]
 // CHECK13-NEXT:    store float [[MUL6]], ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP10]]
 // CHECK13-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK13:       omp.body.continue:
@@ -5275,21 +5275,21 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK15-NEXT:    store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]]
 // CHECK15-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP11]]
 // CHECK15-NEXT:    [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]]
-// CHECK15-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i32 [[TMP5]]
+// CHECK15-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP4]], i32 [[TMP5]]
 // CHECK15-NEXT:    [[TMP6:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]]
 // CHECK15-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !llvm.access.group [[ACC_GRP11]]
 // CHECK15-NEXT:    [[TMP8:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]]
-// CHECK15-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 [[TMP8]]
+// CHECK15-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw float, ptr [[TMP7]], i32 [[TMP8]]
 // CHECK15-NEXT:    [[TMP9:%.*]] = load float, ptr [[ARRAYIDX1]], align 4, !llvm.access.group [[ACC_GRP11]]
 // CHECK15-NEXT:    [[MUL2:%.*]] = fmul float [[TMP6]], [[TMP9]]
 // CHECK15-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[D_ADDR]], align 4, !llvm.access.group [[ACC_GRP11]]
 // CHECK15-NEXT:    [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]]
-// CHECK15-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i32 [[TMP11]]
+// CHECK15-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP10]], i32 [[TMP11]]
 // CHECK15-NEXT:    [[TMP12:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP11]]
 // CHECK15-NEXT:    [[MUL4:%.*]] = fmul float [[MUL2]], [[TMP12]]
 // CHECK15-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP11]]
 // CHECK15-NEXT:    [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]]
-// CHECK15-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i32 [[TMP14]]
+// CHECK15-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i32 [[TMP14]]
 // CHECK15-NEXT:    store float [[MUL4]], ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP11]]
 // CHECK15-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK15:       omp.body.continue:
@@ -5782,24 +5782,24 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK17-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP18]]
 // CHECK17-NEXT:    [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]]
 // CHECK17-NEXT:    [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64
-// CHECK17-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[IDXPROM]]
+// CHECK17-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i64 [[IDXPROM]]
 // CHECK17-NEXT:    [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]]
 // CHECK17-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP18]]
 // CHECK17-NEXT:    [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]]
 // CHECK17-NEXT:    [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64
-// CHECK17-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM3]]
+// CHECK17-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i64 [[IDXPROM3]]
 // CHECK17-NEXT:    [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP18]]
 // CHECK17-NEXT:    [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]]
 // CHECK17-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP18]]
 // CHECK17-NEXT:    [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]]
 // CHECK17-NEXT:    [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64
-// CHECK17-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM6]]
+// CHECK17-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i64 [[IDXPROM6]]
 // CHECK17-NEXT:    [[TMP22:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP18]]
 // CHECK17-NEXT:    [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]]
 // CHECK17-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP18]]
 // CHECK17-NEXT:    [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]]
 // CHECK17-NEXT:    [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64
-// CHECK17-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM9]]
+// CHECK17-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw float, ptr [[TMP23]], i64 [[IDXPROM9]]
 // CHECK17-NEXT:    store float [[MUL8]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP18]]
 // CHECK17-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK17:       omp.body.continue:
@@ -6373,21 +6373,21 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK19-NEXT:    store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]]
 // CHECK19-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP19]]
 // CHECK19-NEXT:    [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]]
-// CHECK19-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 [[TMP15]]
+// CHECK19-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i32 [[TMP15]]
 // CHECK19-NEXT:    [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]]
 // CHECK19-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP19]]
 // CHECK19-NEXT:    [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]]
-// CHECK19-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 [[TMP18]]
+// CHECK19-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i32 [[TMP18]]
 // CHECK19-NEXT:    [[TMP19:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP19]]
 // CHECK19-NEXT:    [[MUL4:%.*]] = fmul float [[TMP16]], [[TMP19]]
 // CHECK19-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP19]]
 // CHECK19-NEXT:    [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]]
-// CHECK19-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i32 [[TMP21]]
+// CHECK19-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i32 [[TMP21]]
 // CHECK19-NEXT:    [[TMP22:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP19]]
 // CHECK19-NEXT:    [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP22]]
 // CHECK19-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 4, !llvm.access.group [[ACC_GRP19]]
 // CHECK19-NEXT:    [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]]
-// CHECK19-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i32 [[TMP24]]
+// CHECK19-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP23]], i32 [[TMP24]]
 // CHECK19-NEXT:    store float [[MUL6]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP19]]
 // CHECK19-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK19:       omp.body.continue:
@@ -6970,24 +6970,24 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK21-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP19]]
 // CHECK21-NEXT:    [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]]
 // CHECK21-NEXT:    [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64
-// CHECK21-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[IDXPROM]]
+// CHECK21-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i64 [[IDXPROM]]
 // CHECK21-NEXT:    [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]]
 // CHECK21-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP19]]
 // CHECK21-NEXT:    [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]]
 // CHECK21-NEXT:    [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64
-// CHECK21-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM3]]
+// CHECK21-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i64 [[IDXPROM3]]
 // CHECK21-NEXT:    [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP19]]
 // CHECK21-NEXT:    [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]]
 // CHECK21-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP19]]
 // CHECK21-NEXT:    [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]]
 // CHECK21-NEXT:    [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64
-// CHECK21-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM6]]
+// CHECK21-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i64 [[IDXPROM6]]
 // CHECK21-NEXT:    [[TMP22:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP19]]
 // CHECK21-NEXT:    [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]]
 // CHECK21-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP19]]
 // CHECK21-NEXT:    [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]]
 // CHECK21-NEXT:    [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64
-// CHECK21-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM9]]
+// CHECK21-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw float, ptr [[TMP23]], i64 [[IDXPROM9]]
 // CHECK21-NEXT:    store float [[MUL8]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP19]]
 // CHECK21-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK21:       omp.body.continue:
@@ -7592,21 +7592,21 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK23-NEXT:    store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP20]]
 // CHECK23-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP20]]
 // CHECK23-NEXT:    [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP20]]
-// CHECK23-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 [[TMP15]]
+// CHECK23-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i32 [[TMP15]]
 // CHECK23-NEXT:    [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP20]]
 // CHECK23-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP20]]
 // CHECK23-NEXT:    [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP20]]
-// CHECK23-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 [[TMP18]]
+// CHECK23-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i32 [[TMP18]]
 // CHECK23-NEXT:    [[TMP19:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP20]]
 // CHECK23-NEXT:    [[MUL4:%.*]] = fmul float [[TMP16]], [[TMP19]]
 // CHECK23-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP20]]
 // CHECK23-NEXT:    [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP20]]
-// CHECK23-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i32 [[TMP21]]
+// CHECK23-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i32 [[TMP21]]
 // CHECK23-NEXT:    [[TMP22:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP20]]
 // CHECK23-NEXT:    [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP22]]
 // CHECK23-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 4, !llvm.access.group [[ACC_GRP20]]
 // CHECK23-NEXT:    [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP20]]
-// CHECK23-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i32 [[TMP24]]
+// CHECK23-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP23]], i32 [[TMP24]]
 // CHECK23-NEXT:    store float [[MUL6]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP20]]
 // CHECK23-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK23:       omp.body.continue:
diff --git clang/test/OpenMP/for_linear_codegen.cpp clang/test/OpenMP/for_linear_codegen.cpp
index 395ccdbeed76..5a21fe8509fd 100644
--- clang/test/OpenMP/for_linear_codegen.cpp
+++ clang/test/OpenMP/for_linear_codegen.cpp
@@ -650,7 +650,7 @@ int main() {
 // CHECK1-NEXT:    [[ADD10:%.*]] = add nsw i32 [[TMP16]], [[MUL9]]
 // CHECK1-NEXT:    store i32 [[ADD10]], ptr [[LVAR5]], align 4
 // CHECK1-NEXT:    [[TMP18:%.*]] = load ptr, ptr [[PVAR4]], align 8
-// CHECK1-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 1
+// CHECK1-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP18]], i32 1
 // CHECK1-NEXT:    store ptr [[INCDEC_PTR]], ptr [[PVAR4]], align 8
 // CHECK1-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[_TMP6]], align 8
 // CHECK1-NEXT:    [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
diff --git clang/test/OpenMP/for_reduction_codegen.cpp clang/test/OpenMP/for_reduction_codegen.cpp
index ea32e98bf142..83632db23848 100644
--- clang/test/OpenMP/for_reduction_codegen.cpp
+++ clang/test/OpenMP/for_reduction_codegen.cpp
@@ -1021,14 +1021,14 @@ int main() {
 // CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP5:%.*]] = mul nsw i64 1, [[TMP1]]
 // CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[TMP5]]
-// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX]], i64 0
 // CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP3]], i64 0, i64 1
 // CHECK1-NEXT:    [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4
 // CHECK1-NEXT:    [[TMP7:%.*]] = sext i32 [[TMP6]] to i64
 // CHECK1-NEXT:    [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP7]]
 // CHECK1-NEXT:    [[TMP8:%.*]] = mul nsw i64 1, [[TMP1]]
 // CHECK1-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[TMP8]]
-// CHECK1-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX5]], i64 [[LB_ADD_LEN]]
+// CHECK1-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX5]], i64 [[LB_ADD_LEN]]
 // CHECK1-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[ARRAYIDX6]] to i64
 // CHECK1-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64
 // CHECK1-NEXT:    [[TMP11:%.*]] = sub i64 [[TMP9]], [[TMP10]]
@@ -1054,16 +1054,16 @@ int main() {
 // CHECK1-NEXT:    [[TMP19:%.*]] = sub i64 [[TMP17]], [[TMP18]]
 // CHECK1-NEXT:    [[TMP20:%.*]] = sdiv exact i64 [[TMP19]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64)
 // CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr i32, ptr [[VLA7]], i64 [[TMP20]]
-// CHECK1-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x [4 x %struct.S]], ptr [[TMP4]], i64 0, i64 1
+// CHECK1-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw [10 x [4 x %struct.S]], ptr [[TMP4]], i64 0, i64 1
 // CHECK1-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[ARRAYIDX8]], i64 0, i64 0
-// CHECK1-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[ARRAYDECAY]], i64 1
+// CHECK1-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds nuw [[STRUCT_S:%.*]], ptr [[ARRAYDECAY]], i64 1
 // CHECK1-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP3]], i64 0, i64 1
 // CHECK1-NEXT:    [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX10]], align 4
 // CHECK1-NEXT:    [[TMP23:%.*]] = sext i32 [[TMP22]] to i64
 // CHECK1-NEXT:    [[LB_ADD_LEN11:%.*]] = add nsw i64 0, [[TMP23]]
-// CHECK1-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x [4 x %struct.S]], ptr [[TMP4]], i64 0, i64 [[LB_ADD_LEN11]]
+// CHECK1-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds nuw [10 x [4 x %struct.S]], ptr [[TMP4]], i64 0, i64 [[LB_ADD_LEN11]]
 // CHECK1-NEXT:    [[ARRAYDECAY13:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[ARRAYIDX12]], i64 0, i64 0
-// CHECK1-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDECAY13]], i64 2
+// CHECK1-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[ARRAYDECAY13]], i64 2
 // CHECK1-NEXT:    [[TMP24:%.*]] = ptrtoint ptr [[ARRAYIDX14]] to i64
 // CHECK1-NEXT:    [[TMP25:%.*]] = ptrtoint ptr [[ARRAYIDX9]] to i64
 // CHECK1-NEXT:    [[TMP26:%.*]] = sub i64 [[TMP24]], [[TMP25]]
@@ -1580,10 +1580,10 @@ int main() {
 // CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP3:%.*]] = mul nsw i64 1, [[TMP1]]
 // CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[TMP3]]
-// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX]], i64 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = mul nsw i64 1, [[TMP1]]
 // CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[TMP4]]
-// CHECK1-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX4]], i64 1
+// CHECK1-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX4]], i64 1
 // CHECK1-NEXT:    [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [1 x [2 x i32]], ptr [[ARR6]], i32 0, i32 0, i32 0
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 2
 // CHECK1-NEXT:    [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]]
@@ -1757,13 +1757,13 @@ int main() {
 // CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_STRIDE]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP1]], i64 0
 // CHECK1-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[TMP2]], i64 1
+// CHECK1-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw [[STRUCT_S:%.*]], ptr [[TMP2]], i64 1
 // CHECK1-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP0]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i64 4
+// CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP3]], i64 4
 // CHECK1-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[TMP4]], i64 6
+// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[TMP4]], i64 6
 // CHECK1-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64
 // CHECK1-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64
 // CHECK1-NEXT:    [[TMP7:%.*]] = sub i64 [[TMP5]], [[TMP6]]
@@ -1963,11 +1963,11 @@ int main() {
 // CHECK1-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8
 // CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 1
 // CHECK1-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[TMP2]], i64 1
+// CHECK1-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw [[STRUCT_S:%.*]], ptr [[TMP2]], i64 1
 // CHECK1-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP0]], align 8
 // CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i64 1
 // CHECK1-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[TMP4]], i64 6
+// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[TMP4]], i64 6
 // CHECK1-NEXT:    [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [1 x [6 x %struct.S]], ptr [[VAR24]], i32 0, i32 0, i32 0
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 6
 // CHECK1-NEXT:    [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]]
@@ -2148,13 +2148,13 @@ int main() {
 // CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_STRIDE]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 1
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP1]], i64 1
 // CHECK1-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[TMP2]], i64 1
+// CHECK1-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw [[STRUCT_S:%.*]], ptr [[TMP2]], i64 1
 // CHECK1-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP0]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i64 1
+// CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP3]], i64 1
 // CHECK1-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[TMP4]], i64 6
+// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[TMP4]], i64 6
 // CHECK1-NEXT:    [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [1 x [6 x %struct.S]], ptr [[VAR24]], i32 0, i32 0, i32 0
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 6
 // CHECK1-NEXT:    [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]]
@@ -2335,13 +2335,13 @@ int main() {
 // CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_STRIDE]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 1
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP1]], i64 1
 // CHECK1-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[TMP2]], i64 1
+// CHECK1-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[TMP2]], i64 1
 // CHECK1-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP0]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i64 1
+// CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP3]], i64 1
 // CHECK1-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[TMP4]], i64 1
+// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[TMP4]], i64 1
 // CHECK1-NEXT:    call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR24]])
 // CHECK1-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP0]], align 8
 // CHECK1-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8
@@ -2459,8 +2459,8 @@ int main() {
 // CHECK1-NEXT:    store i32 9, ptr [[DOTOMP_UB]], align 4
 // CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_STRIDE]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [5 x %struct.S], ptr [[TMP0]], i64 0, i64 0
-// CHECK1-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [5 x %struct.S], ptr [[TMP0]], i64 0, i64 4
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [5 x %struct.S], ptr [[TMP0]], i64 0, i64 0
+// CHECK1-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw [5 x %struct.S], ptr [[TMP0]], i64 0, i64 4
 // CHECK1-NEXT:    [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [5 x %struct.S], ptr [[VVAR22]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[ARRAY_BEGIN]], i64 5
 // CHECK1-NEXT:    [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP1]]
@@ -2641,9 +2641,9 @@ int main() {
 // CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_STRIDE]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[TMP2]], i64 0, i64 1
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [4 x %struct.S], ptr [[TMP2]], i64 0, i64 1
 // CHECK1-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[_TMP1]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[TMP3]], i64 0, i64 2
+// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw [4 x %struct.S], ptr [[TMP3]], i64 0, i64 2
 // CHECK1-NEXT:    [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[VAR34]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[ARRAY_BEGIN]], i64 2
 // CHECK1-NEXT:    [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP4]]
@@ -2826,9 +2826,9 @@ int main() {
 // CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_STRIDE]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[TMP2]], i64 0, i64 0
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [4 x %struct.S], ptr [[TMP2]], i64 0, i64 0
 // CHECK1-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[_TMP1]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[TMP3]], i64 0, i64 1
+// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw [4 x %struct.S], ptr [[TMP3]], i64 0, i64 1
 // CHECK1-NEXT:    [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[VAR34]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[ARRAY_BEGIN]], i64 2
 // CHECK1-NEXT:    [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP4]]
@@ -3012,9 +3012,9 @@ int main() {
 // CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_STRIDE]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[TMP2]], i64 0, i64 2
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [4 x %struct.S], ptr [[TMP2]], i64 0, i64 2
 // CHECK1-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[_TMP1]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[TMP3]], i64 0, i64 3
+// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw [4 x %struct.S], ptr [[TMP3]], i64 0, i64 3
 // CHECK1-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64
 // CHECK1-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64
 // CHECK1-NEXT:    [[TMP6:%.*]] = sub i64 [[TMP4]], [[TMP5]]
@@ -3974,8 +3974,8 @@ int main() {
 // CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_UB]], align 4
 // CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_STRIDE]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [42 x %struct.S.0], ptr [[TMP0]], i64 0, i64 1
-// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds [42 x %struct.S.0], ptr [[TMP0]], i64 0, i64 40
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [42 x %struct.S.0], ptr [[TMP0]], i64 0, i64 1
+// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw [42 x %struct.S.0], ptr [[TMP0]], i64 0, i64 40
 // CHECK1-NEXT:    [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [40 x %struct.S.0], ptr [[ARR4]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 40
 // CHECK1-NEXT:    [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP6]]
diff --git clang/test/OpenMP/for_reduction_codegen_UDR.cpp clang/test/OpenMP/for_reduction_codegen_UDR.cpp
index 16d6c23542fc..82f94c949eea 100644
--- clang/test/OpenMP/for_reduction_codegen_UDR.cpp
+++ clang/test/OpenMP/for_reduction_codegen_UDR.cpp
@@ -1074,14 +1074,14 @@ int main() {
 // CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP5:%.*]] = mul nsw i64 1, [[TMP1]]
 // CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[TMP5]]
-// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX]], i64 0
 // CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP3]], i64 0, i64 1
 // CHECK1-NEXT:    [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4
 // CHECK1-NEXT:    [[TMP7:%.*]] = sext i32 [[TMP6]] to i64
 // CHECK1-NEXT:    [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP7]]
 // CHECK1-NEXT:    [[TMP8:%.*]] = mul nsw i64 1, [[TMP1]]
 // CHECK1-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[TMP8]]
-// CHECK1-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX5]], i64 [[LB_ADD_LEN]]
+// CHECK1-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX5]], i64 [[LB_ADD_LEN]]
 // CHECK1-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[ARRAYIDX6]] to i64
 // CHECK1-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64
 // CHECK1-NEXT:    [[TMP11:%.*]] = sub i64 [[TMP9]], [[TMP10]]
@@ -1109,16 +1109,16 @@ int main() {
 // CHECK1-NEXT:    [[TMP19:%.*]] = sub i64 [[TMP17]], [[TMP18]]
 // CHECK1-NEXT:    [[TMP20:%.*]] = sdiv exact i64 [[TMP19]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64)
 // CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr i32, ptr [[VLA7]], i64 [[TMP20]]
-// CHECK1-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [4 x %struct.S.0]], ptr [[TMP4]], i64 0, i64 1
+// CHECK1-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds nuw [10 x [4 x %struct.S.0]], ptr [[TMP4]], i64 0, i64 1
 // CHECK1-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x %struct.S.0], ptr [[ARRAYIDX9]], i64 0, i64 0
-// CHECK1-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[ARRAYDECAY]], i64 1
+// CHECK1-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw [[STRUCT_S_0:%.*]], ptr [[ARRAYDECAY]], i64 1
 // CHECK1-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP3]], i64 0, i64 1
 // CHECK1-NEXT:    [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX11]], align 4
 // CHECK1-NEXT:    [[TMP23:%.*]] = sext i32 [[TMP22]] to i64
 // CHECK1-NEXT:    [[LB_ADD_LEN12:%.*]] = add nsw i64 0, [[TMP23]]
-// CHECK1-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [4 x %struct.S.0]], ptr [[TMP4]], i64 0, i64 [[LB_ADD_LEN12]]
+// CHECK1-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds nuw [10 x [4 x %struct.S.0]], ptr [[TMP4]], i64 0, i64 [[LB_ADD_LEN12]]
 // CHECK1-NEXT:    [[ARRAYDECAY14:%.*]] = getelementptr inbounds [4 x %struct.S.0], ptr [[ARRAYIDX13]], i64 0, i64 0
-// CHECK1-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDECAY14]], i64 2
+// CHECK1-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds nuw [[STRUCT_S_0]], ptr [[ARRAYDECAY14]], i64 2
 // CHECK1-NEXT:    [[TMP24:%.*]] = ptrtoint ptr [[ARRAYIDX15]] to i64
 // CHECK1-NEXT:    [[TMP25:%.*]] = ptrtoint ptr [[ARRAYIDX10]] to i64
 // CHECK1-NEXT:    [[TMP26:%.*]] = sub i64 [[TMP24]], [[TMP25]]
@@ -1669,13 +1669,13 @@ int main() {
 // CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_STRIDE]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP1]], i64 0
 // CHECK1-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[TMP2]], i64 1
+// CHECK1-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw [[STRUCT_S_0:%.*]], ptr [[TMP2]], i64 1
 // CHECK1-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP0]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i64 4
+// CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP3]], i64 4
 // CHECK1-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[TMP4]], i64 6
+// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw [[STRUCT_S_0]], ptr [[TMP4]], i64 6
 // CHECK1-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64
 // CHECK1-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64
 // CHECK1-NEXT:    [[TMP7:%.*]] = sub i64 [[TMP5]], [[TMP6]]
@@ -1877,8 +1877,8 @@ int main() {
 // CHECK1-NEXT:    store i32 9, ptr [[DOTOMP_UB]], align 4
 // CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_STRIDE]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [5 x %struct.S.0], ptr [[TMP0]], i64 0, i64 0
-// CHECK1-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [5 x %struct.S.0], ptr [[TMP0]], i64 0, i64 4
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [5 x %struct.S.0], ptr [[TMP0]], i64 0, i64 0
+// CHECK1-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw [5 x %struct.S.0], ptr [[TMP0]], i64 0, i64 4
 // CHECK1-NEXT:    [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [5 x %struct.S.0], ptr [[VVAR22]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr [[STRUCT_S_0:%.*]], ptr [[ARRAY_BEGIN]], i64 5
 // CHECK1-NEXT:    [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP1]]
@@ -2066,9 +2066,9 @@ int main() {
 // CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_STRIDE]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x %struct.S.0], ptr [[TMP2]], i64 0, i64 1
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [4 x %struct.S.0], ptr [[TMP2]], i64 0, i64 1
 // CHECK1-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[_TMP1]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x %struct.S.0], ptr [[TMP3]], i64 0, i64 2
+// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw [4 x %struct.S.0], ptr [[TMP3]], i64 0, i64 2
 // CHECK1-NEXT:    [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[VAR34]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr [[STRUCT_S_0:%.*]], ptr [[ARRAY_BEGIN]], i64 2
 // CHECK1-NEXT:    [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP4]]
@@ -2979,8 +2979,8 @@ int main() {
 // CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_UB]], align 4
 // CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_STRIDE]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [42 x %struct.S], ptr [[TMP0]], i64 0, i64 1
-// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds [42 x %struct.S], ptr [[TMP0]], i64 0, i64 40
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [42 x %struct.S], ptr [[TMP0]], i64 0, i64 1
+// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw [42 x %struct.S], ptr [[TMP0]], i64 0, i64 40
 // CHECK1-NEXT:    [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [40 x %struct.S], ptr [[ARR4]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[ARRAY_BEGIN]], i64 40
 // CHECK1-NEXT:    [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP6]]
diff --git clang/test/OpenMP/for_reduction_task_codegen.cpp clang/test/OpenMP/for_reduction_task_codegen.cpp
index ea93323de77d..b875279c2a14 100644
--- clang/test/OpenMP/for_reduction_task_codegen.cpp
+++ clang/test/OpenMP/for_reduction_task_codegen.cpp
@@ -68,16 +68,16 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[ARGC1]], align 4
 // CHECK1-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP2]], i64 0
 // CHECK1-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP3]], i64 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
 // CHECK1-NEXT:    [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP5]]
 // CHECK1-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[TMP1]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP6]], i64 9
+// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP6]], i64 9
 // CHECK1-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 [[LB_ADD_LEN]]
+// CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP7]], i64 [[LB_ADD_LEN]]
 // CHECK1-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64
 // CHECK1-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64
 // CHECK1-NEXT:    [[TMP10:%.*]] = sub i64 [[TMP8]], [[TMP9]]
@@ -107,7 +107,7 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP21]]
 // CHECK1-NEXT:    store ptr [[_TMP6]], ptr [[_TMP5]], align 8
 // CHECK1-NEXT:    store ptr [[TMP22]], ptr [[_TMP6]], align 8
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0
 // CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[ARGC1]], ptr [[TMP23]], align 8
 // CHECK1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1
@@ -122,19 +122,19 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    store ptr @.red_comb., ptr [[TMP28]], align 8
 // CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6
 // CHECK1-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP29]], i8 0, i64 4, i1 false)
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_7:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_7:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1
 // CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP31:%.*]] = load ptr, ptr [[TMP1]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds ptr, ptr [[TMP31]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP31]], i64 0
 // CHECK1-NEXT:    [[TMP32:%.*]] = load ptr, ptr [[ARRAYIDX8]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, ptr [[TMP32]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP32]], i64 0
 // CHECK1-NEXT:    [[TMP33:%.*]] = load i32, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP34:%.*]] = sext i32 [[TMP33]] to i64
 // CHECK1-NEXT:    [[LB_ADD_LEN10:%.*]] = add nsw i64 -1, [[TMP34]]
 // CHECK1-NEXT:    [[TMP35:%.*]] = load ptr, ptr [[TMP1]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds ptr, ptr [[TMP35]], i64 9
+// CHECK1-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP35]], i64 9
 // CHECK1-NEXT:    [[TMP36:%.*]] = load ptr, ptr [[ARRAYIDX11]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds i8, ptr [[TMP36]], i64 [[LB_ADD_LEN10]]
+// CHECK1-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP36]], i64 [[LB_ADD_LEN10]]
 // CHECK1-NEXT:    store ptr [[VLA]], ptr [[TMP30]], align 8
 // CHECK1-NEXT:    [[TMP37:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 1
 // CHECK1-NEXT:    store ptr [[ARRAYIDX9]], ptr [[TMP37]], align 8
@@ -459,9 +459,9 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]]
 // CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2
 // CHECK1-NEXT:    [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9
+// CHECK1-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP26]], i64 9
 // CHECK1-NEXT:    [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]]
+// CHECK1-NEXT:    [[ARRAYIDX3_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]]
 // CHECK1-NEXT:    [[TMP28:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64
 // CHECK1-NEXT:    [[TMP29:%.*]] = ptrtoint ptr [[TMP20]] to i64
 // CHECK1-NEXT:    [[TMP30:%.*]] = sub i64 [[TMP28]], [[TMP29]]
diff --git clang/test/OpenMP/for_scan_codegen.cpp clang/test/OpenMP/for_scan_codegen.cpp
index 4cf18a76fbfe..61e6534db471 100644
--- clang/test/OpenMP/for_scan_codegen.cpp
+++ clang/test/OpenMP/for_scan_codegen.cpp
@@ -39,13 +39,13 @@ void baz(int n) {
     // CHECK: [[BASE_IDX_I:%.+]] = load i32, ptr [[IV_ADDR:%.+]],
     // CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64
     // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS]]
-    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]]
-    // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0
+    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]]
+    // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds nuw [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0
     // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4
     // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}}[[A_BUF_IDX]], ptr {{.*}}[[A_PRIV]], i64 [[BYTES]], i1 false)
 
     // b_buffer[i] = b_priv;
-    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[BASE_IDX]]
+    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[BASE_IDX]]
     // CHECK: [[B_PRIV:%.+]] = load double, ptr [[B_PRIV_ADDR]],
     // CHECK: store double [[B_PRIV]], ptr [[B_BUF_IDX]],
     // CHECK: br label %[[LOOP_CONTINUE:.+]]
@@ -72,13 +72,13 @@ void baz(int n) {
 
     // a_buffer[i] += a_buffer[i-pow(2, k)];
     // CHECK: [[IDX:%.+]] = mul nsw i64 [[I]], [[NUM_ELEMS]]
-    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]]
+    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]]
     // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]]
     // CHECK: [[IDX:%.+]] = mul nsw i64 [[IDX_SUB_K2POW]], [[NUM_ELEMS]]
-    // CHECK: [[A_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]]
-    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[I]]
+    // CHECK: [[A_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]]
+    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[I]]
     // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]]
-    // CHECK: [[B_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[IDX_SUB_K2POW]]
+    // CHECK: [[B_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[IDX_SUB_K2POW]]
     // CHECK: [[A_BUF_END:%.+]] = getelementptr float, ptr [[A_BUF_IDX]], i64 [[NUM_ELEMS]]
     // CHECK: [[ISEMPTY:%.+]] = icmp eq ptr [[A_BUF_IDX]], [[A_BUF_END]]
     // CHECK: br i1 [[ISEMPTY]], label %[[RED_DONE:[^,]+]], label %[[RED_BODY:[^,]+]]
@@ -132,13 +132,13 @@ void baz(int n) {
     // CHECK: [[BASE_IDX_I:%.+]] = load i32, ptr [[IV_ADDR:%.+]],
     // CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64
     // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS]]
-    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]]
-    // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0
+    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]]
+    // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds nuw [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0
     // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4
     // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}}[[A_PRIV]], ptr {{.*}}[[A_BUF_IDX]], i64 [[BYTES]], i1 false)
 
     // b_priv = b_buffer[i];
-    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[BASE_IDX]]
+    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[BASE_IDX]]
     // CHECK: [[B_BUF_IDX_VAL:%.+]] = load double, ptr [[B_BUF_IDX]],
     // CHECK: store double [[B_BUF_IDX_VAL]], ptr [[B_PRIV_ADDR]],
     // CHECK: br label %[[SCAN_PHASE:[^,]+]]
@@ -179,13 +179,13 @@ void baz(int n) {
     // CHECK: [[BASE_IDX_I:%.+]] = load i32, ptr [[IV_ADDR:%.+]],
     // CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64
     // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS]]
-    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]]
-    // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0
+    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]]
+    // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds nuw [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0
     // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4
     // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}}[[A_BUF_IDX]], ptr {{.*}}[[A_PRIV]], i64 [[BYTES]], i1 false)
 
     // b_buffer[i] = b_priv;
-    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[BASE_IDX]]
+    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[BASE_IDX]]
     // CHECK: [[B_PRIV:%.+]] = load double, ptr [[B_PRIV_ADDR]],
     // CHECK: store double [[B_PRIV]], ptr [[B_BUF_IDX]],
     // CHECK: br label %[[LOOP_CONTINUE:[^,]+]]
@@ -217,13 +217,13 @@ void baz(int n) {
 
     // a_buffer[i] += a_buffer[i-pow(2, k)];
     // CHECK: [[IDX:%.+]] = mul nsw i64 [[I]], [[NUM_ELEMS]]
-    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]]
+    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]]
     // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]]
     // CHECK: [[IDX:%.+]] = mul nsw i64 [[IDX_SUB_K2POW]], [[NUM_ELEMS]]
-    // CHECK: [[A_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]]
-    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[I]]
+    // CHECK: [[A_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]]
+    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[I]]
     // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]]
-    // CHECK: [[B_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[IDX_SUB_K2POW]]
+    // CHECK: [[B_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[IDX_SUB_K2POW]]
     // CHECK: [[A_BUF_END:%.+]] = getelementptr float, ptr [[A_BUF_IDX]], i64 [[NUM_ELEMS]]
     // CHECK: [[ISEMPTY:%.+]] = icmp eq ptr [[A_BUF_IDX]], [[A_BUF_END]]
     // CHECK: br i1 [[ISEMPTY]], label %[[RED_DONE:[^,]+]], label %[[RED_BODY:[^,]+]]
@@ -280,13 +280,13 @@ void baz(int n) {
     // CHECK: [[IF_THEN]]:
     // CHECK: [[BASE_IDX_SUB_1:%.+]] = sub nuw i64 [[BASE_IDX]], 1
     // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX_SUB_1]], [[NUM_ELEMS]]
-    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]]
-    // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0
+    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]]
+    // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds nuw [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0
     // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4
     // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}}[[A_PRIV]], ptr {{.*}}[[A_BUF_IDX]], i64 [[BYTES]], i1 false)
 
     // b_priv = b_buffer[i];
-    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[BASE_IDX_SUB_1]]
+    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[BASE_IDX_SUB_1]]
     // CHECK: [[B_BUF_IDX_VAL:%.+]] = load double, ptr [[B_BUF_IDX]],
     // CHECK: store double [[B_BUF_IDX_VAL]], ptr [[B_PRIV_ADDR]],
     // CHECK: br label %[[SCAN_PHASE]]
diff --git clang/test/OpenMP/for_simd_scan_codegen.cpp clang/test/OpenMP/for_simd_scan_codegen.cpp
index 29af5f74c5b5..829f2656042f 100644
--- clang/test/OpenMP/for_simd_scan_codegen.cpp
+++ clang/test/OpenMP/for_simd_scan_codegen.cpp
@@ -39,13 +39,13 @@ void baz(int n) {
     // CHECK: [[BASE_IDX_I:%.+]] = load i32, ptr [[IV_ADDR:%.+]],
     // CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64
     // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS]]
-    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]]
-    // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0
+    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]]
+    // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds nuw [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0
     // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4
     // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}}[[A_BUF_IDX]], ptr {{.*}}[[A_PRIV]], i64 [[BYTES]], i1 false)
 
     // b_buffer[i] = b_priv;
-    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[BASE_IDX]]
+    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[BASE_IDX]]
     // CHECK: [[B_PRIV:%.+]] = load double, ptr [[B_PRIV_ADDR]],
     // CHECK: store double [[B_PRIV]], ptr [[B_BUF_IDX]],
     // CHECK: br label %[[LOOP_CONTINUE:.+]]
@@ -72,13 +72,13 @@ void baz(int n) {
 
     // a_buffer[i] += a_buffer[i-pow(2, k)];
     // CHECK: [[IDX:%.+]] = mul nsw i64 [[I]], [[NUM_ELEMS]]
-    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]]
+    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]]
     // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]]
     // CHECK: [[IDX:%.+]] = mul nsw i64 [[IDX_SUB_K2POW]], [[NUM_ELEMS]]
-    // CHECK: [[A_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]]
-    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[I]]
+    // CHECK: [[A_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]]
+    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[I]]
     // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]]
-    // CHECK: [[B_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[IDX_SUB_K2POW]]
+    // CHECK: [[B_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[IDX_SUB_K2POW]]
     // CHECK: [[A_BUF_END:%.+]] = getelementptr float, ptr [[A_BUF_IDX]], i64 [[NUM_ELEMS]]
     // CHECK: [[ISEMPTY:%.+]] = icmp eq ptr [[A_BUF_IDX]], [[A_BUF_END]]
     // CHECK: br i1 [[ISEMPTY]], label %[[RED_DONE:[^,]+]], label %[[RED_BODY:[^,]+]]
@@ -132,13 +132,13 @@ void baz(int n) {
     // CHECK: [[BASE_IDX_I:%.+]] = load i32, ptr [[IV_ADDR:%.+]],
     // CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64
     // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS]]
-    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]]
-    // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0
+    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]]
+    // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds nuw [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0
     // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4
     // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}}[[A_PRIV]], ptr {{.*}}[[A_BUF_IDX]], i64 [[BYTES]], i1 false)
 
     // b_priv = b_buffer[i];
-    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[BASE_IDX]]
+    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[BASE_IDX]]
     // CHECK: [[B_BUF_IDX_VAL:%.+]] = load double, ptr [[B_BUF_IDX]],
     // CHECK: store double [[B_BUF_IDX_VAL]], ptr [[B_PRIV_ADDR]],
     // CHECK: br label %[[SCAN_PHASE:[^,]+]]
@@ -179,13 +179,13 @@ void baz(int n) {
     // CHECK: [[BASE_IDX_I:%.+]] = load i32, ptr [[IV_ADDR:%.+]],
     // CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64
     // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS]]
-    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]]
-    // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0
+    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]]
+    // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds nuw [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0
     // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4
     // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}}[[A_BUF_IDX]], ptr {{.*}}[[A_PRIV]], i64 [[BYTES]], i1 false)
 
     // b_buffer[i] = b_priv;
-    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[BASE_IDX]]
+    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[BASE_IDX]]
     // CHECK: [[B_PRIV:%.+]] = load double, ptr [[B_PRIV_ADDR]],
     // CHECK: store double [[B_PRIV]], ptr [[B_BUF_IDX]],
     // CHECK: br label %[[LOOP_CONTINUE:[^,]+]]
@@ -217,13 +217,13 @@ void baz(int n) {
 
     // a_buffer[i] += a_buffer[i-pow(2, k)];
     // CHECK: [[IDX:%.+]] = mul nsw i64 [[I]], [[NUM_ELEMS]]
-    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]]
+    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]]
     // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]]
     // CHECK: [[IDX:%.+]] = mul nsw i64 [[IDX_SUB_K2POW]], [[NUM_ELEMS]]
-    // CHECK: [[A_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]]
-    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[I]]
+    // CHECK: [[A_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]]
+    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[I]]
     // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]]
-    // CHECK: [[B_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[IDX_SUB_K2POW]]
+    // CHECK: [[B_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[IDX_SUB_K2POW]]
     // CHECK: [[A_BUF_END:%.+]] = getelementptr float, ptr [[A_BUF_IDX]], i64 [[NUM_ELEMS]]
     // CHECK: [[ISEMPTY:%.+]] = icmp eq ptr [[A_BUF_IDX]], [[A_BUF_END]]
     // CHECK: br i1 [[ISEMPTY]], label %[[RED_DONE:[^,]+]], label %[[RED_BODY:[^,]+]]
@@ -280,13 +280,13 @@ void baz(int n) {
     // CHECK: [[IF_THEN]]:
     // CHECK: [[BASE_IDX_SUB_1:%.+]] = sub nuw i64 [[BASE_IDX]], 1
     // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX_SUB_1]], [[NUM_ELEMS]]
-    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]]
-    // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0
+    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]]
+    // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds nuw [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0
     // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4
     // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}}[[A_PRIV]], ptr {{.*}}[[A_BUF_IDX]], i64 [[BYTES]], i1 false)
 
     // b_priv = b_buffer[i];
-    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[BASE_IDX_SUB_1]]
+    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[BASE_IDX_SUB_1]]
     // CHECK: [[B_BUF_IDX_VAL:%.+]] = load double, ptr [[B_BUF_IDX]],
     // CHECK: store double [[B_BUF_IDX_VAL]], ptr [[B_PRIV_ADDR]],
     // CHECK: br label %[[SCAN_PHASE]]
diff --git clang/test/OpenMP/irbuilder_for_iterator.cpp clang/test/OpenMP/irbuilder_for_iterator.cpp
index 0098a7db575c..ec1c3af744b4 100644
--- clang/test/OpenMP/irbuilder_for_iterator.cpp
+++ clang/test/OpenMP/irbuilder_for_iterator.cpp
@@ -78,18 +78,18 @@ extern "C" void workshareloop_iterator(float *a, float *b, float *c) {
 // CHECK-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[B_ADDR]], align 8
 // CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[I]], align 4
 // CHECK-NEXT:    [[IDXPROM:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[IDXPROM]]
+// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP7]], i64 [[IDXPROM]]
 // CHECK-NEXT:    [[TMP9:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 // CHECK-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[C_ADDR]], align 8
 // CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[I]], align 4
 // CHECK-NEXT:    [[IDXPROM2:%.*]] = zext i32 [[TMP11]] to i64
-// CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i64 [[IDXPROM2]]
+// CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP10]], i64 [[IDXPROM2]]
 // CHECK-NEXT:    [[TMP12:%.*]] = load float, ptr [[ARRAYIDX3]], align 4
 // CHECK-NEXT:    [[MUL:%.*]] = fmul float [[TMP9]], [[TMP12]]
 // CHECK-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[A_ADDR]], align 8
 // CHECK-NEXT:    [[TMP14:%.*]] = load i32, ptr [[I]], align 4
 // CHECK-NEXT:    [[IDXPROM4:%.*]] = zext i32 [[TMP14]] to i64
-// CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[IDXPROM4]]
+// CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[IDXPROM4]]
 // CHECK-NEXT:    store float [[MUL]], ptr [[ARRAYIDX5]], align 4
 // CHECK-NEXT:    br label [[OMP_LOOP_INC]]
 // CHECK:       omp_loop.inc:
diff --git clang/test/OpenMP/irbuilder_for_rangefor.cpp clang/test/OpenMP/irbuilder_for_rangefor.cpp
index 45b34621afbb..86a043e638bc 100644
--- clang/test/OpenMP/irbuilder_for_rangefor.cpp
+++ clang/test/OpenMP/irbuilder_for_rangefor.cpp
@@ -94,18 +94,18 @@ extern "C" void workshareloop_rangefor(float *a, float *b, float *c) {
 // CHECK-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[B_ADDR]], align 8
 // CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[I]], align 4
 // CHECK-NEXT:    [[IDXPROM:%.*]] = zext i32 [[TMP11]] to i64
-// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i64 [[IDXPROM]]
+// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP10]], i64 [[IDXPROM]]
 // CHECK-NEXT:    [[TMP12:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 // CHECK-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[C_ADDR]], align 8
 // CHECK-NEXT:    [[TMP14:%.*]] = load i32, ptr [[I]], align 4
 // CHECK-NEXT:    [[IDXPROM2:%.*]] = zext i32 [[TMP14]] to i64
-// CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[IDXPROM2]]
+// CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[IDXPROM2]]
 // CHECK-NEXT:    [[TMP15:%.*]] = load float, ptr [[ARRAYIDX3]], align 4
 // CHECK-NEXT:    [[MUL:%.*]] = fmul float [[TMP12]], [[TMP15]]
 // CHECK-NEXT:    [[TMP16:%.*]] = load ptr, ptr [[A_ADDR]], align 8
 // CHECK-NEXT:    [[TMP17:%.*]] = load i32, ptr [[I]], align 4
 // CHECK-NEXT:    [[IDXPROM4:%.*]] = zext i32 [[TMP17]] to i64
-// CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM4]]
+// CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP16]], i64 [[IDXPROM4]]
 // CHECK-NEXT:    store float [[MUL]], ptr [[ARRAYIDX5]], align 4
 // CHECK-NEXT:    br label [[OMP_LOOP_INC]]
 // CHECK:       omp_loop.inc:
diff --git clang/test/OpenMP/irbuilder_for_unsigned.c clang/test/OpenMP/irbuilder_for_unsigned.c
index b0043b823ac8..675871a87b3b 100644
--- clang/test/OpenMP/irbuilder_for_unsigned.c
+++ clang/test/OpenMP/irbuilder_for_unsigned.c
@@ -65,24 +65,24 @@ extern "C" void workshareloop_unsigned(float *a, float *b, float *c, float *d) {
 // CHECK-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[B_ADDR]], align 8
 // CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[I]], align 4
 // CHECK-NEXT:    [[IDXPROM:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i64 [[IDXPROM]]
+// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP9]], i64 [[IDXPROM]]
 // CHECK-NEXT:    [[TMP11:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 // CHECK-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[C_ADDR]], align 8
 // CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[I]], align 4
 // CHECK-NEXT:    [[IDXPROM2:%.*]] = zext i32 [[TMP13]] to i64
-// CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM2]]
+// CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[IDXPROM2]]
 // CHECK-NEXT:    [[TMP14:%.*]] = load float, ptr [[ARRAYIDX3]], align 4
 // CHECK-NEXT:    [[MUL:%.*]] = fmul float [[TMP11]], [[TMP14]]
 // CHECK-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[D_ADDR]], align 8
 // CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr [[I]], align 4
 // CHECK-NEXT:    [[IDXPROM4:%.*]] = zext i32 [[TMP16]] to i64
-// CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[IDXPROM4]]
+// CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP15]], i64 [[IDXPROM4]]
 // CHECK-NEXT:    [[TMP17:%.*]] = load float, ptr [[ARRAYIDX5]], align 4
 // CHECK-NEXT:    [[MUL6:%.*]] = fmul float [[MUL]], [[TMP17]]
 // CHECK-NEXT:    [[TMP18:%.*]] = load ptr, ptr [[A_ADDR]], align 8
 // CHECK-NEXT:    [[TMP19:%.*]] = load i32, ptr [[I]], align 4
 // CHECK-NEXT:    [[IDXPROM7:%.*]] = zext i32 [[TMP19]] to i64
-// CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM7]]
+// CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw float, ptr [[TMP18]], i64 [[IDXPROM7]]
 // CHECK-NEXT:    store float [[MUL6]], ptr [[ARRAYIDX8]], align 4
 // CHECK-NEXT:    br label [[OMP_LOOP_INC]]
 // CHECK:       omp_loop.inc:
diff --git clang/test/OpenMP/irbuilder_for_unsigned_auto.c clang/test/OpenMP/irbuilder_for_unsigned_auto.c
index 19b2770bfa2d..39ede3ef971d 100644
--- clang/test/OpenMP/irbuilder_for_unsigned_auto.c
+++ clang/test/OpenMP/irbuilder_for_unsigned_auto.c
@@ -66,24 +66,24 @@ extern "C" void workshareloop_unsigned_auto(float *a, float *b, float *c, float
 // CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
 // CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[I]], align 4
 // CHECK-NEXT:    [[IDXPROM:%.*]] = zext i32 [[TMP4]] to i64
-// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 [[IDXPROM]]
+// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP3]], i64 [[IDXPROM]]
 // CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 // CHECK-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[C_ADDR]], align 8
 // CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[I]], align 4
 // CHECK-NEXT:    [[IDXPROM2:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[IDXPROM2]]
+// CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i64 [[IDXPROM2]]
 // CHECK-NEXT:    [[TMP8:%.*]] = load float, ptr [[ARRAYIDX3]], align 4
 // CHECK-NEXT:    [[MUL:%.*]] = fmul float [[TMP5]], [[TMP8]]
 // CHECK-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[D_ADDR]], align 8
 // CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[I]], align 4
 // CHECK-NEXT:    [[IDXPROM4:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i64 [[IDXPROM4]]
+// CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP9]], i64 [[IDXPROM4]]
 // CHECK-NEXT:    [[TMP11:%.*]] = load float, ptr [[ARRAYIDX5]], align 4
 // CHECK-NEXT:    [[MUL6:%.*]] = fmul float [[MUL]], [[TMP11]]
 // CHECK-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[A_ADDR]], align 8
 // CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[I]], align 4
 // CHECK-NEXT:    [[IDXPROM7:%.*]] = zext i32 [[TMP13]] to i64
-// CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM7]]
+// CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[IDXPROM7]]
 // CHECK-NEXT:    store float [[MUL6]], ptr [[ARRAYIDX8]], align 4
 // CHECK-NEXT:    br label [[OMP_LOOP_INC]]
 // CHECK:       omp_loop.inc:
diff --git clang/test/OpenMP/irbuilder_for_unsigned_down.c clang/test/OpenMP/irbuilder_for_unsigned_down.c
index 6e179826a6ef..5515f086c34a 100644
--- clang/test/OpenMP/irbuilder_for_unsigned_down.c
+++ clang/test/OpenMP/irbuilder_for_unsigned_down.c
@@ -67,7 +67,7 @@ extern "C" void workshareloop_unsigned_down(float *a) {
 // CHECK-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[A_ADDR]], align 8
 // CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[I]], align 4
 // CHECK-NEXT:    [[IDXPROM:%.*]] = zext i32 [[TMP11]] to i64
-// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i64 [[IDXPROM]]
+// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP10]], i64 [[IDXPROM]]
 // CHECK-NEXT:    store float [[CONV]], ptr [[ARRAYIDX]], align 4
 // CHECK-NEXT:    br label [[OMP_LOOP_INC]]
 // CHECK:       omp_loop.inc:
diff --git clang/test/OpenMP/irbuilder_for_unsigned_dynamic.c clang/test/OpenMP/irbuilder_for_unsigned_dynamic.c
index 8f3297061938..f20b60e608d2 100644
--- clang/test/OpenMP/irbuilder_for_unsigned_dynamic.c
+++ clang/test/OpenMP/irbuilder_for_unsigned_dynamic.c
@@ -66,24 +66,24 @@ extern "C" void workshareloop_unsigned_dynamic(float *a, float *b, float *c, flo
 // CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
 // CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[I]], align 4
 // CHECK-NEXT:    [[IDXPROM:%.*]] = zext i32 [[TMP4]] to i64
-// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 [[IDXPROM]]
+// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP3]], i64 [[IDXPROM]]
 // CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 // CHECK-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[C_ADDR]], align 8
 // CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[I]], align 4
 // CHECK-NEXT:    [[IDXPROM2:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[IDXPROM2]]
+// CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i64 [[IDXPROM2]]
 // CHECK-NEXT:    [[TMP8:%.*]] = load float, ptr [[ARRAYIDX3]], align 4
 // CHECK-NEXT:    [[MUL:%.*]] = fmul float [[TMP5]], [[TMP8]]
 // CHECK-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[D_ADDR]], align 8
 // CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[I]], align 4
 // CHECK-NEXT:    [[IDXPROM4:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i64 [[IDXPROM4]]
+// CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP9]], i64 [[IDXPROM4]]
 // CHECK-NEXT:    [[TMP11:%.*]] = load float, ptr [[ARRAYIDX5]], align 4
 // CHECK-NEXT:    [[MUL6:%.*]] = fmul float [[MUL]], [[TMP11]]
 // CHECK-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[A_ADDR]], align 8
 // CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[I]], align 4
 // CHECK-NEXT:    [[IDXPROM7:%.*]] = zext i32 [[TMP13]] to i64
-// CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM7]]
+// CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[IDXPROM7]]
 // CHECK-NEXT:    store float [[MUL6]], ptr [[ARRAYIDX8]], align 4
 // CHECK-NEXT:    br label [[OMP_LOOP_INC]]
 // CHECK:       omp_loop.inc:
diff --git clang/test/OpenMP/irbuilder_for_unsigned_dynamic_chunked.c clang/test/OpenMP/irbuilder_for_unsigned_dynamic_chunked.c
index c2b0948bf7ae..599f256243b1 100644
--- clang/test/OpenMP/irbuilder_for_unsigned_dynamic_chunked.c
+++ clang/test/OpenMP/irbuilder_for_unsigned_dynamic_chunked.c
@@ -66,24 +66,24 @@ extern "C" void workshareloop_unsigned_dynamic_chunked(float *a, float *b, float
 // CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
 // CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[I]], align 4
 // CHECK-NEXT:    [[IDXPROM:%.*]] = zext i32 [[TMP4]] to i64
-// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 [[IDXPROM]]
+// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP3]], i64 [[IDXPROM]]
 // CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 // CHECK-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[C_ADDR]], align 8
 // CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[I]], align 4
 // CHECK-NEXT:    [[IDXPROM2:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[IDXPROM2]]
+// CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i64 [[IDXPROM2]]
 // CHECK-NEXT:    [[TMP8:%.*]] = load float, ptr [[ARRAYIDX3]], align 4
 // CHECK-NEXT:    [[MUL:%.*]] = fmul float [[TMP5]], [[TMP8]]
 // CHECK-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[D_ADDR]], align 8
 // CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[I]], align 4
 // CHECK-NEXT:    [[IDXPROM4:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i64 [[IDXPROM4]]
+// CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP9]], i64 [[IDXPROM4]]
 // CHECK-NEXT:    [[TMP11:%.*]] = load float, ptr [[ARRAYIDX5]], align 4
 // CHECK-NEXT:    [[MUL6:%.*]] = fmul float [[MUL]], [[TMP11]]
 // CHECK-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[A_ADDR]], align 8
 // CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[I]], align 4
 // CHECK-NEXT:    [[IDXPROM7:%.*]] = zext i32 [[TMP13]] to i64
-// CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM7]]
+// CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[IDXPROM7]]
 // CHECK-NEXT:    store float [[MUL6]], ptr [[ARRAYIDX8]], align 4
 // CHECK-NEXT:    br label [[OMP_LOOP_INC]]
 // CHECK:       omp_loop.inc:
diff --git clang/test/OpenMP/irbuilder_for_unsigned_runtime.c clang/test/OpenMP/irbuilder_for_unsigned_runtime.c
index 68becf9f694a..c27bcba15591 100644
--- clang/test/OpenMP/irbuilder_for_unsigned_runtime.c
+++ clang/test/OpenMP/irbuilder_for_unsigned_runtime.c
@@ -66,24 +66,24 @@ extern "C" void workshareloop_unsigned_runtime(float *a, float *b, float *c, flo
 // CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
 // CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[I]], align 4
 // CHECK-NEXT:    [[IDXPROM:%.*]] = zext i32 [[TMP4]] to i64
-// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 [[IDXPROM]]
+// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP3]], i64 [[IDXPROM]]
 // CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 // CHECK-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[C_ADDR]], align 8
 // CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[I]], align 4
 // CHECK-NEXT:    [[IDXPROM2:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[IDXPROM2]]
+// CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i64 [[IDXPROM2]]
 // CHECK-NEXT:    [[TMP8:%.*]] = load float, ptr [[ARRAYIDX3]], align 4
 // CHECK-NEXT:    [[MUL:%.*]] = fmul float [[TMP5]], [[TMP8]]
 // CHECK-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[D_ADDR]], align 8
 // CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[I]], align 4
 // CHECK-NEXT:    [[IDXPROM4:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i64 [[IDXPROM4]]
+// CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP9]], i64 [[IDXPROM4]]
 // CHECK-NEXT:    [[TMP11:%.*]] = load float, ptr [[ARRAYIDX5]], align 4
 // CHECK-NEXT:    [[MUL6:%.*]] = fmul float [[MUL]], [[TMP11]]
 // CHECK-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[A_ADDR]], align 8
 // CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[I]], align 4
 // CHECK-NEXT:    [[IDXPROM7:%.*]] = zext i32 [[TMP13]] to i64
-// CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM7]]
+// CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[IDXPROM7]]
 // CHECK-NEXT:    store float [[MUL6]], ptr [[ARRAYIDX8]], align 4
 // CHECK-NEXT:    br label [[OMP_LOOP_INC]]
 // CHECK:       omp_loop.inc:
diff --git clang/test/OpenMP/irbuilder_for_unsigned_static_chunked.c clang/test/OpenMP/irbuilder_for_unsigned_static_chunked.c
index 71fb6b5473da..b937568ca9f1 100644
--- clang/test/OpenMP/irbuilder_for_unsigned_static_chunked.c
+++ clang/test/OpenMP/irbuilder_for_unsigned_static_chunked.c
@@ -108,24 +108,24 @@ extern "C" void workshareloop_unsigned_static_chunked(float *a, float *b, float
 // CHECK-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[B_ADDR]], align 8
 // CHECK-NEXT:    [[TMP18:%.*]] = load i32, ptr [[I]], align 4
 // CHECK-NEXT:    [[IDXPROM:%.*]] = zext i32 [[TMP18]] to i64
-// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM]]
+// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i64 [[IDXPROM]]
 // CHECK-NEXT:    [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 // CHECK-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[C_ADDR]], align 8
 // CHECK-NEXT:    [[TMP21:%.*]] = load i32, ptr [[I]], align 4
 // CHECK-NEXT:    [[IDXPROM2:%.*]] = zext i32 [[TMP21]] to i64
-// CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM2]]
+// CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i64 [[IDXPROM2]]
 // CHECK-NEXT:    [[TMP22:%.*]] = load float, ptr [[ARRAYIDX3]], align 4
 // CHECK-NEXT:    [[MUL:%.*]] = fmul float [[TMP19]], [[TMP22]]
 // CHECK-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[D_ADDR]], align 8
 // CHECK-NEXT:    [[TMP24:%.*]] = load i32, ptr [[I]], align 4
 // CHECK-NEXT:    [[IDXPROM4:%.*]] = zext i32 [[TMP24]] to i64
-// CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM4]]
+// CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP23]], i64 [[IDXPROM4]]
 // CHECK-NEXT:    [[TMP25:%.*]] = load float, ptr [[ARRAYIDX5]], align 4
 // CHECK-NEXT:    [[MUL6:%.*]] = fmul float [[MUL]], [[TMP25]]
 // CHECK-NEXT:    [[TMP26:%.*]] = load ptr, ptr [[A_ADDR]], align 8
 // CHECK-NEXT:    [[TMP27:%.*]] = load i32, ptr [[I]], align 4
 // CHECK-NEXT:    [[IDXPROM7:%.*]] = zext i32 [[TMP27]] to i64
-// CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[IDXPROM7]]
+// CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw float, ptr [[TMP26]], i64 [[IDXPROM7]]
 // CHECK-NEXT:    store float [[MUL6]], ptr [[ARRAYIDX8]], align 4
 // CHECK-NEXT:    br label [[OMP_LOOP_INC]]
 // CHECK:       omp_loop.inc:
diff --git clang/test/OpenMP/map_struct_ordering.cpp clang/test/OpenMP/map_struct_ordering.cpp
index d5b22d8ff2a4..a52ddad465f3 100644
--- clang/test/OpenMP/map_struct_ordering.cpp
+++ clang/test/OpenMP/map_struct_ordering.cpp
@@ -57,7 +57,7 @@ int map_struct() {
 // CHECK-NEXT:    [[DATUM:%.*]] = getelementptr inbounds nuw [[STRUCT_DESCRIPTOR]], ptr [[DAT]], i32 0, i32 0
 // CHECK-NEXT:    [[DATUM2:%.*]] = getelementptr inbounds nuw [[STRUCT_DESCRIPTOR]], ptr [[DAT]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[DATUM2]], align 8
-// CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 0
+// CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP0]], i64 0
 // CHECK-NEXT:    [[TMP1:%.*]] = getelementptr [[STRUCT_DESCRIPTOR]], ptr [[DAT]], i32 1
 // CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
 // CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[DAT]] to i64
diff --git clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp
index e90f0783787c..7d467293d0c8 100644
--- clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp
+++ clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp
@@ -76,7 +76,7 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[VLA:%.*]] = alloca i16, i64 [[TMP2]], align 16
 // CHECK1-NEXT:    store i64 [[TMP2]], ptr [[__VLA_EXPR0]], align 8
 // CHECK1-NEXT:    call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]])
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds nuw [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[A]], ptr [[TMP4]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1
@@ -91,7 +91,7 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    store ptr @.red_comb., ptr [[TMP9]], align 8
 // CHECK1-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6
 // CHECK1-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP10]], i8 0, i64 4, i1 false)
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_1:%.*]] = getelementptr inbounds [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_1:%.*]] = getelementptr inbounds nuw [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1
 // CHECK1-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_1]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[B]], ptr [[TMP11]], align 8
 // CHECK1-NEXT:    [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_1]], i32 0, i32 1
@@ -106,7 +106,7 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    store ptr @.red_comb..2, ptr [[TMP16]], align 8
 // CHECK1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_1]], i32 0, i32 6
 // CHECK1-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP17]], i8 0, i64 4, i1 false)
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_2:%.*]] = getelementptr inbounds [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 2
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_2:%.*]] = getelementptr inbounds nuw [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 2
 // CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_2]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[ARGC_ADDR]], ptr [[TMP18]], align 8
 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_2]], i32 0, i32 1
@@ -124,7 +124,7 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP25:%.*]] = call ptr @__kmpc_taskred_init(i32 [[TMP0]], i32 3, ptr [[DOTRD_INPUT_]])
 // CHECK1-NEXT:    store ptr [[TMP25]], ptr [[DOTTASK_RED_]], align 8
 // CHECK1-NEXT:    call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]])
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_4:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_3]], i64 0, i64 0
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_4:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_3]], i64 0, i64 0
 // CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0:%.*]], ptr [[DOTRD_INPUT_GEP_4]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[C]], ptr [[TMP26]], align 8
 // CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_4]], i32 0, i32 1
@@ -139,7 +139,7 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    store ptr @.red_comb..6, ptr [[TMP31]], align 8
 // CHECK1-NEXT:    [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_4]], i32 0, i32 6
 // CHECK1-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP32]], i8 0, i64 4, i1 false)
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_5:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_3]], i64 0, i64 1
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_5:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_3]], i64 0, i64 1
 // CHECK1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[VLA]], ptr [[TMP33]], align 8
 // CHECK1-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 1
diff --git clang/test/OpenMP/master_taskloop_reduction_codegen.cpp clang/test/OpenMP/master_taskloop_reduction_codegen.cpp
index 180ff3a94d24..b0652c843845 100644
--- clang/test/OpenMP/master_taskloop_reduction_codegen.cpp
+++ clang/test/OpenMP/master_taskloop_reduction_codegen.cpp
@@ -84,9 +84,9 @@ sum = 0.0;
 // CHECK-DAG:    store ptr @[[RED_COMB1:.+]], ptr [[TMP25]],
 // CHECK-DAG:    [[TMP26:%.*]] = getelementptr inbounds nuw %struct.kmp_taskred_input_t, ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6
 // CHECK-DAG:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP26]], i8 0, i64 4, i1 false)
-// CHECK-DAG:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x %struct.S], ptr [[C]], i64 0, i64 0
+// CHECK-DAG:    [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw [100 x %struct.S], ptr [[C]], i64 0, i64 0
 // CHECK-DAG:    [[LB_ADD_LEN:%.*]] = add nsw i64 -1, %
-// CHECK-DAG:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [100 x %struct.S], ptr [[C]], i64 0, i64 [[LB_ADD_LEN]]
+// CHECK-DAG:    [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw [100 x %struct.S], ptr [[C]], i64 0, i64 [[LB_ADD_LEN]]
 // CHECK-DAG:    store ptr [[ARRAYIDX5]], ptr [[TMP28:%[^,]+]],
 // CHECK-DAG:    [[TMP28]] = getelementptr inbounds nuw %struct.kmp_taskred_input_t, ptr [[DOTRD_INPUT_GEP_4:%.+]], i32 0, i32 0
 // CHECK-DAG:    store ptr [[ARRAYIDX5]], ptr [[TMP28:%[^,]+]],
@@ -138,10 +138,10 @@ sum = 0.0;
 // CHECK-DAG:    store ptr @[[RED_COMB4:.+]], ptr [[TMP59]],
 // CHECK-DAG:    [[TMP60:%.*]] = getelementptr inbounds nuw %struct.kmp_taskred_input_t, ptr [[DOTRD_INPUT_GEP_8]], i32 0, i32 6
 // CHECK-DAG:    store i32 1, ptr [[TMP60]],
-// CHECK-DAG:    [[DOTRD_INPUT_GEP_]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64
-// CHECK-DAG:    [[DOTRD_INPUT_GEP_4]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64
-// CHECK-DAG:    [[DOTRD_INPUT_GEP_7]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64
-// CHECK-DAG:    [[DOTRD_INPUT_GEP_8]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_]] = getelementptr inbounds nuw [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_4]] = getelementptr inbounds nuw [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_7]] = getelementptr inbounds nuw [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_8]] = getelementptr inbounds nuw [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64
 // CHECK:    [[TMP62:%.*]] = call ptr @__kmpc_taskred_init(i32 [[TMP0]], i32 4, ptr [[DOTRD_INPUT_]])
 // CHECK:    [[TMP63:%.*]] = load i32, ptr [[N]],
 // CHECK:    store i32 [[TMP63]], ptr [[DOTCAPTURE_EXPR_]],
diff --git clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp
index 2061da7c4d78..b0d00c5f539b 100644
--- clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp
+++ clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp
@@ -76,7 +76,7 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[VLA:%.*]] = alloca i16, i64 [[TMP2]], align 16
 // CHECK1-NEXT:    store i64 [[TMP2]], ptr [[__VLA_EXPR0]], align 8
 // CHECK1-NEXT:    call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]])
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds nuw [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[A]], ptr [[TMP4]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1
@@ -91,7 +91,7 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    store ptr @.red_comb., ptr [[TMP9]], align 8
 // CHECK1-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6
 // CHECK1-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP10]], i8 0, i64 4, i1 false)
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_1:%.*]] = getelementptr inbounds [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_1:%.*]] = getelementptr inbounds nuw [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1
 // CHECK1-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_1]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[B]], ptr [[TMP11]], align 8
 // CHECK1-NEXT:    [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_1]], i32 0, i32 1
@@ -106,7 +106,7 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    store ptr @.red_comb..2, ptr [[TMP16]], align 8
 // CHECK1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_1]], i32 0, i32 6
 // CHECK1-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP17]], i8 0, i64 4, i1 false)
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_2:%.*]] = getelementptr inbounds [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 2
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_2:%.*]] = getelementptr inbounds nuw [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 2
 // CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_2]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[ARGC_ADDR]], ptr [[TMP18]], align 8
 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_2]], i32 0, i32 1
@@ -124,7 +124,7 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP25:%.*]] = call ptr @__kmpc_taskred_init(i32 [[TMP0]], i32 3, ptr [[DOTRD_INPUT_]])
 // CHECK1-NEXT:    store ptr [[TMP25]], ptr [[DOTTASK_RED_]], align 8
 // CHECK1-NEXT:    call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]])
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_4:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_3]], i64 0, i64 0
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_4:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_3]], i64 0, i64 0
 // CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0:%.*]], ptr [[DOTRD_INPUT_GEP_4]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[C]], ptr [[TMP26]], align 8
 // CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_4]], i32 0, i32 1
@@ -139,7 +139,7 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    store ptr @.red_comb..6, ptr [[TMP31]], align 8
 // CHECK1-NEXT:    [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_4]], i32 0, i32 6
 // CHECK1-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP32]], i8 0, i64 4, i1 false)
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_5:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_3]], i64 0, i64 1
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_5:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_3]], i64 0, i64 1
 // CHECK1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[VLA]], ptr [[TMP33]], align 8
 // CHECK1-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 1
diff --git clang/test/OpenMP/master_taskloop_simd_reduction_codegen.cpp clang/test/OpenMP/master_taskloop_simd_reduction_codegen.cpp
index a69844dc4dee..7def61251b24 100644
--- clang/test/OpenMP/master_taskloop_simd_reduction_codegen.cpp
+++ clang/test/OpenMP/master_taskloop_simd_reduction_codegen.cpp
@@ -80,9 +80,9 @@ sum = 0.0;
 // CHECK-DAG:    store ptr @[[RED_COMB1:.+]], ptr [[TMP25]],
 // CHECK-DAG:    [[TMP26:%.*]] = getelementptr inbounds nuw %struct.kmp_taskred_input_t, ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6
 // CHECK-DAG:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP26]], i8 0, i64 4, i1 false)
-// CHECK-DAG:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x %struct.S], ptr [[C]], i64 0, i64 0
+// CHECK-DAG:    [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw [100 x %struct.S], ptr [[C]], i64 0, i64 0
 // CHECK-DAG:    [[LB_ADD_LEN:%.*]] = add nsw i64 -1, %
-// CHECK-DAG:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [100 x %struct.S], ptr [[C]], i64 0, i64 [[LB_ADD_LEN]]
+// CHECK-DAG:    [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw [100 x %struct.S], ptr [[C]], i64 0, i64 [[LB_ADD_LEN]]
 // CHECK-DAG:    store ptr [[ARRAYIDX5]], ptr [[TMP28:%[^,]+]],
 // CHECK-DAG:    [[TMP28]] = getelementptr inbounds nuw %struct.kmp_taskred_input_t, ptr [[DOTRD_INPUT_GEP_4:%.+]], i32 0, i32 0
 // CHECK-DAG:    store ptr [[ARRAYIDX5]], ptr [[TMP28:%[^,]+]],
@@ -134,10 +134,10 @@ sum = 0.0;
 // CHECK-DAG:    store ptr @[[RED_COMB4:.+]], ptr [[TMP59]],
 // CHECK-DAG:    [[TMP60:%.*]] = getelementptr inbounds nuw %struct.kmp_taskred_input_t, ptr [[DOTRD_INPUT_GEP_8]], i32 0, i32 6
 // CHECK-DAG:    store i32 1, ptr [[TMP60]],
-// CHECK-DAG:    [[DOTRD_INPUT_GEP_]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64
-// CHECK-DAG:    [[DOTRD_INPUT_GEP_4]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64
-// CHECK-DAG:    [[DOTRD_INPUT_GEP_7]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64
-// CHECK-DAG:    [[DOTRD_INPUT_GEP_8]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_]] = getelementptr inbounds nuw [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_4]] = getelementptr inbounds nuw [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_7]] = getelementptr inbounds nuw [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_8]] = getelementptr inbounds nuw [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64
 // CHECK:    [[TMP62:%.*]] = call ptr @__kmpc_taskred_init(i32 [[TMP0]], i32 4, ptr [[DOTRD_INPUT_]])
 // CHECK:    [[TMP63:%.*]] = load i32, ptr [[N]],
 // CHECK:    store i32 [[TMP63]], ptr [[DOTCAPTURE_EXPR_]],
diff --git clang/test/OpenMP/ordered_codegen.cpp clang/test/OpenMP/ordered_codegen.cpp
index 0a73eaefc980..67285cfaef34 100644
--- clang/test/OpenMP/ordered_codegen.cpp
+++ clang/test/OpenMP/ordered_codegen.cpp
@@ -255,21 +255,21 @@ void foo_simd(int low, int up) {
 // CHECK1-NEXT:    call void @__kmpc_ordered(ptr @[[GLOB1]], i32 [[TMP0]])
 // CHECK1-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[B_ADDR]], align 8
 // CHECK1-NEXT:    [[TMP7:%.*]] = load i64, ptr [[I]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP7]]
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i64 [[TMP7]]
 // CHECK1-NEXT:    [[TMP8:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 // CHECK1-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[C_ADDR]], align 8
 // CHECK1-NEXT:    [[TMP10:%.*]] = load i64, ptr [[I]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i64 [[TMP10]]
+// CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP9]], i64 [[TMP10]]
 // CHECK1-NEXT:    [[TMP11:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
 // CHECK1-NEXT:    [[MUL3:%.*]] = fmul float [[TMP8]], [[TMP11]]
 // CHECK1-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[D_ADDR]], align 8
 // CHECK1-NEXT:    [[TMP13:%.*]] = load i64, ptr [[I]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[TMP13]]
+// CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[TMP13]]
 // CHECK1-NEXT:    [[TMP14:%.*]] = load float, ptr [[ARRAYIDX4]], align 4
 // CHECK1-NEXT:    [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP14]]
 // CHECK1-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[A_ADDR]], align 8
 // CHECK1-NEXT:    [[TMP16:%.*]] = load i64, ptr [[I]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[TMP16]]
+// CHECK1-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw float, ptr [[TMP15]], i64 [[TMP16]]
 // CHECK1-NEXT:    store float [[MUL5]], ptr [[ARRAYIDX6]], align 4
 // CHECK1-NEXT:    call void @__kmpc_end_ordered(ptr @[[GLOB1]], i32 [[TMP0]])
 // CHECK1-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
@@ -485,24 +485,24 @@ void foo_simd(int low, int up) {
 // CHECK1-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[B_ADDR]], align 8
 // CHECK1-NEXT:    [[TMP9:%.*]] = load i8, ptr [[I]], align 1
 // CHECK1-NEXT:    [[IDXPROM:%.*]] = zext i8 [[TMP9]] to i64
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 [[IDXPROM]]
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i64 [[IDXPROM]]
 // CHECK1-NEXT:    [[TMP10:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 // CHECK1-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[C_ADDR]], align 8
 // CHECK1-NEXT:    [[TMP12:%.*]] = load i8, ptr [[I]], align 1
 // CHECK1-NEXT:    [[IDXPROM7:%.*]] = zext i8 [[TMP12]] to i64
-// CHECK1-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[IDXPROM7]]
+// CHECK1-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[IDXPROM7]]
 // CHECK1-NEXT:    [[TMP13:%.*]] = load float, ptr [[ARRAYIDX8]], align 4
 // CHECK1-NEXT:    [[MUL9:%.*]] = fmul float [[TMP10]], [[TMP13]]
 // CHECK1-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[D_ADDR]], align 8
 // CHECK1-NEXT:    [[TMP15:%.*]] = load i8, ptr [[I]], align 1
 // CHECK1-NEXT:    [[IDXPROM10:%.*]] = zext i8 [[TMP15]] to i64
-// CHECK1-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[IDXPROM10]]
+// CHECK1-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i64 [[IDXPROM10]]
 // CHECK1-NEXT:    [[TMP16:%.*]] = load float, ptr [[ARRAYIDX11]], align 4
 // CHECK1-NEXT:    [[MUL12:%.*]] = fmul float [[MUL9]], [[TMP16]]
 // CHECK1-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[A_ADDR]], align 8
 // CHECK1-NEXT:    [[TMP18:%.*]] = load i8, ptr [[I]], align 1
 // CHECK1-NEXT:    [[IDXPROM13:%.*]] = zext i8 [[TMP18]] to i64
-// CHECK1-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM13]]
+// CHECK1-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i64 [[IDXPROM13]]
 // CHECK1-NEXT:    store float [[MUL12]], ptr [[ARRAYIDX14]], align 4
 // CHECK1-NEXT:    call void @__kmpc_end_ordered(ptr @[[GLOB1]], i32 [[TMP0]])
 // CHECK1-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
@@ -866,21 +866,21 @@ void foo_simd(int low, int up) {
 // CHECK1-IRBUILDER-NEXT:    call void @__kmpc_ordered(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
 // CHECK1-IRBUILDER-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[B_ADDR]], align 8
 // CHECK1-IRBUILDER-NEXT:    [[TMP6:%.*]] = load i64, ptr [[I]], align 8
-// CHECK1-IRBUILDER-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[TMP6]]
+// CHECK1-IRBUILDER-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP5]], i64 [[TMP6]]
 // CHECK1-IRBUILDER-NEXT:    [[TMP7:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 // CHECK1-IRBUILDER-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[C_ADDR]], align 8
 // CHECK1-IRBUILDER-NEXT:    [[TMP9:%.*]] = load i64, ptr [[I]], align 8
-// CHECK1-IRBUILDER-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 [[TMP9]]
+// CHECK1-IRBUILDER-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i64 [[TMP9]]
 // CHECK1-IRBUILDER-NEXT:    [[TMP10:%.*]] = load float, ptr [[ARRAYIDX4]], align 4
 // CHECK1-IRBUILDER-NEXT:    [[MUL5:%.*]] = fmul float [[TMP7]], [[TMP10]]
 // CHECK1-IRBUILDER-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[D_ADDR]], align 8
 // CHECK1-IRBUILDER-NEXT:    [[TMP12:%.*]] = load i64, ptr [[I]], align 8
-// CHECK1-IRBUILDER-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[TMP12]]
+// CHECK1-IRBUILDER-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[TMP12]]
 // CHECK1-IRBUILDER-NEXT:    [[TMP13:%.*]] = load float, ptr [[ARRAYIDX6]], align 4
 // CHECK1-IRBUILDER-NEXT:    [[MUL7:%.*]] = fmul float [[MUL5]], [[TMP13]]
 // CHECK1-IRBUILDER-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[A_ADDR]], align 8
 // CHECK1-IRBUILDER-NEXT:    [[TMP15:%.*]] = load i64, ptr [[I]], align 8
-// CHECK1-IRBUILDER-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP15]]
+// CHECK1-IRBUILDER-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i64 [[TMP15]]
 // CHECK1-IRBUILDER-NEXT:    store float [[MUL7]], ptr [[ARRAYIDX8]], align 4
 // CHECK1-IRBUILDER-NEXT:    br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]]
 // CHECK1-IRBUILDER:       omp.inner.for.body.ordered.after:
@@ -1110,24 +1110,24 @@ void foo_simd(int low, int up) {
 // CHECK1-IRBUILDER-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[B_ADDR]], align 8
 // CHECK1-IRBUILDER-NEXT:    [[TMP8:%.*]] = load i8, ptr [[I]], align 1
 // CHECK1-IRBUILDER-NEXT:    [[IDXPROM:%.*]] = zext i8 [[TMP8]] to i64
-// CHECK1-IRBUILDER-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[IDXPROM]]
+// CHECK1-IRBUILDER-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP7]], i64 [[IDXPROM]]
 // CHECK1-IRBUILDER-NEXT:    [[TMP9:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 // CHECK1-IRBUILDER-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[C_ADDR]], align 8
 // CHECK1-IRBUILDER-NEXT:    [[TMP11:%.*]] = load i8, ptr [[I]], align 1
 // CHECK1-IRBUILDER-NEXT:    [[IDXPROM9:%.*]] = zext i8 [[TMP11]] to i64
-// CHECK1-IRBUILDER-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i64 [[IDXPROM9]]
+// CHECK1-IRBUILDER-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw float, ptr [[TMP10]], i64 [[IDXPROM9]]
 // CHECK1-IRBUILDER-NEXT:    [[TMP12:%.*]] = load float, ptr [[ARRAYIDX10]], align 4
 // CHECK1-IRBUILDER-NEXT:    [[MUL11:%.*]] = fmul float [[TMP9]], [[TMP12]]
 // CHECK1-IRBUILDER-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[D_ADDR]], align 8
 // CHECK1-IRBUILDER-NEXT:    [[TMP14:%.*]] = load i8, ptr [[I]], align 1
 // CHECK1-IRBUILDER-NEXT:    [[IDXPROM12:%.*]] = zext i8 [[TMP14]] to i64
-// CHECK1-IRBUILDER-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[IDXPROM12]]
+// CHECK1-IRBUILDER-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[IDXPROM12]]
 // CHECK1-IRBUILDER-NEXT:    [[TMP15:%.*]] = load float, ptr [[ARRAYIDX13]], align 4
 // CHECK1-IRBUILDER-NEXT:    [[MUL14:%.*]] = fmul float [[MUL11]], [[TMP15]]
 // CHECK1-IRBUILDER-NEXT:    [[TMP16:%.*]] = load ptr, ptr [[A_ADDR]], align 8
 // CHECK1-IRBUILDER-NEXT:    [[TMP17:%.*]] = load i8, ptr [[I]], align 1
 // CHECK1-IRBUILDER-NEXT:    [[IDXPROM15:%.*]] = zext i8 [[TMP17]] to i64
-// CHECK1-IRBUILDER-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM15]]
+// CHECK1-IRBUILDER-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds nuw float, ptr [[TMP16]], i64 [[IDXPROM15]]
 // CHECK1-IRBUILDER-NEXT:    store float [[MUL14]], ptr [[ARRAYIDX16]], align 4
 // CHECK1-IRBUILDER-NEXT:    br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]]
 // CHECK1-IRBUILDER:       omp.inner.for.body.ordered.after:
@@ -1495,21 +1495,21 @@ void foo_simd(int low, int up) {
 // CHECK3-NEXT:    call void @__kmpc_ordered(ptr @[[GLOB1]], i32 [[TMP0]])
 // CHECK3-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[B_ADDR]], align 8
 // CHECK3-NEXT:    [[TMP7:%.*]] = load i64, ptr [[I]], align 8
-// CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP7]]
+// CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i64 [[TMP7]]
 // CHECK3-NEXT:    [[TMP8:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 // CHECK3-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[C_ADDR]], align 8
 // CHECK3-NEXT:    [[TMP10:%.*]] = load i64, ptr [[I]], align 8
-// CHECK3-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i64 [[TMP10]]
+// CHECK3-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP9]], i64 [[TMP10]]
 // CHECK3-NEXT:    [[TMP11:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
 // CHECK3-NEXT:    [[MUL3:%.*]] = fmul float [[TMP8]], [[TMP11]]
 // CHECK3-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[D_ADDR]], align 8
 // CHECK3-NEXT:    [[TMP13:%.*]] = load i64, ptr [[I]], align 8
-// CHECK3-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[TMP13]]
+// CHECK3-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[TMP13]]
 // CHECK3-NEXT:    [[TMP14:%.*]] = load float, ptr [[ARRAYIDX4]], align 4
 // CHECK3-NEXT:    [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP14]]
 // CHECK3-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[A_ADDR]], align 8
 // CHECK3-NEXT:    [[TMP16:%.*]] = load i64, ptr [[I]], align 8
-// CHECK3-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[TMP16]]
+// CHECK3-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw float, ptr [[TMP15]], i64 [[TMP16]]
 // CHECK3-NEXT:    store float [[MUL5]], ptr [[ARRAYIDX6]], align 4
 // CHECK3-NEXT:    call void @__kmpc_end_ordered(ptr @[[GLOB1]], i32 [[TMP0]])
 // CHECK3-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
@@ -1725,24 +1725,24 @@ void foo_simd(int low, int up) {
 // CHECK3-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[B_ADDR]], align 8
 // CHECK3-NEXT:    [[TMP9:%.*]] = load i8, ptr [[I]], align 1
 // CHECK3-NEXT:    [[IDXPROM:%.*]] = zext i8 [[TMP9]] to i64
-// CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 [[IDXPROM]]
+// CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i64 [[IDXPROM]]
 // CHECK3-NEXT:    [[TMP10:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 // CHECK3-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[C_ADDR]], align 8
 // CHECK3-NEXT:    [[TMP12:%.*]] = load i8, ptr [[I]], align 1
 // CHECK3-NEXT:    [[IDXPROM7:%.*]] = zext i8 [[TMP12]] to i64
-// CHECK3-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[IDXPROM7]]
+// CHECK3-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[IDXPROM7]]
 // CHECK3-NEXT:    [[TMP13:%.*]] = load float, ptr [[ARRAYIDX8]], align 4
 // CHECK3-NEXT:    [[MUL9:%.*]] = fmul float [[TMP10]], [[TMP13]]
 // CHECK3-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[D_ADDR]], align 8
 // CHECK3-NEXT:    [[TMP15:%.*]] = load i8, ptr [[I]], align 1
 // CHECK3-NEXT:    [[IDXPROM10:%.*]] = zext i8 [[TMP15]] to i64
-// CHECK3-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[IDXPROM10]]
+// CHECK3-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i64 [[IDXPROM10]]
 // CHECK3-NEXT:    [[TMP16:%.*]] = load float, ptr [[ARRAYIDX11]], align 4
 // CHECK3-NEXT:    [[MUL12:%.*]] = fmul float [[MUL9]], [[TMP16]]
 // CHECK3-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[A_ADDR]], align 8
 // CHECK3-NEXT:    [[TMP18:%.*]] = load i8, ptr [[I]], align 1
 // CHECK3-NEXT:    [[IDXPROM13:%.*]] = zext i8 [[TMP18]] to i64
-// CHECK3-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM13]]
+// CHECK3-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i64 [[IDXPROM13]]
 // CHECK3-NEXT:    store float [[MUL12]], ptr [[ARRAYIDX14]], align 4
 // CHECK3-NEXT:    call void @__kmpc_end_ordered(ptr @[[GLOB1]], i32 [[TMP0]])
 // CHECK3-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
@@ -2106,21 +2106,21 @@ void foo_simd(int low, int up) {
 // CHECK3-IRBUILDER-NEXT:    call void @__kmpc_ordered(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
 // CHECK3-IRBUILDER-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[B_ADDR]], align 8
 // CHECK3-IRBUILDER-NEXT:    [[TMP6:%.*]] = load i64, ptr [[I]], align 8
-// CHECK3-IRBUILDER-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[TMP6]]
+// CHECK3-IRBUILDER-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP5]], i64 [[TMP6]]
 // CHECK3-IRBUILDER-NEXT:    [[TMP7:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 // CHECK3-IRBUILDER-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[C_ADDR]], align 8
 // CHECK3-IRBUILDER-NEXT:    [[TMP9:%.*]] = load i64, ptr [[I]], align 8
-// CHECK3-IRBUILDER-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 [[TMP9]]
+// CHECK3-IRBUILDER-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i64 [[TMP9]]
 // CHECK3-IRBUILDER-NEXT:    [[TMP10:%.*]] = load float, ptr [[ARRAYIDX4]], align 4
 // CHECK3-IRBUILDER-NEXT:    [[MUL5:%.*]] = fmul float [[TMP7]], [[TMP10]]
 // CHECK3-IRBUILDER-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[D_ADDR]], align 8
 // CHECK3-IRBUILDER-NEXT:    [[TMP12:%.*]] = load i64, ptr [[I]], align 8
-// CHECK3-IRBUILDER-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[TMP12]]
+// CHECK3-IRBUILDER-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[TMP12]]
 // CHECK3-IRBUILDER-NEXT:    [[TMP13:%.*]] = load float, ptr [[ARRAYIDX6]], align 4
 // CHECK3-IRBUILDER-NEXT:    [[MUL7:%.*]] = fmul float [[MUL5]], [[TMP13]]
 // CHECK3-IRBUILDER-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[A_ADDR]], align 8
 // CHECK3-IRBUILDER-NEXT:    [[TMP15:%.*]] = load i64, ptr [[I]], align 8
-// CHECK3-IRBUILDER-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP15]]
+// CHECK3-IRBUILDER-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i64 [[TMP15]]
 // CHECK3-IRBUILDER-NEXT:    store float [[MUL7]], ptr [[ARRAYIDX8]], align 4
 // CHECK3-IRBUILDER-NEXT:    br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]]
 // CHECK3-IRBUILDER:       omp.inner.for.body.ordered.after:
@@ -2350,24 +2350,24 @@ void foo_simd(int low, int up) {
 // CHECK3-IRBUILDER-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[B_ADDR]], align 8
 // CHECK3-IRBUILDER-NEXT:    [[TMP8:%.*]] = load i8, ptr [[I]], align 1
 // CHECK3-IRBUILDER-NEXT:    [[IDXPROM:%.*]] = zext i8 [[TMP8]] to i64
-// CHECK3-IRBUILDER-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[IDXPROM]]
+// CHECK3-IRBUILDER-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP7]], i64 [[IDXPROM]]
 // CHECK3-IRBUILDER-NEXT:    [[TMP9:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 // CHECK3-IRBUILDER-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[C_ADDR]], align 8
 // CHECK3-IRBUILDER-NEXT:    [[TMP11:%.*]] = load i8, ptr [[I]], align 1
 // CHECK3-IRBUILDER-NEXT:    [[IDXPROM9:%.*]] = zext i8 [[TMP11]] to i64
-// CHECK3-IRBUILDER-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i64 [[IDXPROM9]]
+// CHECK3-IRBUILDER-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw float, ptr [[TMP10]], i64 [[IDXPROM9]]
 // CHECK3-IRBUILDER-NEXT:    [[TMP12:%.*]] = load float, ptr [[ARRAYIDX10]], align 4
 // CHECK3-IRBUILDER-NEXT:    [[MUL11:%.*]] = fmul float [[TMP9]], [[TMP12]]
 // CHECK3-IRBUILDER-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[D_ADDR]], align 8
 // CHECK3-IRBUILDER-NEXT:    [[TMP14:%.*]] = load i8, ptr [[I]], align 1
 // CHECK3-IRBUILDER-NEXT:    [[IDXPROM12:%.*]] = zext i8 [[TMP14]] to i64
-// CHECK3-IRBUILDER-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[IDXPROM12]]
+// CHECK3-IRBUILDER-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[IDXPROM12]]
 // CHECK3-IRBUILDER-NEXT:    [[TMP15:%.*]] = load float, ptr [[ARRAYIDX13]], align 4
 // CHECK3-IRBUILDER-NEXT:    [[MUL14:%.*]] = fmul float [[MUL11]], [[TMP15]]
 // CHECK3-IRBUILDER-NEXT:    [[TMP16:%.*]] = load ptr, ptr [[A_ADDR]], align 8
 // CHECK3-IRBUILDER-NEXT:    [[TMP17:%.*]] = load i8, ptr [[I]], align 1
 // CHECK3-IRBUILDER-NEXT:    [[IDXPROM15:%.*]] = zext i8 [[TMP17]] to i64
-// CHECK3-IRBUILDER-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM15]]
+// CHECK3-IRBUILDER-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds nuw float, ptr [[TMP16]], i64 [[IDXPROM15]]
 // CHECK3-IRBUILDER-NEXT:    store float [[MUL14]], ptr [[ARRAYIDX16]], align 4
 // CHECK3-IRBUILDER-NEXT:    br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]]
 // CHECK3-IRBUILDER:       omp.inner.for.body.ordered.after:
@@ -2674,21 +2674,21 @@ void foo_simd(int low, int up) {
 // CHECK5:       for.body:
 // CHECK5-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8
 // CHECK5-NEXT:    [[TMP2:%.*]] = load i64, ptr [[I]], align 8
-// CHECK5-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 [[TMP2]]
+// CHECK5-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP1]], i64 [[TMP2]]
 // CHECK5-NEXT:    [[TMP3:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 // CHECK5-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8
 // CHECK5-NEXT:    [[TMP5:%.*]] = load i64, ptr [[I]], align 8
-// CHECK5-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[TMP5]]
+// CHECK5-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw float, ptr [[TMP4]], i64 [[TMP5]]
 // CHECK5-NEXT:    [[TMP6:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
 // CHECK5-NEXT:    [[MUL:%.*]] = fmul float [[TMP3]], [[TMP6]]
 // CHECK5-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8
 // CHECK5-NEXT:    [[TMP8:%.*]] = load i64, ptr [[I]], align 8
-// CHECK5-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP8]]
+// CHECK5-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP7]], i64 [[TMP8]]
 // CHECK5-NEXT:    [[TMP9:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
 // CHECK5-NEXT:    [[MUL3:%.*]] = fmul float [[MUL]], [[TMP9]]
 // CHECK5-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[A_ADDR]], align 8
 // CHECK5-NEXT:    [[TMP11:%.*]] = load i64, ptr [[I]], align 8
-// CHECK5-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i64 [[TMP11]]
+// CHECK5-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP10]], i64 [[TMP11]]
 // CHECK5-NEXT:    store float [[MUL3]], ptr [[ARRAYIDX4]], align 4
 // CHECK5-NEXT:    br label [[FOR_INC:%.*]]
 // CHECK5:       for.inc:
@@ -2804,24 +2804,24 @@ void foo_simd(int low, int up) {
 // CHECK5-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8
 // CHECK5-NEXT:    [[TMP3:%.*]] = load i8, ptr [[I]], align 1
 // CHECK5-NEXT:    [[IDXPROM:%.*]] = zext i8 [[TMP3]] to i64
-// CHECK5-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 [[IDXPROM]]
+// CHECK5-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP2]], i64 [[IDXPROM]]
 // CHECK5-NEXT:    [[TMP4:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 // CHECK5-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[C_ADDR]], align 8
 // CHECK5-NEXT:    [[TMP6:%.*]] = load i8, ptr [[I]], align 1
 // CHECK5-NEXT:    [[IDXPROM4:%.*]] = zext i8 [[TMP6]] to i64
-// CHECK5-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[IDXPROM4]]
+// CHECK5-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP5]], i64 [[IDXPROM4]]
 // CHECK5-NEXT:    [[TMP7:%.*]] = load float, ptr [[ARRAYIDX5]], align 4
 // CHECK5-NEXT:    [[MUL:%.*]] = fmul float [[TMP4]], [[TMP7]]
 // CHECK5-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[D_ADDR]], align 8
 // CHECK5-NEXT:    [[TMP9:%.*]] = load i8, ptr [[I]], align 1
 // CHECK5-NEXT:    [[IDXPROM6:%.*]] = zext i8 [[TMP9]] to i64
-// CHECK5-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 [[IDXPROM6]]
+// CHECK5-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i64 [[IDXPROM6]]
 // CHECK5-NEXT:    [[TMP10:%.*]] = load float, ptr [[ARRAYIDX7]], align 4
 // CHECK5-NEXT:    [[MUL8:%.*]] = fmul float [[MUL]], [[TMP10]]
 // CHECK5-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[A_ADDR]], align 8
 // CHECK5-NEXT:    [[TMP12:%.*]] = load i8, ptr [[I]], align 1
 // CHECK5-NEXT:    [[IDXPROM9:%.*]] = zext i8 [[TMP12]] to i64
-// CHECK5-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[IDXPROM9]]
+// CHECK5-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[IDXPROM9]]
 // CHECK5-NEXT:    store float [[MUL8]], ptr [[ARRAYIDX10]], align 4
 // CHECK5-NEXT:    br label [[FOR_INC:%.*]]
 // CHECK5:       for.inc:
diff --git clang/test/OpenMP/parallel_for_codegen.cpp clang/test/OpenMP/parallel_for_codegen.cpp
index 2dec32d71b91..c7afae419509 100644
--- clang/test/OpenMP/parallel_for_codegen.cpp
+++ clang/test/OpenMP/parallel_for_codegen.cpp
@@ -665,24 +665,24 @@ void range_for_collapsed() {
 // CHECK1-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 8
 // CHECK1-NEXT:    [[TMP15:%.*]] = load i32, ptr [[I]], align 4
 // CHECK1-NEXT:    [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[IDXPROM]]
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i64 [[IDXPROM]]
 // CHECK1-NEXT:    [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 // CHECK1-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 8
 // CHECK1-NEXT:    [[TMP18:%.*]] = load i32, ptr [[I]], align 4
 // CHECK1-NEXT:    [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64
-// CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM3]]
+// CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i64 [[IDXPROM3]]
 // CHECK1-NEXT:    [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4
 // CHECK1-NEXT:    [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]]
 // CHECK1-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 8
 // CHECK1-NEXT:    [[TMP21:%.*]] = load i32, ptr [[I]], align 4
 // CHECK1-NEXT:    [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64
-// CHECK1-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM6]]
+// CHECK1-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i64 [[IDXPROM6]]
 // CHECK1-NEXT:    [[TMP22:%.*]] = load float, ptr [[ARRAYIDX7]], align 4
 // CHECK1-NEXT:    [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]]
 // CHECK1-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 8
 // CHECK1-NEXT:    [[TMP24:%.*]] = load i32, ptr [[I]], align 4
 // CHECK1-NEXT:    [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64
-// CHECK1-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM9]]
+// CHECK1-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw float, ptr [[TMP23]], i64 [[IDXPROM9]]
 // CHECK1-NEXT:    store float [[MUL8]], ptr [[ARRAYIDX10]], align 4
 // CHECK1-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK1:       omp.body.continue:
@@ -779,21 +779,21 @@ void range_for_collapsed() {
 // CHECK1-NEXT:    store i64 [[ADD1]], ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]]
 // CHECK1-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP5]]
 // CHECK1-NEXT:    [[TMP12:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]]
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[TMP12]]
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[TMP12]]
 // CHECK1-NEXT:    [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]]
 // CHECK1-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP5]]
 // CHECK1-NEXT:    [[TMP15:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]]
-// CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP15]]
+// CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i64 [[TMP15]]
 // CHECK1-NEXT:    [[TMP16:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP5]]
 // CHECK1-NEXT:    [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]]
 // CHECK1-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP5]]
 // CHECK1-NEXT:    [[TMP18:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]]
-// CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP18]]
+// CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i64 [[TMP18]]
 // CHECK1-NEXT:    [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP5]]
 // CHECK1-NEXT:    [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]]
 // CHECK1-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP5]]
 // CHECK1-NEXT:    [[TMP21:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]]
-// CHECK1-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP21]]
+// CHECK1-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i64 [[TMP21]]
 // CHECK1-NEXT:    store float [[MUL5]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP5]]
 // CHECK1-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK1:       omp.body.continue:
@@ -882,21 +882,21 @@ void range_for_collapsed() {
 // CHECK1-NEXT:    store i64 [[ADD1]], ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]]
 // CHECK1-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP8]]
 // CHECK1-NEXT:    [[TMP12:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]]
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[TMP12]]
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[TMP12]]
 // CHECK1-NEXT:    [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP8]]
 // CHECK1-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP8]]
 // CHECK1-NEXT:    [[TMP15:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]]
-// CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP15]]
+// CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i64 [[TMP15]]
 // CHECK1-NEXT:    [[TMP16:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP8]]
 // CHECK1-NEXT:    [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]]
 // CHECK1-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP8]]
 // CHECK1-NEXT:    [[TMP18:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]]
-// CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP18]]
+// CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i64 [[TMP18]]
 // CHECK1-NEXT:    [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP8]]
 // CHECK1-NEXT:    [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]]
 // CHECK1-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP8]]
 // CHECK1-NEXT:    [[TMP21:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]]
-// CHECK1-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP21]]
+// CHECK1-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i64 [[TMP21]]
 // CHECK1-NEXT:    store float [[MUL5]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP8]]
 // CHECK1-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK1:       omp.body.continue:
@@ -1159,24 +1159,24 @@ void range_for_collapsed() {
 // CHECK1-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP14]]
 // CHECK1-NEXT:    [[TMP14:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]]
 // CHECK1-NEXT:    [[IDXPROM:%.*]] = zext i8 [[TMP14]] to i64
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[IDXPROM]]
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[IDXPROM]]
 // CHECK1-NEXT:    [[TMP15:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP14]]
 // CHECK1-NEXT:    [[TMP16:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP14]]
 // CHECK1-NEXT:    [[TMP17:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]]
 // CHECK1-NEXT:    [[IDXPROM6:%.*]] = zext i8 [[TMP17]] to i64
-// CHECK1-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM6]]
+// CHECK1-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP16]], i64 [[IDXPROM6]]
 // CHECK1-NEXT:    [[TMP18:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP14]]
 // CHECK1-NEXT:    [[MUL8:%.*]] = fmul float [[TMP15]], [[TMP18]]
 // CHECK1-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP14]]
 // CHECK1-NEXT:    [[TMP20:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]]
 // CHECK1-NEXT:    [[IDXPROM9:%.*]] = zext i8 [[TMP20]] to i64
-// CHECK1-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[IDXPROM9]]
+// CHECK1-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw float, ptr [[TMP19]], i64 [[IDXPROM9]]
 // CHECK1-NEXT:    [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP14]]
 // CHECK1-NEXT:    [[MUL11:%.*]] = fmul float [[MUL8]], [[TMP21]]
 // CHECK1-NEXT:    [[TMP22:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP14]]
 // CHECK1-NEXT:    [[TMP23:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]]
 // CHECK1-NEXT:    [[IDXPROM12:%.*]] = zext i8 [[TMP23]] to i64
-// CHECK1-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[IDXPROM12]]
+// CHECK1-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds nuw float, ptr [[TMP22]], i64 [[IDXPROM12]]
 // CHECK1-NEXT:    store float [[MUL11]], ptr [[ARRAYIDX13]], align 4, !llvm.access.group [[ACC_GRP14]]
 // CHECK1-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK1:       omp.body.continue:
@@ -1303,7 +1303,7 @@ void range_for_collapsed() {
 // CHECK1-NEXT:    [[CONV:%.*]] = sitofp i32 [[CALL]] to float
 // CHECK1-NEXT:    [[TMP13:%.*]] = load i32, ptr [[I]], align 4
 // CHECK1-NEXT:    [[IDXPROM:%.*]] = zext i32 [[TMP13]] to i64
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[VLA1]], i64 [[IDXPROM]]
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[VLA1]], i64 [[IDXPROM]]
 // CHECK1-NEXT:    [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 // CHECK1-NEXT:    [[ADD4:%.*]] = fadd float [[CONV]], [[TMP14]]
 // CHECK1-NEXT:    [[TMP15:%.*]] = load i32, ptr [[N_ADDR]], align 4
@@ -1312,7 +1312,7 @@ void range_for_collapsed() {
 // CHECK1-NEXT:    [[TMP16:%.*]] = load ptr, ptr [[TMP0]], align 8
 // CHECK1-NEXT:    [[TMP17:%.*]] = load i32, ptr [[I]], align 4
 // CHECK1-NEXT:    [[IDXPROM7:%.*]] = zext i32 [[TMP17]] to i64
-// CHECK1-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM7]]
+// CHECK1-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw float, ptr [[TMP16]], i64 [[IDXPROM7]]
 // CHECK1-NEXT:    [[TMP18:%.*]] = load float, ptr [[ARRAYIDX8]], align 4
 // CHECK1-NEXT:    [[ADD9:%.*]] = fadd float [[TMP18]], [[ADD6]]
 // CHECK1-NEXT:    store float [[ADD9]], ptr [[ARRAYIDX8]], align 4
@@ -1781,24 +1781,24 @@ void range_for_collapsed() {
 // CHECK2-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 8
 // CHECK2-NEXT:    [[TMP15:%.*]] = load i32, ptr [[I]], align 4
 // CHECK2-NEXT:    [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64
-// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[IDXPROM]]
+// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i64 [[IDXPROM]]
 // CHECK2-NEXT:    [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 // CHECK2-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 8
 // CHECK2-NEXT:    [[TMP18:%.*]] = load i32, ptr [[I]], align 4
 // CHECK2-NEXT:    [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64
-// CHECK2-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM3]]
+// CHECK2-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i64 [[IDXPROM3]]
 // CHECK2-NEXT:    [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4
 // CHECK2-NEXT:    [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]]
 // CHECK2-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 8
 // CHECK2-NEXT:    [[TMP21:%.*]] = load i32, ptr [[I]], align 4
 // CHECK2-NEXT:    [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64
-// CHECK2-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM6]]
+// CHECK2-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i64 [[IDXPROM6]]
 // CHECK2-NEXT:    [[TMP22:%.*]] = load float, ptr [[ARRAYIDX7]], align 4
 // CHECK2-NEXT:    [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]]
 // CHECK2-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 8
 // CHECK2-NEXT:    [[TMP24:%.*]] = load i32, ptr [[I]], align 4
 // CHECK2-NEXT:    [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64
-// CHECK2-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM9]]
+// CHECK2-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw float, ptr [[TMP23]], i64 [[IDXPROM9]]
 // CHECK2-NEXT:    store float [[MUL8]], ptr [[ARRAYIDX10]], align 4
 // CHECK2-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK2:       omp.body.continue:
@@ -1895,21 +1895,21 @@ void range_for_collapsed() {
 // CHECK2-NEXT:    store i64 [[ADD1]], ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]]
 // CHECK2-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP5]]
 // CHECK2-NEXT:    [[TMP12:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]]
-// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[TMP12]]
+// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[TMP12]]
 // CHECK2-NEXT:    [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]]
 // CHECK2-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP5]]
 // CHECK2-NEXT:    [[TMP15:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]]
-// CHECK2-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP15]]
+// CHECK2-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i64 [[TMP15]]
 // CHECK2-NEXT:    [[TMP16:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP5]]
 // CHECK2-NEXT:    [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]]
 // CHECK2-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP5]]
 // CHECK2-NEXT:    [[TMP18:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]]
-// CHECK2-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP18]]
+// CHECK2-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i64 [[TMP18]]
 // CHECK2-NEXT:    [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP5]]
 // CHECK2-NEXT:    [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]]
 // CHECK2-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP5]]
 // CHECK2-NEXT:    [[TMP21:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]]
-// CHECK2-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP21]]
+// CHECK2-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i64 [[TMP21]]
 // CHECK2-NEXT:    store float [[MUL5]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP5]]
 // CHECK2-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK2:       omp.body.continue:
@@ -1998,21 +1998,21 @@ void range_for_collapsed() {
 // CHECK2-NEXT:    store i64 [[ADD1]], ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]]
 // CHECK2-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP8]]
 // CHECK2-NEXT:    [[TMP12:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]]
-// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[TMP12]]
+// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[TMP12]]
 // CHECK2-NEXT:    [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP8]]
 // CHECK2-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP8]]
 // CHECK2-NEXT:    [[TMP15:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]]
-// CHECK2-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP15]]
+// CHECK2-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i64 [[TMP15]]
 // CHECK2-NEXT:    [[TMP16:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP8]]
 // CHECK2-NEXT:    [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]]
 // CHECK2-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP8]]
 // CHECK2-NEXT:    [[TMP18:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]]
-// CHECK2-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP18]]
+// CHECK2-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i64 [[TMP18]]
 // CHECK2-NEXT:    [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP8]]
 // CHECK2-NEXT:    [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]]
 // CHECK2-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP8]]
 // CHECK2-NEXT:    [[TMP21:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]]
-// CHECK2-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP21]]
+// CHECK2-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i64 [[TMP21]]
 // CHECK2-NEXT:    store float [[MUL5]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP8]]
 // CHECK2-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK2:       omp.body.continue:
@@ -2275,24 +2275,24 @@ void range_for_collapsed() {
 // CHECK2-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP14]]
 // CHECK2-NEXT:    [[TMP14:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]]
 // CHECK2-NEXT:    [[IDXPROM:%.*]] = zext i8 [[TMP14]] to i64
-// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[IDXPROM]]
+// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[IDXPROM]]
 // CHECK2-NEXT:    [[TMP15:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP14]]
 // CHECK2-NEXT:    [[TMP16:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP14]]
 // CHECK2-NEXT:    [[TMP17:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]]
 // CHECK2-NEXT:    [[IDXPROM6:%.*]] = zext i8 [[TMP17]] to i64
-// CHECK2-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM6]]
+// CHECK2-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP16]], i64 [[IDXPROM6]]
 // CHECK2-NEXT:    [[TMP18:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP14]]
 // CHECK2-NEXT:    [[MUL8:%.*]] = fmul float [[TMP15]], [[TMP18]]
 // CHECK2-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP14]]
 // CHECK2-NEXT:    [[TMP20:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]]
 // CHECK2-NEXT:    [[IDXPROM9:%.*]] = zext i8 [[TMP20]] to i64
-// CHECK2-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[IDXPROM9]]
+// CHECK2-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw float, ptr [[TMP19]], i64 [[IDXPROM9]]
 // CHECK2-NEXT:    [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP14]]
 // CHECK2-NEXT:    [[MUL11:%.*]] = fmul float [[MUL8]], [[TMP21]]
 // CHECK2-NEXT:    [[TMP22:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP14]]
 // CHECK2-NEXT:    [[TMP23:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]]
 // CHECK2-NEXT:    [[IDXPROM12:%.*]] = zext i8 [[TMP23]] to i64
-// CHECK2-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[IDXPROM12]]
+// CHECK2-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds nuw float, ptr [[TMP22]], i64 [[IDXPROM12]]
 // CHECK2-NEXT:    store float [[MUL11]], ptr [[ARRAYIDX13]], align 4, !llvm.access.group [[ACC_GRP14]]
 // CHECK2-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK2:       omp.body.continue:
@@ -2419,7 +2419,7 @@ void range_for_collapsed() {
 // CHECK2-NEXT:    [[CONV:%.*]] = sitofp i32 [[CALL]] to float
 // CHECK2-NEXT:    [[TMP13:%.*]] = load i32, ptr [[I]], align 4
 // CHECK2-NEXT:    [[IDXPROM:%.*]] = zext i32 [[TMP13]] to i64
-// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[VLA1]], i64 [[IDXPROM]]
+// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[VLA1]], i64 [[IDXPROM]]
 // CHECK2-NEXT:    [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 // CHECK2-NEXT:    [[ADD4:%.*]] = fadd float [[CONV]], [[TMP14]]
 // CHECK2-NEXT:    [[TMP15:%.*]] = load i32, ptr [[N_ADDR]], align 4
@@ -2428,7 +2428,7 @@ void range_for_collapsed() {
 // CHECK2-NEXT:    [[TMP16:%.*]] = load ptr, ptr [[TMP0]], align 8
 // CHECK2-NEXT:    [[TMP17:%.*]] = load i32, ptr [[I]], align 4
 // CHECK2-NEXT:    [[IDXPROM7:%.*]] = zext i32 [[TMP17]] to i64
-// CHECK2-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM7]]
+// CHECK2-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw float, ptr [[TMP16]], i64 [[IDXPROM7]]
 // CHECK2-NEXT:    [[TMP18:%.*]] = load float, ptr [[ARRAYIDX8]], align 4
 // CHECK2-NEXT:    [[ADD9:%.*]] = fadd float [[TMP18]], [[ADD6]]
 // CHECK2-NEXT:    store float [[ADD9]], ptr [[ARRAYIDX8]], align 4
@@ -2897,24 +2897,24 @@ void range_for_collapsed() {
 // CHECK5-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG45:![0-9]+]]
 // CHECK5-NEXT:    [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG45]]
 // CHECK5-NEXT:    [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64, !dbg [[DBG45]]
-// CHECK5-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[IDXPROM]], !dbg [[DBG45]]
+// CHECK5-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i64 [[IDXPROM]], !dbg [[DBG45]]
 // CHECK5-NEXT:    [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !dbg [[DBG45]]
 // CHECK5-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 8, !dbg [[DBG45]]
 // CHECK5-NEXT:    [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG45]]
 // CHECK5-NEXT:    [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64, !dbg [[DBG45]]
-// CHECK5-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM3]], !dbg [[DBG45]]
+// CHECK5-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i64 [[IDXPROM3]], !dbg [[DBG45]]
 // CHECK5-NEXT:    [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !dbg [[DBG45]]
 // CHECK5-NEXT:    [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]], !dbg [[DBG45]]
 // CHECK5-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG45]]
 // CHECK5-NEXT:    [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG45]]
 // CHECK5-NEXT:    [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64, !dbg [[DBG45]]
-// CHECK5-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM6]], !dbg [[DBG45]]
+// CHECK5-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i64 [[IDXPROM6]], !dbg [[DBG45]]
 // CHECK5-NEXT:    [[TMP22:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !dbg [[DBG45]]
 // CHECK5-NEXT:    [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]], !dbg [[DBG45]]
 // CHECK5-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 8, !dbg [[DBG45]]
 // CHECK5-NEXT:    [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG45]]
 // CHECK5-NEXT:    [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64, !dbg [[DBG45]]
-// CHECK5-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM9]], !dbg [[DBG45]]
+// CHECK5-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw float, ptr [[TMP23]], i64 [[IDXPROM9]], !dbg [[DBG45]]
 // CHECK5-NEXT:    store float [[MUL8]], ptr [[ARRAYIDX10]], align 4, !dbg [[DBG45]]
 // CHECK5-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG46:![0-9]+]]
 // CHECK5:       omp.body.continue:
@@ -3011,21 +3011,21 @@ void range_for_collapsed() {
 // CHECK5-NEXT:    store i64 [[ADD1]], ptr [[I]], align 8, !dbg [[DBG54]], !llvm.access.group [[ACC_GRP55]]
 // CHECK5-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG56:![0-9]+]], !llvm.access.group [[ACC_GRP55]]
 // CHECK5-NEXT:    [[TMP12:%.*]] = load i64, ptr [[I]], align 8, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]]
-// CHECK5-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[TMP12]], !dbg [[DBG56]]
+// CHECK5-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[TMP12]], !dbg [[DBG56]]
 // CHECK5-NEXT:    [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]]
 // CHECK5-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[TMP2]], align 8, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]]
 // CHECK5-NEXT:    [[TMP15:%.*]] = load i64, ptr [[I]], align 8, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]]
-// CHECK5-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP15]], !dbg [[DBG56]]
+// CHECK5-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i64 [[TMP15]], !dbg [[DBG56]]
 // CHECK5-NEXT:    [[TMP16:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]]
 // CHECK5-NEXT:    [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]], !dbg [[DBG56]]
 // CHECK5-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]]
 // CHECK5-NEXT:    [[TMP18:%.*]] = load i64, ptr [[I]], align 8, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]]
-// CHECK5-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP18]], !dbg [[DBG56]]
+// CHECK5-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i64 [[TMP18]], !dbg [[DBG56]]
 // CHECK5-NEXT:    [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]]
 // CHECK5-NEXT:    [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]], !dbg [[DBG56]]
 // CHECK5-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[TMP0]], align 8, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]]
 // CHECK5-NEXT:    [[TMP21:%.*]] = load i64, ptr [[I]], align 8, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]]
-// CHECK5-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP21]], !dbg [[DBG56]]
+// CHECK5-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i64 [[TMP21]], !dbg [[DBG56]]
 // CHECK5-NEXT:    store float [[MUL5]], ptr [[ARRAYIDX6]], align 4, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]]
 // CHECK5-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG57:![0-9]+]]
 // CHECK5:       omp.body.continue:
@@ -3114,21 +3114,21 @@ void range_for_collapsed() {
 // CHECK5-NEXT:    store i64 [[ADD1]], ptr [[I]], align 8, !dbg [[DBG66]], !llvm.access.group [[ACC_GRP67]]
 // CHECK5-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG68:![0-9]+]], !llvm.access.group [[ACC_GRP67]]
 // CHECK5-NEXT:    [[TMP12:%.*]] = load i64, ptr [[I]], align 8, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]]
-// CHECK5-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[TMP12]], !dbg [[DBG68]]
+// CHECK5-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[TMP12]], !dbg [[DBG68]]
 // CHECK5-NEXT:    [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]]
 // CHECK5-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[TMP2]], align 8, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]]
 // CHECK5-NEXT:    [[TMP15:%.*]] = load i64, ptr [[I]], align 8, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]]
-// CHECK5-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP15]], !dbg [[DBG68]]
+// CHECK5-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i64 [[TMP15]], !dbg [[DBG68]]
 // CHECK5-NEXT:    [[TMP16:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]]
 // CHECK5-NEXT:    [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]], !dbg [[DBG68]]
 // CHECK5-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]]
 // CHECK5-NEXT:    [[TMP18:%.*]] = load i64, ptr [[I]], align 8, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]]
-// CHECK5-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP18]], !dbg [[DBG68]]
+// CHECK5-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i64 [[TMP18]], !dbg [[DBG68]]
 // CHECK5-NEXT:    [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]]
 // CHECK5-NEXT:    [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]], !dbg [[DBG68]]
 // CHECK5-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[TMP0]], align 8, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]]
 // CHECK5-NEXT:    [[TMP21:%.*]] = load i64, ptr [[I]], align 8, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]]
-// CHECK5-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP21]], !dbg [[DBG68]]
+// CHECK5-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i64 [[TMP21]], !dbg [[DBG68]]
 // CHECK5-NEXT:    store float [[MUL5]], ptr [[ARRAYIDX6]], align 4, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]]
 // CHECK5-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG69:![0-9]+]]
 // CHECK5:       omp.body.continue:
@@ -3391,24 +3391,24 @@ void range_for_collapsed() {
 // CHECK5-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG97:![0-9]+]], !llvm.access.group [[ACC_GRP95]]
 // CHECK5-NEXT:    [[TMP14:%.*]] = load i8, ptr [[I]], align 1, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]]
 // CHECK5-NEXT:    [[IDXPROM:%.*]] = zext i8 [[TMP14]] to i64, !dbg [[DBG97]]
-// CHECK5-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[IDXPROM]], !dbg [[DBG97]]
+// CHECK5-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[IDXPROM]], !dbg [[DBG97]]
 // CHECK5-NEXT:    [[TMP15:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]]
 // CHECK5-NEXT:    [[TMP16:%.*]] = load ptr, ptr [[TMP2]], align 8, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]]
 // CHECK5-NEXT:    [[TMP17:%.*]] = load i8, ptr [[I]], align 1, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]]
 // CHECK5-NEXT:    [[IDXPROM6:%.*]] = zext i8 [[TMP17]] to i64, !dbg [[DBG97]]
-// CHECK5-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM6]], !dbg [[DBG97]]
+// CHECK5-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP16]], i64 [[IDXPROM6]], !dbg [[DBG97]]
 // CHECK5-NEXT:    [[TMP18:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]]
 // CHECK5-NEXT:    [[MUL8:%.*]] = fmul float [[TMP15]], [[TMP18]], !dbg [[DBG97]]
 // CHECK5-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]]
 // CHECK5-NEXT:    [[TMP20:%.*]] = load i8, ptr [[I]], align 1, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]]
 // CHECK5-NEXT:    [[IDXPROM9:%.*]] = zext i8 [[TMP20]] to i64, !dbg [[DBG97]]
-// CHECK5-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[IDXPROM9]], !dbg [[DBG97]]
+// CHECK5-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw float, ptr [[TMP19]], i64 [[IDXPROM9]], !dbg [[DBG97]]
 // CHECK5-NEXT:    [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]]
 // CHECK5-NEXT:    [[MUL11:%.*]] = fmul float [[MUL8]], [[TMP21]], !dbg [[DBG97]]
 // CHECK5-NEXT:    [[TMP22:%.*]] = load ptr, ptr [[TMP0]], align 8, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]]
 // CHECK5-NEXT:    [[TMP23:%.*]] = load i8, ptr [[I]], align 1, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]]
 // CHECK5-NEXT:    [[IDXPROM12:%.*]] = zext i8 [[TMP23]] to i64, !dbg [[DBG97]]
-// CHECK5-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[IDXPROM12]], !dbg [[DBG97]]
+// CHECK5-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds nuw float, ptr [[TMP22]], i64 [[IDXPROM12]], !dbg [[DBG97]]
 // CHECK5-NEXT:    store float [[MUL11]], ptr [[ARRAYIDX13]], align 4, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]]
 // CHECK5-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG98:![0-9]+]]
 // CHECK5:       omp.body.continue:
@@ -3535,7 +3535,7 @@ void range_for_collapsed() {
 // CHECK5-NEXT:    [[CONV:%.*]] = sitofp i32 [[CALL]] to float, !dbg [[DBG111]]
 // CHECK5-NEXT:    [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG111]]
 // CHECK5-NEXT:    [[IDXPROM:%.*]] = zext i32 [[TMP13]] to i64, !dbg [[DBG111]]
-// CHECK5-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[VLA1]], i64 [[IDXPROM]], !dbg [[DBG111]]
+// CHECK5-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[VLA1]], i64 [[IDXPROM]], !dbg [[DBG111]]
 // CHECK5-NEXT:    [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !dbg [[DBG111]]
 // CHECK5-NEXT:    [[ADD4:%.*]] = fadd float [[CONV]], [[TMP14]], !dbg [[DBG111]]
 // CHECK5-NEXT:    [[TMP15:%.*]] = load i32, ptr [[N_ADDR]], align 4, !dbg [[DBG111]]
@@ -3544,7 +3544,7 @@ void range_for_collapsed() {
 // CHECK5-NEXT:    [[TMP16:%.*]] = load ptr, ptr [[TMP0]], align 8, !dbg [[DBG111]]
 // CHECK5-NEXT:    [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG111]]
 // CHECK5-NEXT:    [[IDXPROM7:%.*]] = zext i32 [[TMP17]] to i64, !dbg [[DBG111]]
-// CHECK5-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM7]], !dbg [[DBG111]]
+// CHECK5-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw float, ptr [[TMP16]], i64 [[IDXPROM7]], !dbg [[DBG111]]
 // CHECK5-NEXT:    [[TMP18:%.*]] = load float, ptr [[ARRAYIDX8]], align 4, !dbg [[DBG111]]
 // CHECK5-NEXT:    [[ADD9:%.*]] = fadd float [[TMP18]], [[ADD6]], !dbg [[DBG111]]
 // CHECK5-NEXT:    store float [[ADD9]], ptr [[ARRAYIDX8]], align 4, !dbg [[DBG111]]
@@ -4013,24 +4013,24 @@ void range_for_collapsed() {
 // CHECK6-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 8
 // CHECK6-NEXT:    [[TMP15:%.*]] = load i32, ptr [[I]], align 4
 // CHECK6-NEXT:    [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64
-// CHECK6-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[IDXPROM]]
+// CHECK6-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i64 [[IDXPROM]]
 // CHECK6-NEXT:    [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 // CHECK6-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 8
 // CHECK6-NEXT:    [[TMP18:%.*]] = load i32, ptr [[I]], align 4
 // CHECK6-NEXT:    [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64
-// CHECK6-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM3]]
+// CHECK6-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i64 [[IDXPROM3]]
 // CHECK6-NEXT:    [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4
 // CHECK6-NEXT:    [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]]
 // CHECK6-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 8
 // CHECK6-NEXT:    [[TMP21:%.*]] = load i32, ptr [[I]], align 4
 // CHECK6-NEXT:    [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64
-// CHECK6-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM6]]
+// CHECK6-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i64 [[IDXPROM6]]
 // CHECK6-NEXT:    [[TMP22:%.*]] = load float, ptr [[ARRAYIDX7]], align 4
 // CHECK6-NEXT:    [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]]
 // CHECK6-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 8
 // CHECK6-NEXT:    [[TMP24:%.*]] = load i32, ptr [[I]], align 4
 // CHECK6-NEXT:    [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64
-// CHECK6-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM9]]
+// CHECK6-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw float, ptr [[TMP23]], i64 [[IDXPROM9]]
 // CHECK6-NEXT:    store float [[MUL8]], ptr [[ARRAYIDX10]], align 4
 // CHECK6-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK6:       omp.body.continue:
@@ -4127,21 +4127,21 @@ void range_for_collapsed() {
 // CHECK6-NEXT:    store i64 [[ADD1]], ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]]
 // CHECK6-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP5]]
 // CHECK6-NEXT:    [[TMP12:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]]
-// CHECK6-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[TMP12]]
+// CHECK6-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[TMP12]]
 // CHECK6-NEXT:    [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]]
 // CHECK6-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP5]]
 // CHECK6-NEXT:    [[TMP15:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]]
-// CHECK6-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP15]]
+// CHECK6-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i64 [[TMP15]]
 // CHECK6-NEXT:    [[TMP16:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP5]]
 // CHECK6-NEXT:    [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]]
 // CHECK6-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP5]]
 // CHECK6-NEXT:    [[TMP18:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]]
-// CHECK6-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP18]]
+// CHECK6-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i64 [[TMP18]]
 // CHECK6-NEXT:    [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP5]]
 // CHECK6-NEXT:    [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]]
 // CHECK6-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP5]]
 // CHECK6-NEXT:    [[TMP21:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]]
-// CHECK6-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP21]]
+// CHECK6-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i64 [[TMP21]]
 // CHECK6-NEXT:    store float [[MUL5]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP5]]
 // CHECK6-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK6:       omp.body.continue:
@@ -4230,21 +4230,21 @@ void range_for_collapsed() {
 // CHECK6-NEXT:    store i64 [[ADD1]], ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]]
 // CHECK6-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP8]]
 // CHECK6-NEXT:    [[TMP12:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]]
-// CHECK6-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[TMP12]]
+// CHECK6-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[TMP12]]
 // CHECK6-NEXT:    [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP8]]
 // CHECK6-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP8]]
 // CHECK6-NEXT:    [[TMP15:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]]
-// CHECK6-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP15]]
+// CHECK6-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i64 [[TMP15]]
 // CHECK6-NEXT:    [[TMP16:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP8]]
 // CHECK6-NEXT:    [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]]
 // CHECK6-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP8]]
 // CHECK6-NEXT:    [[TMP18:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]]
-// CHECK6-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP18]]
+// CHECK6-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i64 [[TMP18]]
 // CHECK6-NEXT:    [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP8]]
 // CHECK6-NEXT:    [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]]
 // CHECK6-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP8]]
 // CHECK6-NEXT:    [[TMP21:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]]
-// CHECK6-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP21]]
+// CHECK6-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i64 [[TMP21]]
 // CHECK6-NEXT:    store float [[MUL5]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP8]]
 // CHECK6-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK6:       omp.body.continue:
@@ -4507,24 +4507,24 @@ void range_for_collapsed() {
 // CHECK6-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP14]]
 // CHECK6-NEXT:    [[TMP14:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]]
 // CHECK6-NEXT:    [[IDXPROM:%.*]] = zext i8 [[TMP14]] to i64
-// CHECK6-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[IDXPROM]]
+// CHECK6-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[IDXPROM]]
 // CHECK6-NEXT:    [[TMP15:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP14]]
 // CHECK6-NEXT:    [[TMP16:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP14]]
 // CHECK6-NEXT:    [[TMP17:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]]
 // CHECK6-NEXT:    [[IDXPROM6:%.*]] = zext i8 [[TMP17]] to i64
-// CHECK6-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM6]]
+// CHECK6-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP16]], i64 [[IDXPROM6]]
 // CHECK6-NEXT:    [[TMP18:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP14]]
 // CHECK6-NEXT:    [[MUL8:%.*]] = fmul float [[TMP15]], [[TMP18]]
 // CHECK6-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP14]]
 // CHECK6-NEXT:    [[TMP20:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]]
 // CHECK6-NEXT:    [[IDXPROM9:%.*]] = zext i8 [[TMP20]] to i64
-// CHECK6-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[IDXPROM9]]
+// CHECK6-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw float, ptr [[TMP19]], i64 [[IDXPROM9]]
 // CHECK6-NEXT:    [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP14]]
 // CHECK6-NEXT:    [[MUL11:%.*]] = fmul float [[MUL8]], [[TMP21]]
 // CHECK6-NEXT:    [[TMP22:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP14]]
 // CHECK6-NEXT:    [[TMP23:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]]
 // CHECK6-NEXT:    [[IDXPROM12:%.*]] = zext i8 [[TMP23]] to i64
-// CHECK6-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[IDXPROM12]]
+// CHECK6-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds nuw float, ptr [[TMP22]], i64 [[IDXPROM12]]
 // CHECK6-NEXT:    store float [[MUL11]], ptr [[ARRAYIDX13]], align 4, !llvm.access.group [[ACC_GRP14]]
 // CHECK6-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK6:       omp.body.continue:
@@ -4649,7 +4649,7 @@ void range_for_collapsed() {
 // CHECK6-NEXT:    [[CONV:%.*]] = sitofp i32 [[CALL]] to float
 // CHECK6-NEXT:    [[TMP13:%.*]] = load i32, ptr [[I]], align 4
 // CHECK6-NEXT:    [[IDXPROM:%.*]] = zext i32 [[TMP13]] to i64
-// CHECK6-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[VLA1]], i64 [[IDXPROM]]
+// CHECK6-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[VLA1]], i64 [[IDXPROM]]
 // CHECK6-NEXT:    [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 // CHECK6-NEXT:    [[ADD4:%.*]] = fadd float [[CONV]], [[TMP14]]
 // CHECK6-NEXT:    [[TMP15:%.*]] = load i32, ptr [[N_ADDR]], align 4
@@ -4658,7 +4658,7 @@ void range_for_collapsed() {
 // CHECK6-NEXT:    [[TMP16:%.*]] = load ptr, ptr [[TMP0]], align 8
 // CHECK6-NEXT:    [[TMP17:%.*]] = load i32, ptr [[I]], align 4
 // CHECK6-NEXT:    [[IDXPROM7:%.*]] = zext i32 [[TMP17]] to i64
-// CHECK6-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM7]]
+// CHECK6-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw float, ptr [[TMP16]], i64 [[IDXPROM7]]
 // CHECK6-NEXT:    [[TMP18:%.*]] = load float, ptr [[ARRAYIDX8]], align 4
 // CHECK6-NEXT:    [[ADD9:%.*]] = fadd float [[TMP18]], [[ADD6]]
 // CHECK6-NEXT:    store float [[ADD9]], ptr [[ARRAYIDX8]], align 4
diff --git clang/test/OpenMP/parallel_for_linear_codegen.cpp clang/test/OpenMP/parallel_for_linear_codegen.cpp
index 8b46797ae253..15eb0dfa42af 100644
--- clang/test/OpenMP/parallel_for_linear_codegen.cpp
+++ clang/test/OpenMP/parallel_for_linear_codegen.cpp
@@ -337,7 +337,7 @@ int main() {
 // CHECK1-NEXT:    [[ADD7:%.*]] = add nsw i32 [[TMP14]], [[MUL6]]
 // CHECK1-NEXT:    store i32 [[ADD7]], ptr [[LVAR3]], align 4
 // CHECK1-NEXT:    [[TMP16:%.*]] = load ptr, ptr [[PVAR2]], align 8
-// CHECK1-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 1
+// CHECK1-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP16]], i32 1
 // CHECK1-NEXT:    store ptr [[INCDEC_PTR]], ptr [[PVAR2]], align 8
 // CHECK1-NEXT:    [[TMP17:%.*]] = load i32, ptr [[LVAR3]], align 4
 // CHECK1-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP17]], 1
diff --git clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp
index 752aec788bf3..59d169d7a173 100644
--- clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp
+++ clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp
@@ -83,16 +83,16 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[ARGC1]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP1]], i64 0
 // CHECK1-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 0
 // CHECK1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
 // CHECK1-NEXT:    [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP4]]
 // CHECK1-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i64 9
+// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP5]], i64 9
 // CHECK1-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 [[LB_ADD_LEN]]
+// CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP6]], i64 [[LB_ADD_LEN]]
 // CHECK1-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64
 // CHECK1-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64
 // CHECK1-NEXT:    [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]]
@@ -122,7 +122,7 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP20]]
 // CHECK1-NEXT:    store ptr [[_TMP6]], ptr [[_TMP5]], align 8
 // CHECK1-NEXT:    store ptr [[TMP21]], ptr [[_TMP6]], align 8
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0
 // CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[ARGC1]], ptr [[TMP22]], align 8
 // CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1
@@ -137,19 +137,19 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    store ptr @.red_comb., ptr [[TMP27]], align 8
 // CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6
 // CHECK1-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP28]], i8 0, i64 4, i1 false)
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_7:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_7:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1
 // CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP30:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds ptr, ptr [[TMP30]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP30]], i64 0
 // CHECK1-NEXT:    [[TMP31:%.*]] = load ptr, ptr [[ARRAYIDX8]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, ptr [[TMP31]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP31]], i64 0
 // CHECK1-NEXT:    [[TMP32:%.*]] = load i32, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP33:%.*]] = sext i32 [[TMP32]] to i64
 // CHECK1-NEXT:    [[LB_ADD_LEN10:%.*]] = add nsw i64 -1, [[TMP33]]
 // CHECK1-NEXT:    [[TMP34:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds ptr, ptr [[TMP34]], i64 9
+// CHECK1-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP34]], i64 9
 // CHECK1-NEXT:    [[TMP35:%.*]] = load ptr, ptr [[ARRAYIDX11]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 [[LB_ADD_LEN10]]
+// CHECK1-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP35]], i64 [[LB_ADD_LEN10]]
 // CHECK1-NEXT:    store ptr [[VLA]], ptr [[TMP29]], align 8
 // CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 1
 // CHECK1-NEXT:    store ptr [[ARRAYIDX9]], ptr [[TMP36]], align 8
@@ -470,9 +470,9 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]]
 // CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2
 // CHECK1-NEXT:    [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9
+// CHECK1-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP26]], i64 9
 // CHECK1-NEXT:    [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]]
+// CHECK1-NEXT:    [[ARRAYIDX3_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]]
 // CHECK1-NEXT:    [[TMP28:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64
 // CHECK1-NEXT:    [[TMP29:%.*]] = ptrtoint ptr [[TMP20]] to i64
 // CHECK1-NEXT:    [[TMP30:%.*]] = sub i64 [[TMP28]], [[TMP29]]
diff --git clang/test/OpenMP/parallel_for_scan_codegen.cpp clang/test/OpenMP/parallel_for_scan_codegen.cpp
index 161534814a79..67b32407c712 100644
--- clang/test/OpenMP/parallel_for_scan_codegen.cpp
+++ clang/test/OpenMP/parallel_for_scan_codegen.cpp
@@ -28,9 +28,9 @@ void baz(int n) {
 
   // CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(
   // CHECK: [[LAST:%.+]] = mul nsw i64 9, %
-  // CHECK: [[LAST_REF:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[LAST]]
+  // CHECK: [[LAST_REF:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[LAST]]
   // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 @_ZZ3baziE1a, ptr align 4 [[LAST_REF]], i64 %{{.+}}, i1 false)
-  // CHECK: [[LAST_REF_B:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 9
+  // CHECK: [[LAST_REF_B:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 9
   // CHECK: [[LAST_VAL:%.+]] = load double, ptr [[LAST_REF_B]],
   // CHECK: store double [[LAST_VAL]], ptr @_ZZ3baziE1b,
 
@@ -58,13 +58,13 @@ void baz(int n) {
     // CHECK: [[BASE_IDX_I:%.+]] = load i32, ptr [[IV_ADDR:%.+]],
     // CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64
     // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS:%.+]]
-    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF:%.+]], i64 [[IDX]]
-    // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0
+    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF:%.+]], i64 [[IDX]]
+    // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds nuw [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0
     // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4
     // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}}[[A_BUF_IDX]], ptr {{.*}}[[A_PRIV]], i64 [[BYTES]], i1 false)
 
     // b_buffer[i] = b_priv;
-    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF:%.+]], i64 [[BASE_IDX]]
+    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF:%.+]], i64 [[BASE_IDX]]
     // CHECK: [[B_PRIV:%.+]] = load double, ptr [[B_PRIV_ADDR]],
     // CHECK: store double [[B_PRIV]], ptr [[B_BUF_IDX]],
     // CHECK: br label %[[LOOP_CONTINUE:.+]]
@@ -91,13 +91,13 @@ void baz(int n) {
 
     // a_buffer[i] += a_buffer[i-pow(2, k)];
     // CHECK: [[IDX:%.+]] = mul nsw i64 [[I]], [[NUM_ELEMS]]
-    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]]
+    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]]
     // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]]
     // CHECK: [[IDX:%.+]] = mul nsw i64 [[IDX_SUB_K2POW]], [[NUM_ELEMS]]
-    // CHECK: [[A_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]]
-    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[I]]
+    // CHECK: [[A_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]]
+    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[I]]
     // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]]
-    // CHECK: [[B_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[IDX_SUB_K2POW]]
+    // CHECK: [[B_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[IDX_SUB_K2POW]]
     // CHECK: [[A_BUF_END:%.+]] = getelementptr float, ptr [[A_BUF_IDX]], i64 [[NUM_ELEMS]]
     // CHECK: [[ISEMPTY:%.+]] = icmp eq ptr [[A_BUF_IDX]], [[A_BUF_END]]
     // CHECK: br i1 [[ISEMPTY]], label %[[RED_DONE:[^,]+]], label %[[RED_BODY:[^,]+]]
@@ -151,13 +151,13 @@ void baz(int n) {
     // CHECK: [[BASE_IDX_I:%.+]] = load i32, ptr [[IV_ADDR:%.+]],
     // CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64
     // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS]]
-    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]]
-    // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0
+    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]]
+    // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds nuw [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0
     // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4
     // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}}[[A_PRIV]], ptr {{.*}}[[A_BUF_IDX]], i64 [[BYTES]], i1 false)
 
     // b_priv = b_buffer[i];
-    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[BASE_IDX]]
+    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[BASE_IDX]]
     // CHECK: [[B_BUF_IDX_VAL:%.+]] = load double, ptr [[B_BUF_IDX]],
     // CHECK: store double [[B_BUF_IDX_VAL]], ptr [[B_PRIV_ADDR]],
     // CHECK: br label %[[SCAN_PHASE:[^,]+]]
@@ -188,13 +188,13 @@ void baz(int n) {
     // CHECK: [[BASE_IDX_I:%.+]] = load i32, ptr [[IV_ADDR:%.+]],
     // CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64
     // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS:%.+]]
-    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF:%.+]], i64 [[IDX]]
-    // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0
+    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF:%.+]], i64 [[IDX]]
+    // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds nuw [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0
     // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4
     // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}}[[A_BUF_IDX]], ptr {{.*}}[[A_PRIV]], i64 [[BYTES]], i1 false)
 
     // b_buffer[i] = b_priv;
-    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF:%.+]], i64 [[BASE_IDX]]
+    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF:%.+]], i64 [[BASE_IDX]]
     // CHECK: [[B_PRIV:%.+]] = load double, ptr [[B_PRIV_ADDR]],
     // CHECK: store double [[B_PRIV]], ptr [[B_BUF_IDX]],
     // CHECK: br label %[[LOOP_CONTINUE:[^,]+]]
@@ -226,13 +226,13 @@ void baz(int n) {
 
     // a_buffer[i] += a_buffer[i-pow(2, k)];
     // CHECK: [[IDX:%.+]] = mul nsw i64 [[I]], [[NUM_ELEMS]]
-    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]]
+    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]]
     // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]]
     // CHECK: [[IDX:%.+]] = mul nsw i64 [[IDX_SUB_K2POW]], [[NUM_ELEMS]]
-    // CHECK: [[A_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]]
-    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[I]]
+    // CHECK: [[A_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]]
+    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[I]]
     // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]]
-    // CHECK: [[B_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[IDX_SUB_K2POW]]
+    // CHECK: [[B_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[IDX_SUB_K2POW]]
     // CHECK: [[A_BUF_END:%.+]] = getelementptr float, ptr [[A_BUF_IDX]], i64 [[NUM_ELEMS]]
     // CHECK: [[ISEMPTY:%.+]] = icmp eq ptr [[A_BUF_IDX]], [[A_BUF_END]]
     // CHECK: br i1 [[ISEMPTY]], label %[[RED_DONE:[^,]+]], label %[[RED_BODY:[^,]+]]
@@ -289,13 +289,13 @@ void baz(int n) {
     // CHECK: [[IF_THEN]]:
     // CHECK: [[BASE_IDX_SUB_1:%.+]] = sub nuw i64 [[BASE_IDX]], 1
     // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX_SUB_1]], [[NUM_ELEMS]]
-    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]]
-    // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0
+    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]]
+    // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds nuw [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0
     // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4
     // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}}[[A_PRIV]], ptr {{.*}}[[A_BUF_IDX]], i64 [[BYTES]], i1 false)
 
     // b_priv = b_buffer[i];
-    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[BASE_IDX_SUB_1]]
+    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[BASE_IDX_SUB_1]]
     // CHECK: [[B_BUF_IDX_VAL:%.+]] = load double, ptr [[B_BUF_IDX]],
     // CHECK: store double [[B_BUF_IDX_VAL]], ptr [[B_PRIV_ADDR]],
     // CHECK: br label %[[SCAN_PHASE]]
diff --git clang/test/OpenMP/parallel_for_simd_scan_codegen.cpp clang/test/OpenMP/parallel_for_simd_scan_codegen.cpp
index 7e973a602a65..cac997753d48 100644
--- clang/test/OpenMP/parallel_for_simd_scan_codegen.cpp
+++ clang/test/OpenMP/parallel_for_simd_scan_codegen.cpp
@@ -51,13 +51,13 @@ void baz(int n) {
     // CHECK: [[BASE_IDX_I:%.+]] = load i32, ptr [[IV_ADDR:%.+]],
     // CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64
     // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS:%.+]]
-    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF:%.+]], i64 [[IDX]]
-    // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0
+    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF:%.+]], i64 [[IDX]]
+    // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds nuw [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0
     // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4
     // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}}[[A_BUF_IDX]], ptr {{.*}}[[A_PRIV]], i64 [[BYTES]], i1 false)
 
     // b_buffer[i] = b_priv;
-    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF:%.+]], i64 [[BASE_IDX]]
+    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF:%.+]], i64 [[BASE_IDX]]
     // CHECK: [[B_PRIV:%.+]] = load double, ptr [[B_PRIV_ADDR]],
     // CHECK: store double [[B_PRIV]], ptr [[B_BUF_IDX]],
     // CHECK: br label %[[LOOP_CONTINUE:.+]]
@@ -84,13 +84,13 @@ void baz(int n) {
 
     // a_buffer[i] += a_buffer[i-pow(2, k)];
     // CHECK: [[IDX:%.+]] = mul nsw i64 [[I]], [[NUM_ELEMS]]
-    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]]
+    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]]
     // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]]
     // CHECK: [[IDX:%.+]] = mul nsw i64 [[IDX_SUB_K2POW]], [[NUM_ELEMS]]
-    // CHECK: [[A_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]]
-    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[I]]
+    // CHECK: [[A_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]]
+    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[I]]
     // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]]
-    // CHECK: [[B_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[IDX_SUB_K2POW]]
+    // CHECK: [[B_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[IDX_SUB_K2POW]]
     // CHECK: [[A_BUF_END:%.+]] = getelementptr float, ptr [[A_BUF_IDX]], i64 [[NUM_ELEMS]]
     // CHECK: [[ISEMPTY:%.+]] = icmp eq ptr [[A_BUF_IDX]], [[A_BUF_END]]
     // CHECK: br i1 [[ISEMPTY]], label %[[RED_DONE:[^,]+]], label %[[RED_BODY:[^,]+]]
@@ -144,13 +144,13 @@ void baz(int n) {
     // CHECK: [[BASE_IDX_I:%.+]] = load i32, ptr [[IV_ADDR:%.+]],
     // CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64
     // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS]]
-    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]]
-    // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0
+    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]]
+    // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds nuw [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0
     // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4
     // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}}[[A_PRIV]], ptr {{.*}}[[A_BUF_IDX]], i64 [[BYTES]], i1 false)
 
     // b_priv = b_buffer[i];
-    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[BASE_IDX]]
+    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[BASE_IDX]]
     // CHECK: [[B_BUF_IDX_VAL:%.+]] = load double, ptr [[B_BUF_IDX]],
     // CHECK: store double [[B_BUF_IDX_VAL]], ptr [[B_PRIV_ADDR]],
     // CHECK: br label %[[SCAN_PHASE:[^,]+]]
@@ -181,13 +181,13 @@ void baz(int n) {
     // CHECK: [[BASE_IDX_I:%.+]] = load i32, ptr [[IV_ADDR:%.+]],
     // CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64
     // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS:%.+]]
-    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF:%.+]], i64 [[IDX]]
-    // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0
+    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF:%.+]], i64 [[IDX]]
+    // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds nuw [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0
     // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4
     // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}}[[A_BUF_IDX]], ptr {{.*}}[[A_PRIV]], i64 [[BYTES]], i1 false)
 
     // b_buffer[i] = b_priv;
-    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF:%.+]], i64 [[BASE_IDX]]
+    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF:%.+]], i64 [[BASE_IDX]]
     // CHECK: [[B_PRIV:%.+]] = load double, ptr [[B_PRIV_ADDR]],
     // CHECK: store double [[B_PRIV]], ptr [[B_BUF_IDX]],
     // CHECK: br label %[[LOOP_CONTINUE:[^,]+]]
@@ -219,13 +219,13 @@ void baz(int n) {
 
     // a_buffer[i] += a_buffer[i-pow(2, k)];
     // CHECK: [[IDX:%.+]] = mul nsw i64 [[I]], [[NUM_ELEMS]]
-    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]]
+    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]]
     // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]]
     // CHECK: [[IDX:%.+]] = mul nsw i64 [[IDX_SUB_K2POW]], [[NUM_ELEMS]]
-    // CHECK: [[A_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]]
-    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[I]]
+    // CHECK: [[A_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]]
+    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[I]]
     // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]]
-    // CHECK: [[B_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[IDX_SUB_K2POW]]
+    // CHECK: [[B_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[IDX_SUB_K2POW]]
     // CHECK: [[A_BUF_END:%.+]] = getelementptr float, ptr [[A_BUF_IDX]], i64 [[NUM_ELEMS]]
     // CHECK: [[ISEMPTY:%.+]] = icmp eq ptr [[A_BUF_IDX]], [[A_BUF_END]]
     // CHECK: br i1 [[ISEMPTY]], label %[[RED_DONE:[^,]+]], label %[[RED_BODY:[^,]+]]
@@ -282,13 +282,13 @@ void baz(int n) {
     // CHECK: [[IF_THEN]]:
     // CHECK: [[BASE_IDX_SUB_1:%.+]] = sub nuw i64 [[BASE_IDX]], 1
     // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX_SUB_1]], [[NUM_ELEMS]]
-    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]]
-    // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0
+    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]]
+    // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds nuw [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0
     // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4
     // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}}[[A_PRIV]], ptr {{.*}}[[A_BUF_IDX]], i64 [[BYTES]], i1 false)
 
     // b_priv = b_buffer[i];
-    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[BASE_IDX_SUB_1]]
+    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[BASE_IDX_SUB_1]]
     // CHECK: [[B_BUF_IDX_VAL:%.+]] = load double, ptr [[B_BUF_IDX]],
     // CHECK: store double [[B_BUF_IDX_VAL]], ptr [[B_PRIV_ADDR]],
     // CHECK: br label %[[SCAN_PHASE]]
diff --git clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp
index b17757a5f978..c1fe00f23800 100644
--- clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp
+++ clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp
@@ -72,16 +72,16 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[ARGC1]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP1]], i64 0
 // CHECK1-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 0
 // CHECK1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
 // CHECK1-NEXT:    [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP4]]
 // CHECK1-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i64 9
+// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP5]], i64 9
 // CHECK1-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 [[LB_ADD_LEN]]
+// CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP6]], i64 [[LB_ADD_LEN]]
 // CHECK1-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64
 // CHECK1-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64
 // CHECK1-NEXT:    [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]]
@@ -111,7 +111,7 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP20]]
 // CHECK1-NEXT:    store ptr [[_TMP5]], ptr [[TMP]], align 8
 // CHECK1-NEXT:    store ptr [[TMP21]], ptr [[_TMP5]], align 8
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0
 // CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[ARGC1]], ptr [[TMP22]], align 8
 // CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1
@@ -126,19 +126,19 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    store ptr @.red_comb., ptr [[TMP27]], align 8
 // CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6
 // CHECK1-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP28]], i8 0, i64 4, i1 false)
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1
 // CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP30:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[TMP30]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP30]], i64 0
 // CHECK1-NEXT:    [[TMP31:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP31]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP31]], i64 0
 // CHECK1-NEXT:    [[TMP32:%.*]] = load i32, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP33:%.*]] = sext i32 [[TMP32]] to i64
 // CHECK1-NEXT:    [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP33]]
 // CHECK1-NEXT:    [[TMP34:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr [[TMP34]], i64 9
+// CHECK1-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP34]], i64 9
 // CHECK1-NEXT:    [[TMP35:%.*]] = load ptr, ptr [[ARRAYIDX10]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 [[LB_ADD_LEN9]]
+// CHECK1-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP35]], i64 [[LB_ADD_LEN9]]
 // CHECK1-NEXT:    store ptr [[VLA]], ptr [[TMP29]], align 8
 // CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 1
 // CHECK1-NEXT:    store ptr [[ARRAYIDX8]], ptr [[TMP36]], align 8
@@ -425,9 +425,9 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]]
 // CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2
 // CHECK1-NEXT:    [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9
+// CHECK1-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP26]], i64 9
 // CHECK1-NEXT:    [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]]
+// CHECK1-NEXT:    [[ARRAYIDX3_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]]
 // CHECK1-NEXT:    [[TMP28:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64
 // CHECK1-NEXT:    [[TMP29:%.*]] = ptrtoint ptr [[TMP20]] to i64
 // CHECK1-NEXT:    [[TMP30:%.*]] = sub i64 [[TMP28]], [[TMP29]]
diff --git clang/test/OpenMP/parallel_master_taskloop_reduction_codegen.cpp clang/test/OpenMP/parallel_master_taskloop_reduction_codegen.cpp
index 3c74aaca3f46..1d106922435d 100644
--- clang/test/OpenMP/parallel_master_taskloop_reduction_codegen.cpp
+++ clang/test/OpenMP/parallel_master_taskloop_reduction_codegen.cpp
@@ -84,9 +84,9 @@ sum = 0.0;
 // CHECK-DAG:    store ptr @[[RED_COMB1:.+]], ptr [[TMP25]],
 // CHECK-DAG:    [[TMP26:%.*]] = getelementptr inbounds nuw %struct.kmp_taskred_input_t, ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6
 // CHECK-DAG:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP26]], i8 0, i64 4, i1 false)
-// CHECK-DAG:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x %struct.S], ptr [[C:%.+]], i64 0, i64 0
+// CHECK-DAG:    [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw [100 x %struct.S], ptr [[C:%.+]], i64 0, i64 0
 // CHECK-DAG:    [[LB_ADD_LEN:%.*]] = add nsw i64 -1, %
-// CHECK-DAG:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [100 x %struct.S], ptr [[C]], i64 0, i64 [[LB_ADD_LEN]]
+// CHECK-DAG:    [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw [100 x %struct.S], ptr [[C]], i64 0, i64 [[LB_ADD_LEN]]
 // CHECK-DAG:    store ptr [[ARRAYIDX5]], ptr [[TMP28:%[^,]+]],
 // CHECK-DAG:    [[TMP28]] = getelementptr inbounds nuw %struct.kmp_taskred_input_t, ptr [[DOTRD_INPUT_GEP_4:%.+]], i32 0, i32 0
 // CHECK-DAG:    store ptr [[ARRAYIDX5]], ptr [[TMP28:%[^,]+]],
@@ -138,10 +138,10 @@ sum = 0.0;
 // CHECK-DAG:    store ptr @[[RED_COMB4:.+]], ptr [[TMP59]],
 // CHECK-DAG:    [[TMP60:%.*]] = getelementptr inbounds nuw %struct.kmp_taskred_input_t, ptr [[DOTRD_INPUT_GEP_8]], i32 0, i32 6
 // CHECK-DAG:    store i32 1, ptr [[TMP60]],
-// CHECK-DAG:    [[DOTRD_INPUT_GEP_]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64
-// CHECK-DAG:    [[DOTRD_INPUT_GEP_4]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64
-// CHECK-DAG:    [[DOTRD_INPUT_GEP_7]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64
-// CHECK-DAG:    [[DOTRD_INPUT_GEP_8]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_]] = getelementptr inbounds nuw [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_4]] = getelementptr inbounds nuw [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_7]] = getelementptr inbounds nuw [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_8]] = getelementptr inbounds nuw [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64
 // CHECK:    [[TMP62:%.*]] = call ptr @__kmpc_taskred_init(i32 [[TMP0:%.+]], i32 4, ptr [[DOTRD_INPUT_]])
 // CHECK:    [[TMP63:%.*]] = load i32, ptr [[N:%.+]],
 // CHECK:    store i32 [[TMP63]], ptr [[DOTCAPTURE_EXPR_]],
diff --git clang/test/OpenMP/parallel_master_taskloop_simd_reduction_codegen.cpp clang/test/OpenMP/parallel_master_taskloop_simd_reduction_codegen.cpp
index 7d4216ddde6a..9a524c3b94c6 100644
--- clang/test/OpenMP/parallel_master_taskloop_simd_reduction_codegen.cpp
+++ clang/test/OpenMP/parallel_master_taskloop_simd_reduction_codegen.cpp
@@ -84,9 +84,9 @@ sum = 0.0;
 // CHECK-DAG:    store ptr @[[RED_COMB1:.+]], ptr [[TMP25]],
 // CHECK-DAG:    [[TMP26:%.*]] = getelementptr inbounds nuw %struct.kmp_taskred_input_t, ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6
 // CHECK-DAG:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP26]], i8 0, i64 4, i1 false)
-// CHECK-DAG:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x %struct.S], ptr [[C:%.+]], i64 0, i64 0
+// CHECK-DAG:    [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw [100 x %struct.S], ptr [[C:%.+]], i64 0, i64 0
 // CHECK-DAG:    [[LB_ADD_LEN:%.*]] = add nsw i64 -1, %
-// CHECK-DAG:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [100 x %struct.S], ptr [[C]], i64 0, i64 [[LB_ADD_LEN]]
+// CHECK-DAG:    [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw [100 x %struct.S], ptr [[C]], i64 0, i64 [[LB_ADD_LEN]]
 // CHECK-DAG:    store ptr [[ARRAYIDX5]], ptr [[TMP28:%[^,]+]],
 // CHECK-DAG:    [[TMP28]] = getelementptr inbounds nuw %struct.kmp_taskred_input_t, ptr [[DOTRD_INPUT_GEP_4:%.+]], i32 0, i32 0
 // CHECK-DAG:    store ptr [[ARRAYIDX5]], ptr [[TMP28:%[^,]+]],
@@ -138,10 +138,10 @@ sum = 0.0;
 // CHECK-DAG:    store ptr @[[RED_COMB4:.+]], ptr [[TMP59]],
 // CHECK-DAG:    [[TMP60:%.*]] = getelementptr inbounds nuw %struct.kmp_taskred_input_t, ptr [[DOTRD_INPUT_GEP_8]], i32 0, i32 6
 // CHECK-DAG:    store i32 1, ptr [[TMP60]],
-// CHECK-DAG:    [[DOTRD_INPUT_GEP_]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64
-// CHECK-DAG:    [[DOTRD_INPUT_GEP_4]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64
-// CHECK-DAG:    [[DOTRD_INPUT_GEP_7]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64
-// CHECK-DAG:    [[DOTRD_INPUT_GEP_8]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_]] = getelementptr inbounds nuw [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_4]] = getelementptr inbounds nuw [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_7]] = getelementptr inbounds nuw [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_8]] = getelementptr inbounds nuw [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64
 // CHECK:    [[TMP62:%.*]] = call ptr @__kmpc_taskred_init(i32 [[TMP0:%.+]], i32 4, ptr [[DOTRD_INPUT_]])
 // CHECK:    [[TMP63:%.*]] = load i32, ptr [[N:%.+]],
 // CHECK:    store i32 [[TMP63]], ptr [[DOTCAPTURE_EXPR_]],
diff --git clang/test/OpenMP/parallel_reduction_codegen.cpp clang/test/OpenMP/parallel_reduction_codegen.cpp
index f49faa6b89de..ce76429b871f 100644
--- clang/test/OpenMP/parallel_reduction_codegen.cpp
+++ clang/test/OpenMP/parallel_reduction_codegen.cpp
@@ -354,9 +354,9 @@ int main() {
 // CHECK1-NEXT:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
 // CHECK1-NEXT:    store ptr [[X]], ptr [[X_ADDR]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP0]], i64 0
 // CHECK1-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[X_ADDR]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP1]], i64 0
 // CHECK1-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64
 // CHECK1-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64
 // CHECK1-NEXT:    [[TMP4:%.*]] = sub i64 [[TMP2]], [[TMP3]]
@@ -1632,9 +1632,9 @@ int main() {
 // CHECK3-NEXT:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
 // CHECK3-NEXT:    store ptr [[X]], ptr [[X_ADDR]], align 8
 // CHECK3-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8
-// CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i64 0
+// CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP0]], i64 0
 // CHECK3-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[X_ADDR]], align 8
-// CHECK3-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 0
+// CHECK3-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP1]], i64 0
 // CHECK3-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64
 // CHECK3-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64
 // CHECK3-NEXT:    [[TMP4:%.*]] = sub i64 [[TMP2]], [[TMP3]]
@@ -2142,9 +2142,9 @@ int main() {
 // CHECK4-NEXT:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
 // CHECK4-NEXT:    store ptr [[X]], ptr [[X_ADDR]], align 8
 // CHECK4-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8
-// CHECK4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i64 0
+// CHECK4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP0]], i64 0
 // CHECK4-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[X_ADDR]], align 8
-// CHECK4-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 0
+// CHECK4-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP1]], i64 0
 // CHECK4-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64
 // CHECK4-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64
 // CHECK4-NEXT:    [[TMP4:%.*]] = sub i64 [[TMP2]], [[TMP3]]
diff --git clang/test/OpenMP/parallel_reduction_task_codegen.cpp clang/test/OpenMP/parallel_reduction_task_codegen.cpp
index 208f7a41aa3d..40cc3103b1c0 100644
--- clang/test/OpenMP/parallel_reduction_task_codegen.cpp
+++ clang/test/OpenMP/parallel_reduction_task_codegen.cpp
@@ -72,16 +72,16 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[ARGC1]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP1]], i64 0
 // CHECK1-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 0
 // CHECK1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
 // CHECK1-NEXT:    [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP4]]
 // CHECK1-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i64 9
+// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP5]], i64 9
 // CHECK1-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 [[LB_ADD_LEN]]
+// CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP6]], i64 [[LB_ADD_LEN]]
 // CHECK1-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64
 // CHECK1-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64
 // CHECK1-NEXT:    [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]]
@@ -111,7 +111,7 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP20]]
 // CHECK1-NEXT:    store ptr [[_TMP5]], ptr [[TMP]], align 8
 // CHECK1-NEXT:    store ptr [[TMP21]], ptr [[_TMP5]], align 8
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0
 // CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[ARGC1]], ptr [[TMP22]], align 8
 // CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1
@@ -126,19 +126,19 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    store ptr @.red_comb., ptr [[TMP27]], align 8
 // CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6
 // CHECK1-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP28]], i8 0, i64 4, i1 false)
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1
 // CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP30:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[TMP30]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP30]], i64 0
 // CHECK1-NEXT:    [[TMP31:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP31]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP31]], i64 0
 // CHECK1-NEXT:    [[TMP32:%.*]] = load i32, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP33:%.*]] = sext i32 [[TMP32]] to i64
 // CHECK1-NEXT:    [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP33]]
 // CHECK1-NEXT:    [[TMP34:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr [[TMP34]], i64 9
+// CHECK1-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP34]], i64 9
 // CHECK1-NEXT:    [[TMP35:%.*]] = load ptr, ptr [[ARRAYIDX10]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 [[LB_ADD_LEN9]]
+// CHECK1-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP35]], i64 [[LB_ADD_LEN9]]
 // CHECK1-NEXT:    store ptr [[VLA]], ptr [[TMP29]], align 8
 // CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 1
 // CHECK1-NEXT:    store ptr [[ARRAYIDX8]], ptr [[TMP36]], align 8
@@ -416,9 +416,9 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]]
 // CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2
 // CHECK1-NEXT:    [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9
+// CHECK1-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP26]], i64 9
 // CHECK1-NEXT:    [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]]
+// CHECK1-NEXT:    [[ARRAYIDX3_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]]
 // CHECK1-NEXT:    [[TMP28:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64
 // CHECK1-NEXT:    [[TMP29:%.*]] = ptrtoint ptr [[TMP20]] to i64
 // CHECK1-NEXT:    [[TMP30:%.*]] = sub i64 [[TMP28]], [[TMP29]]
diff --git clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp
index 6d73652c3ea2..61597a074cf5 100644
--- clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp
+++ clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp
@@ -81,16 +81,16 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[ARGC1]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP1]], i64 0
 // CHECK1-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 0
 // CHECK1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
 // CHECK1-NEXT:    [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP4]]
 // CHECK1-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i64 9
+// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP5]], i64 9
 // CHECK1-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 [[LB_ADD_LEN]]
+// CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP6]], i64 [[LB_ADD_LEN]]
 // CHECK1-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64
 // CHECK1-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64
 // CHECK1-NEXT:    [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]]
@@ -120,7 +120,7 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP20]]
 // CHECK1-NEXT:    store ptr [[_TMP5]], ptr [[TMP]], align 8
 // CHECK1-NEXT:    store ptr [[TMP21]], ptr [[_TMP5]], align 8
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0
 // CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[ARGC1]], ptr [[TMP22]], align 8
 // CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1
@@ -135,19 +135,19 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    store ptr @.red_comb., ptr [[TMP27]], align 8
 // CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6
 // CHECK1-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP28]], i8 0, i64 4, i1 false)
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1
 // CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP30:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[TMP30]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP30]], i64 0
 // CHECK1-NEXT:    [[TMP31:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP31]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP31]], i64 0
 // CHECK1-NEXT:    [[TMP32:%.*]] = load i32, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP33:%.*]] = sext i32 [[TMP32]] to i64
 // CHECK1-NEXT:    [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP33]]
 // CHECK1-NEXT:    [[TMP34:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr [[TMP34]], i64 9
+// CHECK1-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP34]], i64 9
 // CHECK1-NEXT:    [[TMP35:%.*]] = load ptr, ptr [[ARRAYIDX10]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 [[LB_ADD_LEN9]]
+// CHECK1-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP35]], i64 [[LB_ADD_LEN9]]
 // CHECK1-NEXT:    store ptr [[VLA]], ptr [[TMP29]], align 8
 // CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 1
 // CHECK1-NEXT:    store ptr [[ARRAYIDX8]], ptr [[TMP36]], align 8
@@ -458,9 +458,9 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]]
 // CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2
 // CHECK1-NEXT:    [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9
+// CHECK1-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP26]], i64 9
 // CHECK1-NEXT:    [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]]
+// CHECK1-NEXT:    [[ARRAYIDX3_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]]
 // CHECK1-NEXT:    [[TMP28:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64
 // CHECK1-NEXT:    [[TMP29:%.*]] = ptrtoint ptr [[TMP20]] to i64
 // CHECK1-NEXT:    [[TMP30:%.*]] = sub i64 [[TMP28]], [[TMP29]]
diff --git clang/test/OpenMP/reduction_implicit_map.cpp clang/test/OpenMP/reduction_implicit_map.cpp
index 4d2b93ffd471..a7db3da7d1f8 100644
--- clang/test/OpenMP/reduction_implicit_map.cpp
+++ clang/test/OpenMP/reduction_implicit_map.cpp
@@ -133,9 +133,9 @@ int main()
 // CHECK-NEXT:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
 // CHECK-NEXT:    store ptr [[E]], ptr [[E_ADDR]], align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 8
-// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i64 0
+// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw double, ptr [[TMP0]], i64 0
 // CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[E_ADDR]], align 8
-// CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i64 0
+// CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw double, ptr [[TMP1]], i64 0
 // CHECK-NEXT:    store double 0.000000e+00, ptr [[E2]], align 8
 // CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[E_ADDR]], align 8
 // CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[TMP2]] to i64
@@ -529,16 +529,16 @@ int main()
 // CHECK1-NEXT:    store i64 9, ptr [[DOTOMP_UB]], align 8
 // CHECK1-NEXT:    store i64 1, ptr [[DOTOMP_STRIDE]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x double]]], ptr [[TMP0]], i64 0, i64 0
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [10 x [10 x [10 x double]]], ptr [[TMP0]], i64 0, i64 0
 // CHECK1-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x [10 x double]], ptr [[ARRAYIDX]], i64 0, i64 0
-// CHECK1-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYDECAY]], i64 2
+// CHECK1-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw [10 x double], ptr [[ARRAYDECAY]], i64 2
 // CHECK1-NEXT:    [[ARRAYDECAY2:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX1]], i64 0, i64 0
-// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY2]], i64 1
-// CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x [10 x [10 x double]]], ptr [[TMP0]], i64 0, i64 1
+// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw double, ptr [[ARRAYDECAY2]], i64 1
+// CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [10 x [10 x [10 x double]]], ptr [[TMP0]], i64 0, i64 1
 // CHECK1-NEXT:    [[ARRAYDECAY5:%.*]] = getelementptr inbounds [10 x [10 x double]], ptr [[ARRAYIDX4]], i64 0, i64 0
-// CHECK1-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYDECAY5]], i64 5
+// CHECK1-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw [10 x double], ptr [[ARRAYDECAY5]], i64 5
 // CHECK1-NEXT:    [[ARRAYDECAY7:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX6]], i64 0, i64 0
-// CHECK1-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY7]], i64 1
+// CHECK1-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw double, ptr [[ARRAYDECAY7]], i64 1
 // CHECK1-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64
 // CHECK1-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64
 // CHECK1-NEXT:    [[TMP3:%.*]] = sub i64 [[TMP1]], [[TMP2]]
@@ -564,18 +564,18 @@ int main()
 // CHECK1-NEXT:    [[TMP11:%.*]] = sub i64 [[TMP9]], [[TMP10]]
 // CHECK1-NEXT:    [[TMP12:%.*]] = sdiv exact i64 [[TMP11]], ptrtoint (ptr getelementptr (double, ptr null, i32 1) to i64)
 // CHECK1-NEXT:    [[TMP13:%.*]] = getelementptr double, ptr [[VLA]], i64 [[TMP12]]
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [1 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds nuw [1 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0
 // CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0
-// CHECK1-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x [10 x double]]], ptr [[TMP0]], i64 0, i64 0
+// CHECK1-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds nuw [10 x [10 x [10 x double]]], ptr [[TMP0]], i64 0, i64 0
 // CHECK1-NEXT:    [[ARRAYDECAY10:%.*]] = getelementptr inbounds [10 x [10 x double]], ptr [[ARRAYIDX9]], i64 0, i64 0
-// CHECK1-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYDECAY10]], i64 2
+// CHECK1-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds nuw [10 x double], ptr [[ARRAYDECAY10]], i64 2
 // CHECK1-NEXT:    [[ARRAYDECAY12:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX11]], i64 0, i64 0
-// CHECK1-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY12]], i64 1
-// CHECK1-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x [10 x double]]], ptr [[TMP0]], i64 0, i64 1
+// CHECK1-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds nuw double, ptr [[ARRAYDECAY12]], i64 1
+// CHECK1-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds nuw [10 x [10 x [10 x double]]], ptr [[TMP0]], i64 0, i64 1
 // CHECK1-NEXT:    [[ARRAYDECAY15:%.*]] = getelementptr inbounds [10 x [10 x double]], ptr [[ARRAYIDX14]], i64 0, i64 0
-// CHECK1-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYDECAY15]], i64 5
+// CHECK1-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds nuw [10 x double], ptr [[ARRAYDECAY15]], i64 5
 // CHECK1-NEXT:    [[ARRAYDECAY17:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX16]], i64 0, i64 0
-// CHECK1-NEXT:    [[ARRAYIDX18:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY17]], i64 1
+// CHECK1-NEXT:    [[ARRAYIDX18:%.*]] = getelementptr inbounds nuw double, ptr [[ARRAYDECAY17]], i64 1
 // CHECK1-NEXT:    store ptr [[VLA]], ptr [[TMP14]], align 8
 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1
 // CHECK1-NEXT:    store ptr [[ARRAYIDX13]], ptr [[TMP15]], align 8
@@ -852,7 +852,7 @@ int main()
 // CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
 // CHECK2-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4
 // CHECK2-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4
-// CHECK2-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
+// CHECK2-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP7]], i32 0
 // CHECK2-NEXT:    [[TMP8:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4
 // CHECK2-NEXT:    [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 4
 // CHECK2-NEXT:    [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
@@ -930,10 +930,10 @@ int main()
 // CHECK2-NEXT:    [[TMP46:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4
 // CHECK2-NEXT:    [[TMP47:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4
 // CHECK2-NEXT:    [[TMP48:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4
-// CHECK2-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[TMP48]], i32 0
+// CHECK2-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP48]], i32 0
 // CHECK2-NEXT:    [[TMP49:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4
 // CHECK2-NEXT:    [[TMP50:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4
-// CHECK2-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP50]], i32 0
+// CHECK2-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP50]], i32 0
 // CHECK2-NEXT:    [[TMP51:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4
 // CHECK2-NEXT:    [[TMP52:%.*]] = mul nuw i32 [[TMP51]], 4
 // CHECK2-NEXT:    [[TMP53:%.*]] = sext i32 [[TMP52]] to i64
@@ -1007,7 +1007,7 @@ int main()
 // CHECK2-NEXT:    [[TMP86:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4
 // CHECK2-NEXT:    store i32 [[TMP86]], ptr [[SIZE_CASTED21]], align 4
 // CHECK2-NEXT:    [[TMP87:%.*]] = load i32, ptr [[SIZE_CASTED21]], align 4
-// CHECK2-NEXT:    [[ARRAYIDX22:%.*]] = getelementptr inbounds [10 x i32], ptr [[A]], i32 0, i32 0
+// CHECK2-NEXT:    [[ARRAYIDX22:%.*]] = getelementptr inbounds nuw [10 x i32], ptr [[A]], i32 0, i32 0
 // CHECK2-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 0
 // CHECK2-NEXT:    store i32 [[TMP87]], ptr [[TMP88]], align 4
 // CHECK2-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS24]], i32 0, i32 0
@@ -1480,9 +1480,9 @@ int main()
 // CHECK2-NEXT:    store ptr [[OUTPUT]], ptr [[OUTPUT_ADDR]], align 4
 // CHECK2-NEXT:    store ptr [[INPUT]], ptr [[INPUT_ADDR]], align 4
 // CHECK2-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4
-// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0
+// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP0]], i32 0
 // CHECK2-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4
-// CHECK2-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 2
+// CHECK2-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 2
 // CHECK2-NEXT:    [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [3 x i32], ptr [[OUTPUT2]], i32 0, i32 0
 // CHECK2-NEXT:    [[TMP2:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 3
 // CHECK2-NEXT:    [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP2]]
@@ -1664,9 +1664,9 @@ int main()
 // CHECK2-NEXT:    store i32 1, ptr [[DOTOMP_STRIDE]], align 4
 // CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK2-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4
-// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
+// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP6]], i32 0
 // CHECK2-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4
-// CHECK2-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 2
+// CHECK2-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP7]], i32 2
 // CHECK2-NEXT:    [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [3 x i32], ptr [[OUTPUT4]], i32 0, i32 0
 // CHECK2-NEXT:    [[TMP8:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 3
 // CHECK2-NEXT:    [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP8]]
@@ -1878,8 +1878,8 @@ int main()
 // CHECK2-NEXT:    store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4
 // CHECK2-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
 // CHECK2-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 0
-// CHECK2-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 1
+// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [10 x i32], ptr [[TMP0]], i32 0, i32 0
+// CHECK2-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw [10 x i32], ptr [[TMP0]], i32 0, i32 1
 // CHECK2-NEXT:    [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x i32], ptr [[A2]], i32 0, i32 0
 // CHECK2-NEXT:    [[TMP1:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 2
 // CHECK2-NEXT:    [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP1]]
diff --git clang/test/OpenMP/sections_reduction_task_codegen.cpp clang/test/OpenMP/sections_reduction_task_codegen.cpp
index 1a2cf7aede32..5d749eeb8177 100644
--- clang/test/OpenMP/sections_reduction_task_codegen.cpp
+++ clang/test/OpenMP/sections_reduction_task_codegen.cpp
@@ -82,16 +82,16 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[ARGC1]], align 4
 // CHECK1-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP2]], i64 0
 // CHECK1-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP3]], i64 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
 // CHECK1-NEXT:    [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP5]]
 // CHECK1-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[TMP1]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP6]], i64 9
+// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP6]], i64 9
 // CHECK1-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 [[LB_ADD_LEN]]
+// CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP7]], i64 [[LB_ADD_LEN]]
 // CHECK1-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64
 // CHECK1-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64
 // CHECK1-NEXT:    [[TMP10:%.*]] = sub i64 [[TMP8]], [[TMP9]]
@@ -121,7 +121,7 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP21]]
 // CHECK1-NEXT:    store ptr [[_TMP5]], ptr [[TMP]], align 8
 // CHECK1-NEXT:    store ptr [[TMP22]], ptr [[_TMP5]], align 8
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0
 // CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[ARGC1]], ptr [[TMP23]], align 8
 // CHECK1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1
@@ -136,19 +136,19 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    store ptr @.red_comb., ptr [[TMP28]], align 8
 // CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6
 // CHECK1-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP29]], i8 0, i64 4, i1 false)
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1
 // CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP31:%.*]] = load ptr, ptr [[TMP1]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[TMP31]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP31]], i64 0
 // CHECK1-NEXT:    [[TMP32:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP32]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP32]], i64 0
 // CHECK1-NEXT:    [[TMP33:%.*]] = load i32, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP34:%.*]] = sext i32 [[TMP33]] to i64
 // CHECK1-NEXT:    [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP34]]
 // CHECK1-NEXT:    [[TMP35:%.*]] = load ptr, ptr [[TMP1]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr [[TMP35]], i64 9
+// CHECK1-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP35]], i64 9
 // CHECK1-NEXT:    [[TMP36:%.*]] = load ptr, ptr [[ARRAYIDX10]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[TMP36]], i64 [[LB_ADD_LEN9]]
+// CHECK1-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP36]], i64 [[LB_ADD_LEN9]]
 // CHECK1-NEXT:    store ptr [[VLA]], ptr [[TMP30]], align 8
 // CHECK1-NEXT:    [[TMP37:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 1
 // CHECK1-NEXT:    store ptr [[ARRAYIDX8]], ptr [[TMP37]], align 8
@@ -463,9 +463,9 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]]
 // CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2
 // CHECK1-NEXT:    [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9
+// CHECK1-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP26]], i64 9
 // CHECK1-NEXT:    [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]]
+// CHECK1-NEXT:    [[ARRAYIDX3_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]]
 // CHECK1-NEXT:    [[TMP28:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64
 // CHECK1-NEXT:    [[TMP29:%.*]] = ptrtoint ptr [[TMP20]] to i64
 // CHECK1-NEXT:    [[TMP30:%.*]] = sub i64 [[TMP28]], [[TMP29]]
diff --git clang/test/OpenMP/target_data_use_device_addr_codegen.cpp clang/test/OpenMP/target_data_use_device_addr_codegen.cpp
index d912f801a33d..19a523ee165c 100644
--- clang/test/OpenMP/target_data_use_device_addr_codegen.cpp
+++ clang/test/OpenMP/target_data_use_device_addr_codegen.cpp
@@ -54,12 +54,12 @@ int main() {
 // CHECK: [[SIZES:%.+]] = alloca [6 x i64],
 // CHECK: [[VLA_ADDR:%.+]] = alloca float, i64 %{{.+}},
 // CHECK: [[PTR:%.+]] = load ptr, ptr [[PTR_ADDR]],
-// CHECK-NEXT: [[ARR_IDX:%.+]] = getelementptr inbounds float, ptr [[PTR]], i64 3
+// CHECK-NEXT: [[ARR_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[PTR]], i64 3
 // CHECK: [[P5:%.+]] = load ptr, ptr [[PTR_ADDR]], align 8
 // CHECK-NEXT: [[ARR_IDX1:%.+]] = getelementptr inbounds float, ptr [[P5]], i64 0
 // CHECK: [[P7:%.+]] = load ptr, ptr [[REF_ADDR]],
 // CHECK-NEXT: [[REF:%.+]] = load ptr, ptr [[REF_ADDR]],
-// CHECK-NEXT: [[ARR_IDX2:%.+]] = getelementptr inbounds [4 x float], ptr [[ARR_ADDR]], i64 0, i64 0
+// CHECK-NEXT: [[ARR_IDX2:%.+]] = getelementptr inbounds nuw [4 x float], ptr [[ARR_ADDR]], i64 0, i64 0
 // CHECK: [[P10:%.+]] = mul nuw i64 {{.+}}, 4
 // CHECK-NEXT: [[ARR_IDX5:%.+]] = getelementptr inbounds float, ptr [[VLA_ADDR]], i64 0
 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[SIZES]], ptr align 8 [[SIZES1]], i64 48, i1 false)
@@ -132,14 +132,14 @@ int main() {
 // CHECK: [[SIZES:%.+]] = alloca [6 x i64],
 // CHECK: [[A_ADDR:%.+]] = getelementptr inbounds nuw %struct.S, ptr [[THIS:%.+]], i32 0, i32 0
 // CHECK: [[PTR_ADDR:%.+]] = getelementptr inbounds nuw %struct.S, ptr [[THIS]], i32 0, i32 1
-// CHECK: [[ARR_IDX:%.+]] = getelementptr inbounds i32, ptr %{{.+}}, i64 3
+// CHECK: [[ARR_IDX:%.+]] = getelementptr inbounds nuw i32, ptr %{{.+}}, i64 3
 // CHECK: [[REF_REF:%.+]] = getelementptr inbounds nuw %struct.S, ptr [[THIS]], i32 0, i32 2
 // CHECK: [[REF_PTR:%.+]] = load ptr, ptr [[REF_REF]],
 // CHECK-NEXT: [[P3:%.+]] = getelementptr inbounds nuw %struct.S, ptr [[THIS]], i32 0, i32 1
 // CHECK: [[ARR_IDX5:%.+]] = getelementptr inbounds i32, ptr {{.+}}, i64 0
 // CHECK: [[ARR_ADDR:%.+]] = getelementptr inbounds nuw %struct.S, ptr [[THIS]], i32 0, i32 3
 
-// CHECK: [[ARR_IDX6:%.+]] = getelementptr inbounds [4 x i32], ptr [[ARR_ADDR]], i64 0, i64 0
+// CHECK: [[ARR_IDX6:%.+]] = getelementptr inbounds nuw [4 x i32], ptr [[ARR_ADDR]], i64 0, i64 0
 // CHECK: [[A_ADDR2:%.+]] = getelementptr inbounds nuw %struct.S, ptr [[THIS]], i32 0, i32 0
 // CHECK: [[P4:%.+]] = mul nuw i64 [[CONV:%.+]], 4
 // CHECK: [[A_ADDR3:%.+]] = getelementptr inbounds nuw %struct.S, ptr [[THIS]], i32 0, i32 0
@@ -147,7 +147,7 @@ int main() {
 // CHECK: [[L6:%.+]] = sext i32 [[L5]] to i64
 // CHECK: [[LB_ADD_LEN:%lb_add_len]] = add nsw i64 -1, [[L6]]
 // CHECK: [[ARR_ADDR9:%.+]] = getelementptr inbounds nuw %struct.S, ptr [[THIS]], i32 0, i32 3
-// CHECK: [[ARR_IDX10:%arrayidx.+]] = getelementptr inbounds [4 x i32], ptr [[ARR_ADDR9]], i64 0, i64 %lb_add_len
+// CHECK: [[ARR_IDX10:%arrayidx.+]] = getelementptr inbounds nuw [4 x i32], ptr [[ARR_ADDR9]], i64 0, i64 %lb_add_len
 // CHECK: [[ARR_END:%.+]] = getelementptr i32, ptr [[ARR_IDX10]], i32 1
 // CHECK: [[E:%.+]] = ptrtoint ptr [[ARR_END]] to i64
 // CHECK: [[B:%.+]] = ptrtoint ptr [[A_ADDR]] to i64
diff --git clang/test/OpenMP/target_data_use_device_ptr_codegen.cpp clang/test/OpenMP/target_data_use_device_ptr_codegen.cpp
index c90819dc2a22..6d1c0213d648 100644
--- clang/test/OpenMP/target_data_use_device_ptr_codegen.cpp
+++ clang/test/OpenMP/target_data_use_device_ptr_codegen.cpp
@@ -49,14 +49,14 @@ void foo(float *&lr, T *&tr) {
   // CK1-NOT: store ptr [[VAL]], ptr [[DECL]],
   // CK1:     store ptr [[VAL]], ptr [[PVT:%.+]],
   // CK1:     [[TT:%.+]] = load ptr, ptr [[PVT]],
-  // CK1:     getelementptr inbounds double, ptr [[TT]], i32 1
+  // CK1:     getelementptr inbounds nuw double, ptr [[TT]], i32 1
   #pragma omp target data map(g[:10]) use_device_ptr(g)
   {
     ++g;
   }
   // CK1:     call void @__tgt_target_data_end{{.+}}[[MTYPE00]]
   // CK1:     [[TTT:%.+]] = load ptr, ptr [[DECL]],
-  // CK1:     getelementptr inbounds double, ptr [[TTT]], i32 1
+  // CK1:     getelementptr inbounds nuw double, ptr [[TTT]], i32 1
   ++g;
 
   // CK1:     [[T1:%.+]] = load ptr, ptr [[DECL:%.+]],
@@ -67,26 +67,26 @@ void foo(float *&lr, T *&tr) {
   // CK1-NOT: store ptr [[VAL]], ptr [[DECL]],
   // CK1:     store ptr [[VAL]], ptr [[PVT:%.+]],
   // CK1:     [[TT1:%.+]] = load ptr, ptr [[PVT]],
-  // CK1:     getelementptr inbounds float, ptr [[TT1]], i32 1
+  // CK1:     getelementptr inbounds nuw float, ptr [[TT1]], i32 1
   #pragma omp target data map(l[:10]) use_device_ptr(l)
   {
     ++l;
   }
   // CK1:     call void @__tgt_target_data_end{{.+}}[[MTYPE01]]
   // CK1:     [[TTT:%.+]] = load ptr, ptr [[DECL]],
-  // CK1:     getelementptr inbounds float, ptr [[TTT]], i32 1
+  // CK1:     getelementptr inbounds nuw float, ptr [[TTT]], i32 1
   ++l;
 
   // CK1-NOT: call void @__tgt_target
   // CK1:     [[TTT:%.+]] = load ptr, ptr [[DECL]],
-  // CK1:     getelementptr inbounds float, ptr [[TTT]], i32 1
+  // CK1:     getelementptr inbounds nuw float, ptr [[TTT]], i32 1
   #pragma omp target data map(l[:10]) use_device_ptr(l) if(0)
   {
     ++l;
   }
   // CK1-NOT: call void @__tgt_target
   // CK1:     [[TTT:%.+]] = load ptr, ptr [[DECL]],
-  // CK1:     getelementptr inbounds float, ptr [[TTT]], i32 1
+  // CK1:     getelementptr inbounds nuw float, ptr [[TTT]], i32 1
   ++l;
 
   // CK1:     [[T1:%.+]] = load ptr, ptr [[DECL:%.+]],
@@ -97,14 +97,14 @@ void foo(float *&lr, T *&tr) {
   // CK1-NOT: store ptr [[VAL]], ptr [[DECL]],
   // CK1:     store ptr [[VAL]], ptr [[PVT:%.+]],
   // CK1:     [[TT1:%.+]] = load ptr, ptr [[PVT]],
-  // CK1:     getelementptr inbounds float, ptr [[TT1]], i32 1
+  // CK1:     getelementptr inbounds nuw float, ptr [[TT1]], i32 1
   #pragma omp target data map(l[:10]) use_device_ptr(l) if(1)
   {
     ++l;
   }
   // CK1:     call void @__tgt_target_data_end{{.+}}[[MTYPE03]]
   // CK1:     [[TTT:%.+]] = load ptr, ptr [[DECL]],
-  // CK1:     getelementptr inbounds float, ptr [[TTT]], i32 1
+  // CK1:     getelementptr inbounds nuw float, ptr [[TTT]], i32 1
   ++l;
 
   // CK1:     [[CMP:%.+]] = icmp ne ptr %{{.+}}, null
@@ -119,12 +119,12 @@ void foo(float *&lr, T *&tr) {
   // CK1-NOT: store ptr [[VAL]], ptr [[DECL]],
   // CK1:     store ptr [[VAL]], ptr [[PVT:%.+]],
   // CK1:     [[TT1:%.+]] = load ptr, ptr [[PVT]],
-  // CK1:     getelementptr inbounds float, ptr [[TT1]], i32 1
+  // CK1:     getelementptr inbounds nuw float, ptr [[TT1]], i32 1
   // CK1:     br label %[[BEND:.+]]
 
   // CK1:     [[BELSE]]:
   // CK1:     [[TTT:%.+]] = load ptr, ptr [[DECL]],
-  // CK1:     getelementptr inbounds float, ptr [[TTT]], i32 1
+  // CK1:     getelementptr inbounds nuw float, ptr [[TTT]], i32 1
   // CK1:     br label %[[BEND]]
   #pragma omp target data map(l[:10]) use_device_ptr(l) if(lr != 0)
   {
@@ -142,7 +142,7 @@ void foo(float *&lr, T *&tr) {
 
   // CK1:     [[BEND]]:
   // CK1:     [[TTT:%.+]] = load ptr, ptr [[DECL]],
-  // CK1:     getelementptr inbounds float, ptr [[TTT]], i32 1
+  // CK1:     getelementptr inbounds nuw float, ptr [[TTT]], i32 1
   ++l;
 
   // CK1:     [[T2:%.+]] = load ptr, ptr [[DECL:%.+]],
@@ -156,7 +156,7 @@ void foo(float *&lr, T *&tr) {
   // CK1:     store ptr [[PVTV]], ptr [[PVT:%.+]],
   // CK1:     [[TT1:%.+]] = load ptr, ptr [[PVT]],
   // CK1:     [[TT2:%.+]] = load ptr, ptr [[TT1]],
-  // CK1:     getelementptr inbounds float, ptr [[TT2]], i32 1
+  // CK1:     getelementptr inbounds nuw float, ptr [[TT2]], i32 1
   #pragma omp target data map(lr[:10]) use_device_ptr(lr)
   {
     ++lr;
@@ -164,7 +164,7 @@ void foo(float *&lr, T *&tr) {
   // CK1:     call void @__tgt_target_data_end{{.+}}[[MTYPE05]]
   // CK1:     [[TTT:%.+]] = load ptr, ptr [[DECL]],
   // CK1:     [[TTTT:%.+]] = load ptr, ptr [[TTT]],
-  // CK1:     getelementptr inbounds float, ptr [[TTTT]], i32 1
+  // CK1:     getelementptr inbounds nuw float, ptr [[TTTT]], i32 1
   ++lr;
 
   // CK1:     [[T1:%.+]] = load ptr, ptr [[DECL:%.+]],
@@ -175,14 +175,14 @@ void foo(float *&lr, T *&tr) {
   // CK1-NOT: store ptr [[VAL]], ptr [[DECL]],
   // CK1:     store ptr [[VAL]], ptr [[PVT:%.+]],
   // CK1:     [[TT1:%.+]] = load ptr, ptr [[PVT]],
-  // CK1:     getelementptr inbounds i32, ptr [[TT1]], i32 1
+  // CK1:     getelementptr inbounds nuw i32, ptr [[TT1]], i32 1
   #pragma omp target data map(t[:10]) use_device_ptr(t)
   {
     ++t;
   }
   // CK1:     call void @__tgt_target_data_end{{.+}}[[MTYPE06]]
   // CK1:     [[TTT:%.+]] = load ptr, ptr [[DECL]],
-  // CK1:     getelementptr inbounds i32, ptr [[TTT]], i32 1
+  // CK1:     getelementptr inbounds nuw i32, ptr [[TTT]], i32 1
   ++t;
 
   // CK1:     [[T2:%.+]] = load ptr, ptr [[DECL:%.+]],
@@ -196,7 +196,7 @@ void foo(float *&lr, T *&tr) {
   // CK1:     store ptr [[PVTV]], ptr [[PVT:%.+]],
   // CK1:     [[TT1:%.+]] = load ptr, ptr [[PVT]],
   // CK1:     [[TT2:%.+]] = load ptr, ptr [[TT1]],
-  // CK1:     getelementptr inbounds i32, ptr [[TT2]], i32 1
+  // CK1:     getelementptr inbounds nuw i32, ptr [[TT2]], i32 1
   #pragma omp target data map(tr[:10]) use_device_ptr(tr)
   {
     ++tr;
@@ -204,7 +204,7 @@ void foo(float *&lr, T *&tr) {
   // CK1:     call void @__tgt_target_data_end{{.+}}[[MTYPE07]]
   // CK1:     [[TTT:%.+]] = load ptr, ptr [[DECL]],
   // CK1:     [[TTTT:%.+]] = load ptr, ptr [[TTT]],
-  // CK1:     getelementptr inbounds i32, ptr [[TTTT]], i32 1
+  // CK1:     getelementptr inbounds nuw i32, ptr [[TTTT]], i32 1
   ++tr;
 
   // CK1:     [[T1:%.+]] = load ptr, ptr [[DECL:%.+]],
@@ -215,14 +215,14 @@ void foo(float *&lr, T *&tr) {
   // CK1-NOT: store ptr [[VAL]], ptr [[DECL]],
   // CK1:     store ptr [[VAL]], ptr [[PVT:%.+]],
   // CK1:     [[TT1:%.+]] = load ptr, ptr [[PVT]],
-  // CK1:     getelementptr inbounds float, ptr [[TT1]], i32 1
+  // CK1:     getelementptr inbounds nuw float, ptr [[TT1]], i32 1
   #pragma omp target data map(l[:10], t[:10]) use_device_ptr(l)
   {
     ++l; ++t;
   }
   // CK1:     call void @__tgt_target_data_end{{.+}}[[MTYPE08]]
   // CK1:     [[TTT:%.+]] = load ptr, ptr [[DECL]],
-  // CK1:     getelementptr inbounds float, ptr [[TTT]], i32 1
+  // CK1:     getelementptr inbounds nuw float, ptr [[TTT]], i32 1
   ++l; ++t;
 
 
@@ -232,18 +232,18 @@ void foo(float *&lr, T *&tr) {
   // CK1:     [[VAL:%.+]] = load ptr, ptr {{%.+}},
   // CK1:     store ptr [[VAL]], ptr [[PVT:%.+]],
   // CK1:     [[_TT1:%.+]] = load ptr, ptr [[_PVT]],
-  // CK1:     getelementptr inbounds float, ptr [[_TT1]], i32 1
+  // CK1:     getelementptr inbounds nuw float, ptr [[_TT1]], i32 1
   // CK1:     [[TT1:%.+]] = load ptr, ptr [[PVT]],
-  // CK1:     getelementptr inbounds i32, ptr [[TT1]], i32 1
+  // CK1:     getelementptr inbounds nuw i32, ptr [[TT1]], i32 1
   #pragma omp target data map(l[:10], t[:10]) use_device_ptr(l) use_device_ptr(t)
   {
     ++l; ++t;
   }
   // CK1:     call void @__tgt_target_data_end{{.+}}[[MTYPE09]]
   // CK1:     [[_TTT:%.+]] = load ptr, ptr {{%.+}},
-  // CK1:     getelementptr inbounds float, ptr [[_TTT]], i32 1
+  // CK1:     getelementptr inbounds nuw float, ptr [[_TTT]], i32 1
   // CK1:     [[TTT:%.+]] = load ptr, ptr {{%.+}},
-  // CK1:     getelementptr inbounds i32, ptr [[TTT]], i32 1
+  // CK1:     getelementptr inbounds nuw i32, ptr [[TTT]], i32 1
   ++l; ++t;
 
   // CK1:     call void @__tgt_target_data_begin{{.+}}[[MTYPE10]]
@@ -252,18 +252,18 @@ void foo(float *&lr, T *&tr) {
   // CK1:     [[VAL:%.+]] = load ptr, ptr {{%.+}},
   // CK1:     store ptr [[VAL]], ptr [[PVT:%.+]],
   // CK1:     [[_TT1:%.+]] = load ptr, ptr [[_PVT]],
-  // CK1:     getelementptr inbounds float, ptr [[_TT1]], i32 1
+  // CK1:     getelementptr inbounds nuw float, ptr [[_TT1]], i32 1
   // CK1:     [[TT1:%.+]] = load ptr, ptr [[PVT]],
-  // CK1:     getelementptr inbounds i32, ptr [[TT1]], i32 1
+  // CK1:     getelementptr inbounds nuw i32, ptr [[TT1]], i32 1
   #pragma omp target data map(l[:10], t[:10]) use_device_ptr(l,t)
   {
     ++l; ++t;
   }
   // CK1:     call void @__tgt_target_data_end{{.+}}[[MTYPE10]]
   // CK1:     [[_TTT:%.+]] = load ptr, ptr {{%.+}},
-  // CK1:     getelementptr inbounds float, ptr [[_TTT]], i32 1
+  // CK1:     getelementptr inbounds nuw float, ptr [[_TTT]], i32 1
   // CK1:     [[TTT:%.+]] = load ptr, ptr {{%.+}},
-  // CK1:     getelementptr inbounds i32, ptr [[TTT]], i32 1
+  // CK1:     getelementptr inbounds nuw i32, ptr [[TTT]], i32 1
   ++l; ++t;
 
   // CK1:     [[T1:%.+]] = load ptr, ptr [[DECL:%.+]],
@@ -274,14 +274,14 @@ void foo(float *&lr, T *&tr) {
   // CK1-NOT: store ptr [[VAL]], ptr [[DECL]],
   // CK1:     store ptr [[VAL]], ptr [[PVT:%.+]],
   // CK1:     [[TT1:%.+]] = load ptr, ptr [[PVT]],
-  // CK1:     getelementptr inbounds i32, ptr [[TT1]], i32 1
+  // CK1:     getelementptr inbounds nuw i32, ptr [[TT1]], i32 1
   #pragma omp target data map(l[:10]) use_device_ptr(t)
   {
     ++l; ++t;
   }
   // CK1:     call void @__tgt_target_data_end{{.+}}[[MTYPE11]]
   // CK1:     [[TTT:%.+]] = load ptr, ptr [[DECL]],
-  // CK1:     getelementptr inbounds i32, ptr [[TTT]], i32 1
+  // CK1:     getelementptr inbounds nuw i32, ptr [[TTT]], i32 1
   ++l; ++t;
 
   // CK1:     [[T2:%.+]] = load ptr, ptr [[DECL:%.+]],
@@ -295,7 +295,7 @@ void foo(float *&lr, T *&tr) {
   // CK1:     store ptr [[PVTV]], ptr [[PVT:%.+]],
   // CK1:     [[TT1:%.+]] = load ptr, ptr [[PVT]],
   // CK1:     [[TT2:%.+]] = load ptr, ptr [[TT1]],
-  // CK1:     getelementptr inbounds i32, ptr [[TT2]], i32 1
+  // CK1:     getelementptr inbounds nuw i32, ptr [[TT2]], i32 1
   #pragma omp target data map(l[:10]) use_device_ptr(tr)
   {
     ++l; ++tr;
@@ -303,7 +303,7 @@ void foo(float *&lr, T *&tr) {
   // CK1:     call void @__tgt_target_data_end{{.+}}[[MTYPE12]]
   // CK1:     [[TTT:%.+]] = load ptr, ptr [[DECL]],
   // CK1:     [[TTTT:%.+]] = load ptr, ptr [[TTT]],
-  // CK1:     getelementptr inbounds i32, ptr [[TTTT]], i32 1
+  // CK1:     getelementptr inbounds nuw i32, ptr [[TTTT]], i32 1
   ++l; ++tr;
 
 }
@@ -354,7 +354,7 @@ struct ST {
     // CK2:     store ptr [[PVT]], ptr [[PVT2:%.+]],
     // CK2:     [[TT1:%.+]] = load ptr, ptr [[PVT2]],
     // CK2:     [[TT2:%.+]] = load ptr, ptr [[TT1]],
-    // CK2:     getelementptr inbounds double, ptr [[TT2]], i32 1
+    // CK2:     getelementptr inbounds nuw double, ptr [[TT2]], i32 1
     #pragma omp target data map(a[:10]) use_device_ptr(a)
     {
       a++;
@@ -362,7 +362,7 @@ struct ST {
     // CK2:     call void @__tgt_target_data_end{{.+}}[[MTYPE00]]
     // CK2:     [[DECL:%.+]] = getelementptr inbounds nuw [[ST]], ptr %this1, i32 0, i32 0
     // CK2:     [[TTT:%.+]] = load ptr, ptr [[DECL]],
-    // CK2:     getelementptr inbounds double, ptr [[TTT]], i32 1
+    // CK2:     getelementptr inbounds nuw double, ptr [[TTT]], i32 1
     a++;
 
     // CK2:     [[BP:%.+]] = getelementptr inbounds [2 x ptr], ptr %{{.+}}, i32 0, i32 1
@@ -373,7 +373,7 @@ struct ST {
     // CK2:     store ptr [[PVT]], ptr [[PVT2:%.+]],
     // CK2:     [[TT1:%.+]] = load ptr, ptr [[PVT2]],
     // CK2:     [[TT2:%.+]] = load ptr, ptr [[TT1]],
-    // CK2:     getelementptr inbounds double, ptr [[TT2]], i32 1
+    // CK2:     getelementptr inbounds nuw double, ptr [[TT2]], i32 1
     #pragma omp target data map(b[:10]) use_device_ptr(b)
     {
       b++;
@@ -382,7 +382,7 @@ struct ST {
     // CK2:     [[DECL:%.+]] = getelementptr inbounds nuw [[ST]], ptr %{{.+}}, i32 0, i32 1
     // CK2:     [[TTT:%.+]] = load ptr, ptr [[DECL]],
     // CK2:     [[TTTT:%.+]] = load ptr, ptr [[TTT]],
-    // CK2:     getelementptr inbounds double, ptr [[TTTT]], i32 1
+    // CK2:     getelementptr inbounds nuw double, ptr [[TTTT]], i32 1
     b++;
 
     // CK2:     [[BP:%.+]] = getelementptr inbounds [3 x ptr], ptr %{{.+}}, i32 0, i32 2
@@ -393,7 +393,7 @@ struct ST {
     // CK2:     store ptr [[PVT]], ptr [[PVT2:%.+]],
     // CK2:     [[TT1:%.+]] = load ptr, ptr [[PVT2]],
     // CK2:     [[TT2:%.+]] = load ptr, ptr [[TT1]],
-    // CK2:     getelementptr inbounds double, ptr [[TT2]], i32 1
+    // CK2:     getelementptr inbounds nuw double, ptr [[TT2]], i32 1
     #pragma omp target data map(la[:10]) use_device_ptr(a)
     {
       a++;
@@ -402,7 +402,7 @@ struct ST {
     // CK2:     call void @__tgt_target_data_end{{.+}}[[MTYPE02]]
     // CK2:     [[DECL:%.+]] = getelementptr inbounds nuw [[ST]], ptr %this1, i32 0, i32 0
     // CK2:     [[TTT:%.+]] = load ptr, ptr [[DECL]],
-    // CK2:     getelementptr inbounds double, ptr [[TTT]], i32 1
+    // CK2:     getelementptr inbounds nuw double, ptr [[TTT]], i32 1
     a++;
     la++;
 
@@ -419,10 +419,10 @@ struct ST {
     // CK2:     store ptr [[PVT1]], ptr [[_PVT1:%.+]],
     // CK2:     [[TT2:%.+]] = load ptr, ptr [[_PVT2]],
     // CK2:     [[_TT2:%.+]] = load ptr, ptr [[TT2]],
-    // CK2:     getelementptr inbounds double, ptr [[_TT2]], i32 1
+    // CK2:     getelementptr inbounds nuw double, ptr [[_TT2]], i32 1
     // CK2:     [[TT1:%.+]] = load ptr, ptr [[_PVT1]],
     // CK2:     [[_TT1:%.+]] = load ptr, ptr [[TT1]],
-    // CK2:     getelementptr inbounds double, ptr [[_TT1]], i32 1
+    // CK2:     getelementptr inbounds nuw double, ptr [[_TT1]], i32 1
     #pragma omp target data map(b[:10]) use_device_ptr(a, b)
     {
       a++;
@@ -431,11 +431,11 @@ struct ST {
     // CK2:     call void @__tgt_target_data_end{{.+}}[[MTYPE03]]
     // CK2:     [[DECL:%.+]] = getelementptr inbounds nuw [[ST]], ptr %this1, i32 0, i32 0
     // CK2:     [[TTT:%.+]] = load ptr, ptr [[DECL]],
-    // CK2:     getelementptr inbounds double, ptr [[TTT]], i32 1
+    // CK2:     getelementptr inbounds nuw double, ptr [[TTT]], i32 1
     // CK2:     [[_DECL:%.+]] = getelementptr inbounds nuw [[ST]], ptr %this1, i32 0, i32 1
     // CK2:     [[_TTT:%.+]] = load ptr, ptr [[_DECL]],
     // CK2:     [[_TTTT:%.+]] = load ptr, ptr [[_TTT]],
-    // CK2:     getelementptr inbounds double, ptr [[_TTTT]], i32 1
+    // CK2:     getelementptr inbounds nuw double, ptr [[_TTTT]], i32 1
     a++;
     b++;
   }
diff --git clang/test/OpenMP/target_has_device_addr_codegen.cpp clang/test/OpenMP/target_has_device_addr_codegen.cpp
index 08bcc87ca5f0..39eaedb0e48d 100644
--- clang/test/OpenMP/target_has_device_addr_codegen.cpp
+++ clang/test/OpenMP/target_has_device_addr_codegen.cpp
@@ -586,7 +586,7 @@ void use_template() {
 // CHECK-NEXT:    store ptr [[K]], ptr [[K_ADDR]], align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[K_ADDR]], align 8
 // CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8
-// CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1
+// CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 1
 // CHECK-NEXT:    store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 8
 // CHECK-NEXT:    ret void
 //
@@ -601,7 +601,7 @@ void use_template() {
 // CHECK-NEXT:    store ptr [[TMP0]], ptr [[TMP]], align 8
 // CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8
 // CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
-// CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 1
+// CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP2]], i32 1
 // CHECK-NEXT:    store ptr [[INCDEC_PTR]], ptr [[TMP1]], align 8
 // CHECK-NEXT:    ret void
 //
@@ -1079,7 +1079,7 @@ void use_template() {
 // CHECK-NEXT:    store ptr [[K]], ptr [[K_ADDR]], align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[K_ADDR]], align 8
 // CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8
-// CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1
+// CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 1
 // CHECK-NEXT:    store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 8
 // CHECK-NEXT:    ret void
 //
@@ -1094,7 +1094,7 @@ void use_template() {
 // CHECK-NEXT:    store ptr [[TMP0]], ptr [[TMP]], align 8
 // CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8
 // CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
-// CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 1
+// CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP2]], i32 1
 // CHECK-NEXT:    store ptr [[INCDEC_PTR]], ptr [[TMP1]], align 8
 // CHECK-NEXT:    ret void
 //
@@ -1133,7 +1133,7 @@ void use_template() {
 // CHECK-NEXT:    store ptr [[K]], ptr [[K_ADDR]], align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[K_ADDR]], align 8
 // CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8
-// CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1
+// CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP1]], i32 1
 // CHECK-NEXT:    store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 8
 // CHECK-NEXT:    ret void
 //
@@ -1148,7 +1148,7 @@ void use_template() {
 // CHECK-NEXT:    store ptr [[TMP0]], ptr [[TMP]], align 8
 // CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8
 // CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
-// CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 1
+// CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP2]], i32 1
 // CHECK-NEXT:    store ptr [[INCDEC_PTR]], ptr [[TMP1]], align 8
 // CHECK-NEXT:    ret void
 //
@@ -1422,14 +1422,14 @@ void use_template() {
 // SIMD-ONLY0-NEXT:    store ptr [[K]], ptr [[Z]], align 8
 // SIMD-ONLY0-NEXT:    store ptr [[AA]], ptr [[RAA]], align 8
 // SIMD-ONLY0-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[K]], align 8
-// SIMD-ONLY0-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1
+// SIMD-ONLY0-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 1
 // SIMD-ONLY0-NEXT:    store ptr [[INCDEC_PTR]], ptr [[K]], align 8
 // SIMD-ONLY0-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[Z]], align 8
 // SIMD-ONLY0-NEXT:    store ptr [[TMP2]], ptr [[TMP]], align 8
 // SIMD-ONLY0-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[Z]], align 8
 // SIMD-ONLY0-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8
 // SIMD-ONLY0-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
-// SIMD-ONLY0-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 1
+// SIMD-ONLY0-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP5]], i32 1
 // SIMD-ONLY0-NEXT:    store ptr [[INCDEC_PTR1]], ptr [[TMP4]], align 8
 // SIMD-ONLY0-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[AA]], i64 0, i64 0
 // SIMD-ONLY0-NEXT:    store i32 1, ptr [[ARRAYIDX]], align 4
@@ -1478,14 +1478,14 @@ void use_template() {
 // SIMD-ONLY0-NEXT:    store ptr [[TMP0]], ptr [[K]], align 8
 // SIMD-ONLY0-NEXT:    store ptr [[K]], ptr [[Z]], align 8
 // SIMD-ONLY0-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[K]], align 8
-// SIMD-ONLY0-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1
+// SIMD-ONLY0-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 1
 // SIMD-ONLY0-NEXT:    store ptr [[INCDEC_PTR]], ptr [[K]], align 8
 // SIMD-ONLY0-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[Z]], align 8
 // SIMD-ONLY0-NEXT:    store ptr [[TMP2]], ptr [[TMP]], align 8
 // SIMD-ONLY0-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[Z]], align 8
 // SIMD-ONLY0-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8
 // SIMD-ONLY0-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
-// SIMD-ONLY0-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 1
+// SIMD-ONLY0-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP5]], i32 1
 // SIMD-ONLY0-NEXT:    store ptr [[INCDEC_PTR1]], ptr [[TMP4]], align 8
 // SIMD-ONLY0-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[AA]], i64 0, i64 0
 // SIMD-ONLY0-NEXT:    [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
@@ -1520,14 +1520,14 @@ void use_template() {
 // SIMD-ONLY0-NEXT:    store ptr [[TMP0]], ptr [[K]], align 8
 // SIMD-ONLY0-NEXT:    store ptr [[K]], ptr [[Z]], align 8
 // SIMD-ONLY0-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[K]], align 8
-// SIMD-ONLY0-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1
+// SIMD-ONLY0-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP1]], i32 1
 // SIMD-ONLY0-NEXT:    store ptr [[INCDEC_PTR]], ptr [[K]], align 8
 // SIMD-ONLY0-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[Z]], align 8
 // SIMD-ONLY0-NEXT:    store ptr [[TMP2]], ptr [[TMP]], align 8
 // SIMD-ONLY0-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[Z]], align 8
 // SIMD-ONLY0-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8
 // SIMD-ONLY0-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
-// SIMD-ONLY0-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i32 1
+// SIMD-ONLY0-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP5]], i32 1
 // SIMD-ONLY0-NEXT:    store ptr [[INCDEC_PTR1]], ptr [[TMP4]], align 8
 // SIMD-ONLY0-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x ptr], ptr [[AA]], i64 0, i64 0
 // SIMD-ONLY0-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
diff --git clang/test/OpenMP/target_in_reduction_codegen.cpp clang/test/OpenMP/target_in_reduction_codegen.cpp
index fb715e2de2a5..56191ee57513 100644
--- clang/test/OpenMP/target_in_reduction_codegen.cpp
+++ clang/test/OpenMP/target_in_reduction_codegen.cpp
@@ -70,7 +70,7 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[VLA:%.*]] = alloca i16, i64 [[TMP2]], align 16
 // CHECK1-NEXT:    store i64 [[TMP2]], ptr [[__VLA_EXPR0]], align 8
 // CHECK1-NEXT:    call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]])
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds nuw [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[A]], ptr [[TMP4]], align 8
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1
@@ -85,7 +85,7 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    store ptr @.red_comb., ptr [[TMP11]], align 8
 // CHECK1-NEXT:    [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6
 // CHECK1-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP12]], i8 0, i64 4, i1 false)
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_1:%.*]] = getelementptr inbounds [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_1:%.*]] = getelementptr inbounds nuw [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1
 // CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_1]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[B]], ptr [[TMP14]], align 8
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_1]], i32 0, i32 1
@@ -100,7 +100,7 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    store ptr @.red_comb..2, ptr [[TMP21]], align 8
 // CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_1]], i32 0, i32 6
 // CHECK1-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP22]], i8 0, i64 4, i1 false)
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_2:%.*]] = getelementptr inbounds [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 2
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_2:%.*]] = getelementptr inbounds nuw [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 2
 // CHECK1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_2]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[ARGC_ADDR]], ptr [[TMP24]], align 8
 // CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_2]], i32 0, i32 1
@@ -118,7 +118,7 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP35:%.*]] = call ptr @__kmpc_taskred_init(i32 [[TMP0]], i32 3, ptr [[DOTRD_INPUT_]])
 // CHECK1-NEXT:    store ptr [[TMP35]], ptr [[DOTTASK_RED_]], align 8
 // CHECK1-NEXT:    call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]])
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_4:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_3]], i64 0, i64 0
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_4:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_3]], i64 0, i64 0
 // CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0:%.*]], ptr [[DOTRD_INPUT_GEP_4]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[C]], ptr [[TMP36]], align 8
 // CHECK1-NEXT:    [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_4]], i32 0, i32 1
@@ -133,7 +133,7 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    store ptr @.red_comb..6, ptr [[TMP43]], align 8
 // CHECK1-NEXT:    [[TMP44:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_4]], i32 0, i32 6
 // CHECK1-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP44]], i8 0, i64 4, i1 false)
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_5:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_3]], i64 0, i64 1
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_5:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_3]], i64 0, i64 1
 // CHECK1-NEXT:    [[TMP46:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[VLA]], ptr [[TMP46]], align 8
 // CHECK1-NEXT:    [[TMP48:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 1
diff --git clang/test/OpenMP/target_is_device_ptr_codegen.cpp clang/test/OpenMP/target_is_device_ptr_codegen.cpp
index 3a1c168533c3..505c34e21733 100644
--- clang/test/OpenMP/target_is_device_ptr_codegen.cpp
+++ clang/test/OpenMP/target_is_device_ptr_codegen.cpp
@@ -2142,7 +2142,7 @@ void bar() {
 // CK10-NEXT:    [[G_ADDR:%.*]] = alloca ptr, align 8
 // CK10-NEXT:    store ptr [[G]], ptr [[G_ADDR]], align 8
 // CK10-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8
-// CK10-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1
+// CK10-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP0]], i32 1
 // CK10-NEXT:    store ptr [[INCDEC_PTR]], ptr [[G_ADDR]], align 8
 // CK10-NEXT:    ret void
 //
@@ -2153,7 +2153,7 @@ void bar() {
 // CK10-NEXT:    [[L_ADDR:%.*]] = alloca ptr, align 8
 // CK10-NEXT:    store ptr [[L]], ptr [[L_ADDR]], align 8
 // CK10-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[L_ADDR]], align 8
-// CK10-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 1
+// CK10-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i32 1
 // CK10-NEXT:    store ptr [[INCDEC_PTR]], ptr [[L_ADDR]], align 8
 // CK10-NEXT:    ret void
 //
@@ -2164,7 +2164,7 @@ void bar() {
 // CK10-NEXT:    [[T_ADDR:%.*]] = alloca ptr, align 8
 // CK10-NEXT:    store ptr [[T]], ptr [[T_ADDR]], align 8
 // CK10-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[T_ADDR]], align 8
-// CK10-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1
+// CK10-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP0]], i32 1
 // CK10-NEXT:    store ptr [[INCDEC_PTR]], ptr [[T_ADDR]], align 8
 // CK10-NEXT:    ret void
 //
@@ -2178,7 +2178,7 @@ void bar() {
 // CK10-NEXT:    store ptr [[LR_ADDR]], ptr [[TMP]], align 8
 // CK10-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8
 // CK10-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8
-// CK10-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 1
+// CK10-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw float, ptr [[TMP1]], i32 1
 // CK10-NEXT:    store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 8
 // CK10-NEXT:    ret void
 //
@@ -2192,7 +2192,7 @@ void bar() {
 // CK10-NEXT:    store ptr [[TR_ADDR]], ptr [[TMP]], align 8
 // CK10-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8
 // CK10-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8
-// CK10-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1
+// CK10-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 1
 // CK10-NEXT:    store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 8
 // CK10-NEXT:    ret void
 //
@@ -2206,7 +2206,7 @@ void bar() {
 // CK10-NEXT:    store ptr [[TR_ADDR]], ptr [[TMP]], align 8
 // CK10-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8
 // CK10-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8
-// CK10-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1
+// CK10-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 1
 // CK10-NEXT:    store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 8
 // CK10-NEXT:    ret void
 //
@@ -2224,11 +2224,11 @@ void bar() {
 // CK10-NEXT:    store ptr [[LR_ADDR]], ptr [[_TMP1]], align 8
 // CK10-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8
 // CK10-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8
-// CK10-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1
+// CK10-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 1
 // CK10-NEXT:    store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 8
 // CK10-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 8
 // CK10-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8
-// CK10-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 1
+// CK10-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP3]], i32 1
 // CK10-NEXT:    store ptr [[INCDEC_PTR2]], ptr [[TMP2]], align 8
 // CK10-NEXT:    ret void
 //
@@ -2613,7 +2613,7 @@ void bar() {
 // CK11-NEXT:    [[G_ADDR:%.*]] = alloca ptr, align 8
 // CK11-NEXT:    store ptr [[G]], ptr [[G_ADDR]], align 8
 // CK11-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8
-// CK11-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1
+// CK11-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP0]], i32 1
 // CK11-NEXT:    store ptr [[INCDEC_PTR]], ptr [[G_ADDR]], align 8
 // CK11-NEXT:    ret void
 //
@@ -2624,7 +2624,7 @@ void bar() {
 // CK11-NEXT:    [[L_ADDR:%.*]] = alloca ptr, align 8
 // CK11-NEXT:    store ptr [[L]], ptr [[L_ADDR]], align 8
 // CK11-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[L_ADDR]], align 8
-// CK11-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 1
+// CK11-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i32 1
 // CK11-NEXT:    store ptr [[INCDEC_PTR]], ptr [[L_ADDR]], align 8
 // CK11-NEXT:    ret void
 //
@@ -2635,7 +2635,7 @@ void bar() {
 // CK11-NEXT:    [[T_ADDR:%.*]] = alloca ptr, align 8
 // CK11-NEXT:    store ptr [[T]], ptr [[T_ADDR]], align 8
 // CK11-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[T_ADDR]], align 8
-// CK11-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1
+// CK11-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP0]], i32 1
 // CK11-NEXT:    store ptr [[INCDEC_PTR]], ptr [[T_ADDR]], align 8
 // CK11-NEXT:    ret void
 //
@@ -2649,7 +2649,7 @@ void bar() {
 // CK11-NEXT:    store ptr [[LR_ADDR]], ptr [[TMP]], align 8
 // CK11-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8
 // CK11-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8
-// CK11-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 1
+// CK11-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw float, ptr [[TMP1]], i32 1
 // CK11-NEXT:    store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 8
 // CK11-NEXT:    ret void
 //
@@ -2663,7 +2663,7 @@ void bar() {
 // CK11-NEXT:    store ptr [[TR_ADDR]], ptr [[TMP]], align 8
 // CK11-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8
 // CK11-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8
-// CK11-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1
+// CK11-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 1
 // CK11-NEXT:    store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 8
 // CK11-NEXT:    ret void
 //
@@ -2677,7 +2677,7 @@ void bar() {
 // CK11-NEXT:    store ptr [[TR_ADDR]], ptr [[TMP]], align 8
 // CK11-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8
 // CK11-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8
-// CK11-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1
+// CK11-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 1
 // CK11-NEXT:    store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 8
 // CK11-NEXT:    ret void
 //
@@ -2695,11 +2695,11 @@ void bar() {
 // CK11-NEXT:    store ptr [[LR_ADDR]], ptr [[_TMP1]], align 8
 // CK11-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8
 // CK11-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8
-// CK11-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1
+// CK11-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 1
 // CK11-NEXT:    store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 8
 // CK11-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 8
 // CK11-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8
-// CK11-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 1
+// CK11-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP3]], i32 1
 // CK11-NEXT:    store ptr [[INCDEC_PTR2]], ptr [[TMP2]], align 8
 // CK11-NEXT:    ret void
 //
@@ -3084,7 +3084,7 @@ void bar() {
 // CK12-NEXT:    [[G_ADDR:%.*]] = alloca ptr, align 4
 // CK12-NEXT:    store ptr [[G]], ptr [[G_ADDR]], align 4
 // CK12-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4
-// CK12-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1
+// CK12-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP0]], i32 1
 // CK12-NEXT:    store ptr [[INCDEC_PTR]], ptr [[G_ADDR]], align 4
 // CK12-NEXT:    ret void
 //
@@ -3095,7 +3095,7 @@ void bar() {
 // CK12-NEXT:    [[L_ADDR:%.*]] = alloca ptr, align 4
 // CK12-NEXT:    store ptr [[L]], ptr [[L_ADDR]], align 4
 // CK12-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[L_ADDR]], align 4
-// CK12-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 1
+// CK12-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i32 1
 // CK12-NEXT:    store ptr [[INCDEC_PTR]], ptr [[L_ADDR]], align 4
 // CK12-NEXT:    ret void
 //
@@ -3106,7 +3106,7 @@ void bar() {
 // CK12-NEXT:    [[T_ADDR:%.*]] = alloca ptr, align 4
 // CK12-NEXT:    store ptr [[T]], ptr [[T_ADDR]], align 4
 // CK12-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[T_ADDR]], align 4
-// CK12-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1
+// CK12-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP0]], i32 1
 // CK12-NEXT:    store ptr [[INCDEC_PTR]], ptr [[T_ADDR]], align 4
 // CK12-NEXT:    ret void
 //
@@ -3120,7 +3120,7 @@ void bar() {
 // CK12-NEXT:    store ptr [[LR_ADDR]], ptr [[TMP]], align 4
 // CK12-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 4
 // CK12-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 4
-// CK12-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 1
+// CK12-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw float, ptr [[TMP1]], i32 1
 // CK12-NEXT:    store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 4
 // CK12-NEXT:    ret void
 //
@@ -3134,7 +3134,7 @@ void bar() {
 // CK12-NEXT:    store ptr [[TR_ADDR]], ptr [[TMP]], align 4
 // CK12-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 4
 // CK12-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 4
-// CK12-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1
+// CK12-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 1
 // CK12-NEXT:    store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 4
 // CK12-NEXT:    ret void
 //
@@ -3148,7 +3148,7 @@ void bar() {
 // CK12-NEXT:    store ptr [[TR_ADDR]], ptr [[TMP]], align 4
 // CK12-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 4
 // CK12-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 4
-// CK12-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1
+// CK12-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 1
 // CK12-NEXT:    store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 4
 // CK12-NEXT:    ret void
 //
@@ -3166,11 +3166,11 @@ void bar() {
 // CK12-NEXT:    store ptr [[LR_ADDR]], ptr [[_TMP1]], align 4
 // CK12-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 4
 // CK12-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 4
-// CK12-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1
+// CK12-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 1
 // CK12-NEXT:    store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 4
 // CK12-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 4
 // CK12-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4
-// CK12-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 1
+// CK12-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP3]], i32 1
 // CK12-NEXT:    store ptr [[INCDEC_PTR2]], ptr [[TMP2]], align 4
 // CK12-NEXT:    ret void
 //
@@ -3555,7 +3555,7 @@ void bar() {
 // CK13-NEXT:    [[G_ADDR:%.*]] = alloca ptr, align 4
 // CK13-NEXT:    store ptr [[G]], ptr [[G_ADDR]], align 4
 // CK13-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4
-// CK13-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1
+// CK13-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP0]], i32 1
 // CK13-NEXT:    store ptr [[INCDEC_PTR]], ptr [[G_ADDR]], align 4
 // CK13-NEXT:    ret void
 //
@@ -3566,7 +3566,7 @@ void bar() {
 // CK13-NEXT:    [[L_ADDR:%.*]] = alloca ptr, align 4
 // CK13-NEXT:    store ptr [[L]], ptr [[L_ADDR]], align 4
 // CK13-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[L_ADDR]], align 4
-// CK13-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 1
+// CK13-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i32 1
 // CK13-NEXT:    store ptr [[INCDEC_PTR]], ptr [[L_ADDR]], align 4
 // CK13-NEXT:    ret void
 //
@@ -3577,7 +3577,7 @@ void bar() {
 // CK13-NEXT:    [[T_ADDR:%.*]] = alloca ptr, align 4
 // CK13-NEXT:    store ptr [[T]], ptr [[T_ADDR]], align 4
 // CK13-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[T_ADDR]], align 4
-// CK13-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1
+// CK13-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP0]], i32 1
 // CK13-NEXT:    store ptr [[INCDEC_PTR]], ptr [[T_ADDR]], align 4
 // CK13-NEXT:    ret void
 //
@@ -3591,7 +3591,7 @@ void bar() {
 // CK13-NEXT:    store ptr [[LR_ADDR]], ptr [[TMP]], align 4
 // CK13-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 4
 // CK13-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 4
-// CK13-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 1
+// CK13-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw float, ptr [[TMP1]], i32 1
 // CK13-NEXT:    store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 4
 // CK13-NEXT:    ret void
 //
@@ -3605,7 +3605,7 @@ void bar() {
 // CK13-NEXT:    store ptr [[TR_ADDR]], ptr [[TMP]], align 4
 // CK13-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 4
 // CK13-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 4
-// CK13-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1
+// CK13-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 1
 // CK13-NEXT:    store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 4
 // CK13-NEXT:    ret void
 //
@@ -3619,7 +3619,7 @@ void bar() {
 // CK13-NEXT:    store ptr [[TR_ADDR]], ptr [[TMP]], align 4
 // CK13-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 4
 // CK13-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 4
-// CK13-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1
+// CK13-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 1
 // CK13-NEXT:    store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 4
 // CK13-NEXT:    ret void
 //
@@ -3637,11 +3637,11 @@ void bar() {
 // CK13-NEXT:    store ptr [[LR_ADDR]], ptr [[_TMP1]], align 4
 // CK13-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 4
 // CK13-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 4
-// CK13-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1
+// CK13-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 1
 // CK13-NEXT:    store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 4
 // CK13-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 4
 // CK13-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4
-// CK13-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 1
+// CK13-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP3]], i32 1
 // CK13-NEXT:    store ptr [[INCDEC_PTR2]], ptr [[TMP2]], align 4
 // CK13-NEXT:    ret void
 //
@@ -3674,34 +3674,34 @@ void bar() {
 // SIMD-ONLY00-NEXT:    store ptr [[LR]], ptr [[LR_ADDR]], align 8
 // SIMD-ONLY00-NEXT:    store ptr [[TR]], ptr [[TR_ADDR]], align 8
 // SIMD-ONLY00-NEXT:    [[TMP0:%.*]] = load ptr, ptr @g, align 8
-// SIMD-ONLY00-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1
+// SIMD-ONLY00-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP0]], i32 1
 // SIMD-ONLY00-NEXT:    store ptr [[INCDEC_PTR]], ptr @g, align 8
 // SIMD-ONLY00-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[L]], align 8
-// SIMD-ONLY00-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 1
+// SIMD-ONLY00-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds nuw float, ptr [[TMP1]], i32 1
 // SIMD-ONLY00-NEXT:    store ptr [[INCDEC_PTR1]], ptr [[L]], align 8
 // SIMD-ONLY00-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[T]], align 8
-// SIMD-ONLY00-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 1
+// SIMD-ONLY00-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP2]], i32 1
 // SIMD-ONLY00-NEXT:    store ptr [[INCDEC_PTR2]], ptr [[T]], align 8
 // SIMD-ONLY00-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[LR_ADDR]], align 8
 // SIMD-ONLY00-NEXT:    store ptr [[TMP3]], ptr [[TMP]], align 8
 // SIMD-ONLY00-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[LR_ADDR]], align 8
 // SIMD-ONLY00-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8
 // SIMD-ONLY00-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8
-// SIMD-ONLY00-NEXT:    [[INCDEC_PTR3:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 1
+// SIMD-ONLY00-NEXT:    [[INCDEC_PTR3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i32 1
 // SIMD-ONLY00-NEXT:    store ptr [[INCDEC_PTR3]], ptr [[TMP5]], align 8
 // SIMD-ONLY00-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[TR_ADDR]], align 8
 // SIMD-ONLY00-NEXT:    store ptr [[TMP7]], ptr [[_TMP4]], align 8
 // SIMD-ONLY00-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[TR_ADDR]], align 8
 // SIMD-ONLY00-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[_TMP4]], align 8
 // SIMD-ONLY00-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// SIMD-ONLY00-NEXT:    [[INCDEC_PTR5:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 1
+// SIMD-ONLY00-NEXT:    [[INCDEC_PTR5:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP10]], i32 1
 // SIMD-ONLY00-NEXT:    store ptr [[INCDEC_PTR5]], ptr [[TMP9]], align 8
 // SIMD-ONLY00-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[TR_ADDR]], align 8
 // SIMD-ONLY00-NEXT:    store ptr [[TMP11]], ptr [[_TMP6]], align 8
 // SIMD-ONLY00-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[TR_ADDR]], align 8
 // SIMD-ONLY00-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[_TMP6]], align 8
 // SIMD-ONLY00-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8
-// SIMD-ONLY00-NEXT:    [[INCDEC_PTR7:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 1
+// SIMD-ONLY00-NEXT:    [[INCDEC_PTR7:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP14]], i32 1
 // SIMD-ONLY00-NEXT:    store ptr [[INCDEC_PTR7]], ptr [[TMP13]], align 8
 // SIMD-ONLY00-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[TR_ADDR]], align 8
 // SIMD-ONLY00-NEXT:    store ptr [[TMP15]], ptr [[_TMP8]], align 8
@@ -3711,11 +3711,11 @@ void bar() {
 // SIMD-ONLY00-NEXT:    [[TMP18:%.*]] = load ptr, ptr [[LR_ADDR]], align 8
 // SIMD-ONLY00-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[_TMP8]], align 8
 // SIMD-ONLY00-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8
-// SIMD-ONLY00-NEXT:    [[INCDEC_PTR10:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 1
+// SIMD-ONLY00-NEXT:    [[INCDEC_PTR10:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP20]], i32 1
 // SIMD-ONLY00-NEXT:    store ptr [[INCDEC_PTR10]], ptr [[TMP19]], align 8
 // SIMD-ONLY00-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[_TMP9]], align 8
 // SIMD-ONLY00-NEXT:    [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8
-// SIMD-ONLY00-NEXT:    [[INCDEC_PTR11:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i32 1
+// SIMD-ONLY00-NEXT:    [[INCDEC_PTR11:%.*]] = getelementptr inbounds nuw float, ptr [[TMP22]], i32 1
 // SIMD-ONLY00-NEXT:    store ptr [[INCDEC_PTR11]], ptr [[TMP21]], align 8
 // SIMD-ONLY00-NEXT:    ret void
 //
@@ -3748,34 +3748,34 @@ void bar() {
 // SIMD-ONLY01-NEXT:    store ptr [[LR]], ptr [[LR_ADDR]], align 8
 // SIMD-ONLY01-NEXT:    store ptr [[TR]], ptr [[TR_ADDR]], align 8
 // SIMD-ONLY01-NEXT:    [[TMP0:%.*]] = load ptr, ptr @g, align 8
-// SIMD-ONLY01-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1
+// SIMD-ONLY01-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP0]], i32 1
 // SIMD-ONLY01-NEXT:    store ptr [[INCDEC_PTR]], ptr @g, align 8
 // SIMD-ONLY01-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[L]], align 8
-// SIMD-ONLY01-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 1
+// SIMD-ONLY01-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds nuw float, ptr [[TMP1]], i32 1
 // SIMD-ONLY01-NEXT:    store ptr [[INCDEC_PTR1]], ptr [[L]], align 8
 // SIMD-ONLY01-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[T]], align 8
-// SIMD-ONLY01-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 1
+// SIMD-ONLY01-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP2]], i32 1
 // SIMD-ONLY01-NEXT:    store ptr [[INCDEC_PTR2]], ptr [[T]], align 8
 // SIMD-ONLY01-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[LR_ADDR]], align 8
 // SIMD-ONLY01-NEXT:    store ptr [[TMP3]], ptr [[TMP]], align 8
 // SIMD-ONLY01-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[LR_ADDR]], align 8
 // SIMD-ONLY01-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8
 // SIMD-ONLY01-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8
-// SIMD-ONLY01-NEXT:    [[INCDEC_PTR3:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 1
+// SIMD-ONLY01-NEXT:    [[INCDEC_PTR3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i32 1
 // SIMD-ONLY01-NEXT:    store ptr [[INCDEC_PTR3]], ptr [[TMP5]], align 8
 // SIMD-ONLY01-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[TR_ADDR]], align 8
 // SIMD-ONLY01-NEXT:    store ptr [[TMP7]], ptr [[_TMP4]], align 8
 // SIMD-ONLY01-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[TR_ADDR]], align 8
 // SIMD-ONLY01-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[_TMP4]], align 8
 // SIMD-ONLY01-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// SIMD-ONLY01-NEXT:    [[INCDEC_PTR5:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 1
+// SIMD-ONLY01-NEXT:    [[INCDEC_PTR5:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP10]], i32 1
 // SIMD-ONLY01-NEXT:    store ptr [[INCDEC_PTR5]], ptr [[TMP9]], align 8
 // SIMD-ONLY01-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[TR_ADDR]], align 8
 // SIMD-ONLY01-NEXT:    store ptr [[TMP11]], ptr [[_TMP6]], align 8
 // SIMD-ONLY01-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[TR_ADDR]], align 8
 // SIMD-ONLY01-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[_TMP6]], align 8
 // SIMD-ONLY01-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8
-// SIMD-ONLY01-NEXT:    [[INCDEC_PTR7:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 1
+// SIMD-ONLY01-NEXT:    [[INCDEC_PTR7:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP14]], i32 1
 // SIMD-ONLY01-NEXT:    store ptr [[INCDEC_PTR7]], ptr [[TMP13]], align 8
 // SIMD-ONLY01-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[TR_ADDR]], align 8
 // SIMD-ONLY01-NEXT:    store ptr [[TMP15]], ptr [[_TMP8]], align 8
@@ -3785,11 +3785,11 @@ void bar() {
 // SIMD-ONLY01-NEXT:    [[TMP18:%.*]] = load ptr, ptr [[LR_ADDR]], align 8
 // SIMD-ONLY01-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[_TMP8]], align 8
 // SIMD-ONLY01-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8
-// SIMD-ONLY01-NEXT:    [[INCDEC_PTR10:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 1
+// SIMD-ONLY01-NEXT:    [[INCDEC_PTR10:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP20]], i32 1
 // SIMD-ONLY01-NEXT:    store ptr [[INCDEC_PTR10]], ptr [[TMP19]], align 8
 // SIMD-ONLY01-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[_TMP9]], align 8
 // SIMD-ONLY01-NEXT:    [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8
-// SIMD-ONLY01-NEXT:    [[INCDEC_PTR11:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i32 1
+// SIMD-ONLY01-NEXT:    [[INCDEC_PTR11:%.*]] = getelementptr inbounds nuw float, ptr [[TMP22]], i32 1
 // SIMD-ONLY01-NEXT:    store ptr [[INCDEC_PTR11]], ptr [[TMP21]], align 8
 // SIMD-ONLY01-NEXT:    ret void
 //
@@ -3822,34 +3822,34 @@ void bar() {
 // SIMD-ONLY02-NEXT:    store ptr [[LR]], ptr [[LR_ADDR]], align 4
 // SIMD-ONLY02-NEXT:    store ptr [[TR]], ptr [[TR_ADDR]], align 4
 // SIMD-ONLY02-NEXT:    [[TMP0:%.*]] = load ptr, ptr @g, align 4
-// SIMD-ONLY02-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1
+// SIMD-ONLY02-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP0]], i32 1
 // SIMD-ONLY02-NEXT:    store ptr [[INCDEC_PTR]], ptr @g, align 4
 // SIMD-ONLY02-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[L]], align 4
-// SIMD-ONLY02-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 1
+// SIMD-ONLY02-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds nuw float, ptr [[TMP1]], i32 1
 // SIMD-ONLY02-NEXT:    store ptr [[INCDEC_PTR1]], ptr [[L]], align 4
 // SIMD-ONLY02-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[T]], align 4
-// SIMD-ONLY02-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 1
+// SIMD-ONLY02-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP2]], i32 1
 // SIMD-ONLY02-NEXT:    store ptr [[INCDEC_PTR2]], ptr [[T]], align 4
 // SIMD-ONLY02-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[LR_ADDR]], align 4
 // SIMD-ONLY02-NEXT:    store ptr [[TMP3]], ptr [[TMP]], align 4
 // SIMD-ONLY02-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[LR_ADDR]], align 4
 // SIMD-ONLY02-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4
 // SIMD-ONLY02-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4
-// SIMD-ONLY02-NEXT:    [[INCDEC_PTR3:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 1
+// SIMD-ONLY02-NEXT:    [[INCDEC_PTR3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i32 1
 // SIMD-ONLY02-NEXT:    store ptr [[INCDEC_PTR3]], ptr [[TMP5]], align 4
 // SIMD-ONLY02-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[TR_ADDR]], align 4
 // SIMD-ONLY02-NEXT:    store ptr [[TMP7]], ptr [[_TMP4]], align 4
 // SIMD-ONLY02-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[TR_ADDR]], align 4
 // SIMD-ONLY02-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[_TMP4]], align 4
 // SIMD-ONLY02-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4
-// SIMD-ONLY02-NEXT:    [[INCDEC_PTR5:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 1
+// SIMD-ONLY02-NEXT:    [[INCDEC_PTR5:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP10]], i32 1
 // SIMD-ONLY02-NEXT:    store ptr [[INCDEC_PTR5]], ptr [[TMP9]], align 4
 // SIMD-ONLY02-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[TR_ADDR]], align 4
 // SIMD-ONLY02-NEXT:    store ptr [[TMP11]], ptr [[_TMP6]], align 4
 // SIMD-ONLY02-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[TR_ADDR]], align 4
 // SIMD-ONLY02-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[_TMP6]], align 4
 // SIMD-ONLY02-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4
-// SIMD-ONLY02-NEXT:    [[INCDEC_PTR7:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 1
+// SIMD-ONLY02-NEXT:    [[INCDEC_PTR7:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP14]], i32 1
 // SIMD-ONLY02-NEXT:    store ptr [[INCDEC_PTR7]], ptr [[TMP13]], align 4
 // SIMD-ONLY02-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[TR_ADDR]], align 4
 // SIMD-ONLY02-NEXT:    store ptr [[TMP15]], ptr [[_TMP8]], align 4
@@ -3859,11 +3859,11 @@ void bar() {
 // SIMD-ONLY02-NEXT:    [[TMP18:%.*]] = load ptr, ptr [[LR_ADDR]], align 4
 // SIMD-ONLY02-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[_TMP8]], align 4
 // SIMD-ONLY02-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 4
-// SIMD-ONLY02-NEXT:    [[INCDEC_PTR10:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 1
+// SIMD-ONLY02-NEXT:    [[INCDEC_PTR10:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP20]], i32 1
 // SIMD-ONLY02-NEXT:    store ptr [[INCDEC_PTR10]], ptr [[TMP19]], align 4
 // SIMD-ONLY02-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[_TMP9]], align 4
 // SIMD-ONLY02-NEXT:    [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 4
-// SIMD-ONLY02-NEXT:    [[INCDEC_PTR11:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i32 1
+// SIMD-ONLY02-NEXT:    [[INCDEC_PTR11:%.*]] = getelementptr inbounds nuw float, ptr [[TMP22]], i32 1
 // SIMD-ONLY02-NEXT:    store ptr [[INCDEC_PTR11]], ptr [[TMP21]], align 4
 // SIMD-ONLY02-NEXT:    ret void
 //
@@ -3896,34 +3896,34 @@ void bar() {
 // SIMD-ONLY03-NEXT:    store ptr [[LR]], ptr [[LR_ADDR]], align 4
 // SIMD-ONLY03-NEXT:    store ptr [[TR]], ptr [[TR_ADDR]], align 4
 // SIMD-ONLY03-NEXT:    [[TMP0:%.*]] = load ptr, ptr @g, align 4
-// SIMD-ONLY03-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1
+// SIMD-ONLY03-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP0]], i32 1
 // SIMD-ONLY03-NEXT:    store ptr [[INCDEC_PTR]], ptr @g, align 4
 // SIMD-ONLY03-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[L]], align 4
-// SIMD-ONLY03-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 1
+// SIMD-ONLY03-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds nuw float, ptr [[TMP1]], i32 1
 // SIMD-ONLY03-NEXT:    store ptr [[INCDEC_PTR1]], ptr [[L]], align 4
 // SIMD-ONLY03-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[T]], align 4
-// SIMD-ONLY03-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 1
+// SIMD-ONLY03-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP2]], i32 1
 // SIMD-ONLY03-NEXT:    store ptr [[INCDEC_PTR2]], ptr [[T]], align 4
 // SIMD-ONLY03-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[LR_ADDR]], align 4
 // SIMD-ONLY03-NEXT:    store ptr [[TMP3]], ptr [[TMP]], align 4
 // SIMD-ONLY03-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[LR_ADDR]], align 4
 // SIMD-ONLY03-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4
 // SIMD-ONLY03-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4
-// SIMD-ONLY03-NEXT:    [[INCDEC_PTR3:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 1
+// SIMD-ONLY03-NEXT:    [[INCDEC_PTR3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i32 1
 // SIMD-ONLY03-NEXT:    store ptr [[INCDEC_PTR3]], ptr [[TMP5]], align 4
 // SIMD-ONLY03-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[TR_ADDR]], align 4
 // SIMD-ONLY03-NEXT:    store ptr [[TMP7]], ptr [[_TMP4]], align 4
 // SIMD-ONLY03-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[TR_ADDR]], align 4
 // SIMD-ONLY03-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[_TMP4]], align 4
 // SIMD-ONLY03-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4
-// SIMD-ONLY03-NEXT:    [[INCDEC_PTR5:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 1
+// SIMD-ONLY03-NEXT:    [[INCDEC_PTR5:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP10]], i32 1
 // SIMD-ONLY03-NEXT:    store ptr [[INCDEC_PTR5]], ptr [[TMP9]], align 4
 // SIMD-ONLY03-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[TR_ADDR]], align 4
 // SIMD-ONLY03-NEXT:    store ptr [[TMP11]], ptr [[_TMP6]], align 4
 // SIMD-ONLY03-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[TR_ADDR]], align 4
 // SIMD-ONLY03-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[_TMP6]], align 4
 // SIMD-ONLY03-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4
-// SIMD-ONLY03-NEXT:    [[INCDEC_PTR7:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 1
+// SIMD-ONLY03-NEXT:    [[INCDEC_PTR7:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP14]], i32 1
 // SIMD-ONLY03-NEXT:    store ptr [[INCDEC_PTR7]], ptr [[TMP13]], align 4
 // SIMD-ONLY03-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[TR_ADDR]], align 4
 // SIMD-ONLY03-NEXT:    store ptr [[TMP15]], ptr [[_TMP8]], align 4
@@ -3933,11 +3933,11 @@ void bar() {
 // SIMD-ONLY03-NEXT:    [[TMP18:%.*]] = load ptr, ptr [[LR_ADDR]], align 4
 // SIMD-ONLY03-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[_TMP8]], align 4
 // SIMD-ONLY03-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 4
-// SIMD-ONLY03-NEXT:    [[INCDEC_PTR10:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 1
+// SIMD-ONLY03-NEXT:    [[INCDEC_PTR10:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP20]], i32 1
 // SIMD-ONLY03-NEXT:    store ptr [[INCDEC_PTR10]], ptr [[TMP19]], align 4
 // SIMD-ONLY03-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[_TMP9]], align 4
 // SIMD-ONLY03-NEXT:    [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 4
-// SIMD-ONLY03-NEXT:    [[INCDEC_PTR11:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i32 1
+// SIMD-ONLY03-NEXT:    [[INCDEC_PTR11:%.*]] = getelementptr inbounds nuw float, ptr [[TMP22]], i32 1
 // SIMD-ONLY03-NEXT:    store ptr [[INCDEC_PTR11]], ptr [[TMP21]], align 4
 // SIMD-ONLY03-NEXT:    ret void
 //
@@ -3951,7 +3951,7 @@ void bar() {
 // CK20-NEXT:    call void @_ZN2STIdEC1ERPd(ptr noundef nonnull align 8 dereferenceable(16) [[A]], ptr noundef nonnull align 8 dereferenceable(8) [[ARG_ADDR]])
 // CK20-NEXT:    call void @_ZN2STIdE3fooERPd(ptr noundef nonnull align 8 dereferenceable(16) [[A]], ptr noundef nonnull align 8 dereferenceable(8) [[ARG_ADDR]])
 // CK20-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 8
-// CK20-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1
+// CK20-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP0]], i32 1
 // CK20-NEXT:    store ptr [[INCDEC_PTR]], ptr [[ARG_ADDR]], align 8
 // CK20-NEXT:    ret void
 //
@@ -4185,7 +4185,7 @@ void bar() {
 // CK20-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CK20-NEXT:    [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 0
 // CK20-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A]], align 8
-// CK20-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 1
+// CK20-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP1]], i32 1
 // CK20-NEXT:    store ptr [[INCDEC_PTR]], ptr [[A]], align 8
 // CK20-NEXT:    ret void
 //
@@ -4199,7 +4199,7 @@ void bar() {
 // CK20-NEXT:    [[B:%.*]] = getelementptr inbounds nuw [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 1
 // CK20-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[B]], align 8
 // CK20-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
-// CK20-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 1
+// CK20-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP2]], i32 1
 // CK20-NEXT:    store ptr [[INCDEC_PTR]], ptr [[TMP1]], align 8
 // CK20-NEXT:    ret void
 //
@@ -4212,12 +4212,12 @@ void bar() {
 // CK20-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CK20-NEXT:    [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 0
 // CK20-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A]], align 8
-// CK20-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 1
+// CK20-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP1]], i32 1
 // CK20-NEXT:    store ptr [[INCDEC_PTR]], ptr [[A]], align 8
 // CK20-NEXT:    [[B:%.*]] = getelementptr inbounds nuw [[STRUCT_ST]], ptr [[TMP0]], i32 0, i32 1
 // CK20-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[B]], align 8
 // CK20-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8
-// CK20-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 1
+// CK20-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds nuw double, ptr [[TMP3]], i32 1
 // CK20-NEXT:    store ptr [[INCDEC_PTR1]], ptr [[TMP2]], align 8
 // CK20-NEXT:    ret void
 //
@@ -4231,7 +4231,7 @@ void bar() {
 // CK21-NEXT:    call void @_ZN2STIdEC1ERPd(ptr noundef nonnull align 8 dereferenceable(16) [[A]], ptr noundef nonnull align 8 dereferenceable(8) [[ARG_ADDR]])
 // CK21-NEXT:    call void @_ZN2STIdE3fooERPd(ptr noundef nonnull align 8 dereferenceable(16) [[A]], ptr noundef nonnull align 8 dereferenceable(8) [[ARG_ADDR]])
 // CK21-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 8
-// CK21-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1
+// CK21-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP0]], i32 1
 // CK21-NEXT:    store ptr [[INCDEC_PTR]], ptr [[ARG_ADDR]], align 8
 // CK21-NEXT:    ret void
 //
@@ -4465,7 +4465,7 @@ void bar() {
 // CK21-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CK21-NEXT:    [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 0
 // CK21-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A]], align 8
-// CK21-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 1
+// CK21-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP1]], i32 1
 // CK21-NEXT:    store ptr [[INCDEC_PTR]], ptr [[A]], align 8
 // CK21-NEXT:    ret void
 //
@@ -4479,7 +4479,7 @@ void bar() {
 // CK21-NEXT:    [[B:%.*]] = getelementptr inbounds nuw [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 1
 // CK21-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[B]], align 8
 // CK21-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
-// CK21-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 1
+// CK21-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP2]], i32 1
 // CK21-NEXT:    store ptr [[INCDEC_PTR]], ptr [[TMP1]], align 8
 // CK21-NEXT:    ret void
 //
@@ -4492,12 +4492,12 @@ void bar() {
 // CK21-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CK21-NEXT:    [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 0
 // CK21-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A]], align 8
-// CK21-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 1
+// CK21-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP1]], i32 1
 // CK21-NEXT:    store ptr [[INCDEC_PTR]], ptr [[A]], align 8
 // CK21-NEXT:    [[B:%.*]] = getelementptr inbounds nuw [[STRUCT_ST]], ptr [[TMP0]], i32 0, i32 1
 // CK21-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[B]], align 8
 // CK21-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8
-// CK21-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 1
+// CK21-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds nuw double, ptr [[TMP3]], i32 1
 // CK21-NEXT:    store ptr [[INCDEC_PTR1]], ptr [[TMP2]], align 8
 // CK21-NEXT:    ret void
 //
@@ -4511,7 +4511,7 @@ void bar() {
 // CK22-NEXT:    call void @_ZN2STIdEC1ERPd(ptr noundef nonnull align 4 dereferenceable(8) [[A]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG_ADDR]])
 // CK22-NEXT:    call void @_ZN2STIdE3fooERPd(ptr noundef nonnull align 4 dereferenceable(8) [[A]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG_ADDR]])
 // CK22-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 4
-// CK22-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1
+// CK22-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP0]], i32 1
 // CK22-NEXT:    store ptr [[INCDEC_PTR]], ptr [[ARG_ADDR]], align 4
 // CK22-NEXT:    ret void
 //
@@ -4745,7 +4745,7 @@ void bar() {
 // CK22-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
 // CK22-NEXT:    [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 0
 // CK22-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A]], align 4
-// CK22-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 1
+// CK22-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP1]], i32 1
 // CK22-NEXT:    store ptr [[INCDEC_PTR]], ptr [[A]], align 4
 // CK22-NEXT:    ret void
 //
@@ -4759,7 +4759,7 @@ void bar() {
 // CK22-NEXT:    [[B:%.*]] = getelementptr inbounds nuw [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 1
 // CK22-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[B]], align 4
 // CK22-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4
-// CK22-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 1
+// CK22-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP2]], i32 1
 // CK22-NEXT:    store ptr [[INCDEC_PTR]], ptr [[TMP1]], align 4
 // CK22-NEXT:    ret void
 //
@@ -4772,12 +4772,12 @@ void bar() {
 // CK22-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
 // CK22-NEXT:    [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 0
 // CK22-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A]], align 4
-// CK22-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 1
+// CK22-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP1]], i32 1
 // CK22-NEXT:    store ptr [[INCDEC_PTR]], ptr [[A]], align 4
 // CK22-NEXT:    [[B:%.*]] = getelementptr inbounds nuw [[STRUCT_ST]], ptr [[TMP0]], i32 0, i32 1
 // CK22-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[B]], align 4
 // CK22-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4
-// CK22-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 1
+// CK22-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds nuw double, ptr [[TMP3]], i32 1
 // CK22-NEXT:    store ptr [[INCDEC_PTR1]], ptr [[TMP2]], align 4
 // CK22-NEXT:    ret void
 //
@@ -4791,7 +4791,7 @@ void bar() {
 // CK23-NEXT:    call void @_ZN2STIdEC1ERPd(ptr noundef nonnull align 4 dereferenceable(8) [[A]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG_ADDR]])
 // CK23-NEXT:    call void @_ZN2STIdE3fooERPd(ptr noundef nonnull align 4 dereferenceable(8) [[A]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG_ADDR]])
 // CK23-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 4
-// CK23-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1
+// CK23-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP0]], i32 1
 // CK23-NEXT:    store ptr [[INCDEC_PTR]], ptr [[ARG_ADDR]], align 4
 // CK23-NEXT:    ret void
 //
@@ -5025,7 +5025,7 @@ void bar() {
 // CK23-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
 // CK23-NEXT:    [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 0
 // CK23-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A]], align 4
-// CK23-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 1
+// CK23-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP1]], i32 1
 // CK23-NEXT:    store ptr [[INCDEC_PTR]], ptr [[A]], align 4
 // CK23-NEXT:    ret void
 //
@@ -5039,7 +5039,7 @@ void bar() {
 // CK23-NEXT:    [[B:%.*]] = getelementptr inbounds nuw [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 1
 // CK23-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[B]], align 4
 // CK23-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4
-// CK23-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 1
+// CK23-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP2]], i32 1
 // CK23-NEXT:    store ptr [[INCDEC_PTR]], ptr [[TMP1]], align 4
 // CK23-NEXT:    ret void
 //
@@ -5052,12 +5052,12 @@ void bar() {
 // CK23-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
 // CK23-NEXT:    [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 0
 // CK23-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A]], align 4
-// CK23-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 1
+// CK23-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP1]], i32 1
 // CK23-NEXT:    store ptr [[INCDEC_PTR]], ptr [[A]], align 4
 // CK23-NEXT:    [[B:%.*]] = getelementptr inbounds nuw [[STRUCT_ST]], ptr [[TMP0]], i32 0, i32 1
 // CK23-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[B]], align 4
 // CK23-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4
-// CK23-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 1
+// CK23-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds nuw double, ptr [[TMP3]], i32 1
 // CK23-NEXT:    store ptr [[INCDEC_PTR1]], ptr [[TMP2]], align 4
 // CK23-NEXT:    ret void
 //
@@ -5071,7 +5071,7 @@ void bar() {
 // SIMD-ONLY10-NEXT:    call void @_ZN2STIdEC1ERPd(ptr noundef nonnull align 8 dereferenceable(16) [[A]], ptr noundef nonnull align 8 dereferenceable(8) [[ARG_ADDR]])
 // SIMD-ONLY10-NEXT:    call void @_ZN2STIdE3fooERPd(ptr noundef nonnull align 8 dereferenceable(16) [[A]], ptr noundef nonnull align 8 dereferenceable(8) [[ARG_ADDR]])
 // SIMD-ONLY10-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 8
-// SIMD-ONLY10-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1
+// SIMD-ONLY10-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP0]], i32 1
 // SIMD-ONLY10-NEXT:    store ptr [[INCDEC_PTR]], ptr [[ARG_ADDR]], align 8
 // SIMD-ONLY10-NEXT:    ret void
 //
@@ -5101,21 +5101,21 @@ void bar() {
 // SIMD-ONLY10-NEXT:    store ptr null, ptr [[LA]], align 8
 // SIMD-ONLY10-NEXT:    [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0
 // SIMD-ONLY10-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[A]], align 8
-// SIMD-ONLY10-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1
+// SIMD-ONLY10-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP0]], i32 1
 // SIMD-ONLY10-NEXT:    store ptr [[INCDEC_PTR]], ptr [[A]], align 8
 // SIMD-ONLY10-NEXT:    [[B:%.*]] = getelementptr inbounds nuw [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1
 // SIMD-ONLY10-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[B]], align 8
 // SIMD-ONLY10-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
-// SIMD-ONLY10-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 1
+// SIMD-ONLY10-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds nuw double, ptr [[TMP2]], i32 1
 // SIMD-ONLY10-NEXT:    store ptr [[INCDEC_PTR2]], ptr [[TMP1]], align 8
 // SIMD-ONLY10-NEXT:    [[A3:%.*]] = getelementptr inbounds nuw [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 0
 // SIMD-ONLY10-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[A3]], align 8
-// SIMD-ONLY10-NEXT:    [[INCDEC_PTR4:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 1
+// SIMD-ONLY10-NEXT:    [[INCDEC_PTR4:%.*]] = getelementptr inbounds nuw double, ptr [[TMP3]], i32 1
 // SIMD-ONLY10-NEXT:    store ptr [[INCDEC_PTR4]], ptr [[A3]], align 8
 // SIMD-ONLY10-NEXT:    [[B5:%.*]] = getelementptr inbounds nuw [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1
 // SIMD-ONLY10-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[B5]], align 8
 // SIMD-ONLY10-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
-// SIMD-ONLY10-NEXT:    [[INCDEC_PTR6:%.*]] = getelementptr inbounds double, ptr [[TMP5]], i32 1
+// SIMD-ONLY10-NEXT:    [[INCDEC_PTR6:%.*]] = getelementptr inbounds nuw double, ptr [[TMP5]], i32 1
 // SIMD-ONLY10-NEXT:    store ptr [[INCDEC_PTR6]], ptr [[TMP4]], align 8
 // SIMD-ONLY10-NEXT:    ret void
 //
@@ -5145,7 +5145,7 @@ void bar() {
 // SIMD-ONLY11-NEXT:    call void @_ZN2STIdEC1ERPd(ptr noundef nonnull align 8 dereferenceable(16) [[A]], ptr noundef nonnull align 8 dereferenceable(8) [[ARG_ADDR]])
 // SIMD-ONLY11-NEXT:    call void @_ZN2STIdE3fooERPd(ptr noundef nonnull align 8 dereferenceable(16) [[A]], ptr noundef nonnull align 8 dereferenceable(8) [[ARG_ADDR]])
 // SIMD-ONLY11-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 8
-// SIMD-ONLY11-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1
+// SIMD-ONLY11-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP0]], i32 1
 // SIMD-ONLY11-NEXT:    store ptr [[INCDEC_PTR]], ptr [[ARG_ADDR]], align 8
 // SIMD-ONLY11-NEXT:    ret void
 //
@@ -5175,21 +5175,21 @@ void bar() {
 // SIMD-ONLY11-NEXT:    store ptr null, ptr [[LA]], align 8
 // SIMD-ONLY11-NEXT:    [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0
 // SIMD-ONLY11-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[A]], align 8
-// SIMD-ONLY11-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1
+// SIMD-ONLY11-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP0]], i32 1
 // SIMD-ONLY11-NEXT:    store ptr [[INCDEC_PTR]], ptr [[A]], align 8
 // SIMD-ONLY11-NEXT:    [[B:%.*]] = getelementptr inbounds nuw [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1
 // SIMD-ONLY11-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[B]], align 8
 // SIMD-ONLY11-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
-// SIMD-ONLY11-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 1
+// SIMD-ONLY11-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds nuw double, ptr [[TMP2]], i32 1
 // SIMD-ONLY11-NEXT:    store ptr [[INCDEC_PTR2]], ptr [[TMP1]], align 8
 // SIMD-ONLY11-NEXT:    [[A3:%.*]] = getelementptr inbounds nuw [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 0
 // SIMD-ONLY11-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[A3]], align 8
-// SIMD-ONLY11-NEXT:    [[INCDEC_PTR4:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 1
+// SIMD-ONLY11-NEXT:    [[INCDEC_PTR4:%.*]] = getelementptr inbounds nuw double, ptr [[TMP3]], i32 1
 // SIMD-ONLY11-NEXT:    store ptr [[INCDEC_PTR4]], ptr [[A3]], align 8
 // SIMD-ONLY11-NEXT:    [[B5:%.*]] = getelementptr inbounds nuw [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1
 // SIMD-ONLY11-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[B5]], align 8
 // SIMD-ONLY11-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
-// SIMD-ONLY11-NEXT:    [[INCDEC_PTR6:%.*]] = getelementptr inbounds double, ptr [[TMP5]], i32 1
+// SIMD-ONLY11-NEXT:    [[INCDEC_PTR6:%.*]] = getelementptr inbounds nuw double, ptr [[TMP5]], i32 1
 // SIMD-ONLY11-NEXT:    store ptr [[INCDEC_PTR6]], ptr [[TMP4]], align 8
 // SIMD-ONLY11-NEXT:    ret void
 //
@@ -5219,7 +5219,7 @@ void bar() {
 // SIMD-ONLY12-NEXT:    call void @_ZN2STIdEC1ERPd(ptr noundef nonnull align 4 dereferenceable(8) [[A]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG_ADDR]])
 // SIMD-ONLY12-NEXT:    call void @_ZN2STIdE3fooERPd(ptr noundef nonnull align 4 dereferenceable(8) [[A]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG_ADDR]])
 // SIMD-ONLY12-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 4
-// SIMD-ONLY12-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1
+// SIMD-ONLY12-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP0]], i32 1
 // SIMD-ONLY12-NEXT:    store ptr [[INCDEC_PTR]], ptr [[ARG_ADDR]], align 4
 // SIMD-ONLY12-NEXT:    ret void
 //
@@ -5249,21 +5249,21 @@ void bar() {
 // SIMD-ONLY12-NEXT:    store ptr null, ptr [[LA]], align 4
 // SIMD-ONLY12-NEXT:    [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0
 // SIMD-ONLY12-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[A]], align 4
-// SIMD-ONLY12-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1
+// SIMD-ONLY12-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP0]], i32 1
 // SIMD-ONLY12-NEXT:    store ptr [[INCDEC_PTR]], ptr [[A]], align 4
 // SIMD-ONLY12-NEXT:    [[B:%.*]] = getelementptr inbounds nuw [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1
 // SIMD-ONLY12-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[B]], align 4
 // SIMD-ONLY12-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4
-// SIMD-ONLY12-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 1
+// SIMD-ONLY12-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds nuw double, ptr [[TMP2]], i32 1
 // SIMD-ONLY12-NEXT:    store ptr [[INCDEC_PTR2]], ptr [[TMP1]], align 4
 // SIMD-ONLY12-NEXT:    [[A3:%.*]] = getelementptr inbounds nuw [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 0
 // SIMD-ONLY12-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[A3]], align 4
-// SIMD-ONLY12-NEXT:    [[INCDEC_PTR4:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 1
+// SIMD-ONLY12-NEXT:    [[INCDEC_PTR4:%.*]] = getelementptr inbounds nuw double, ptr [[TMP3]], i32 1
 // SIMD-ONLY12-NEXT:    store ptr [[INCDEC_PTR4]], ptr [[A3]], align 4
 // SIMD-ONLY12-NEXT:    [[B5:%.*]] = getelementptr inbounds nuw [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1
 // SIMD-ONLY12-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[B5]], align 4
 // SIMD-ONLY12-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4
-// SIMD-ONLY12-NEXT:    [[INCDEC_PTR6:%.*]] = getelementptr inbounds double, ptr [[TMP5]], i32 1
+// SIMD-ONLY12-NEXT:    [[INCDEC_PTR6:%.*]] = getelementptr inbounds nuw double, ptr [[TMP5]], i32 1
 // SIMD-ONLY12-NEXT:    store ptr [[INCDEC_PTR6]], ptr [[TMP4]], align 4
 // SIMD-ONLY12-NEXT:    ret void
 //
@@ -5293,7 +5293,7 @@ void bar() {
 // SIMD-ONLY13-NEXT:    call void @_ZN2STIdEC1ERPd(ptr noundef nonnull align 4 dereferenceable(8) [[A]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG_ADDR]])
 // SIMD-ONLY13-NEXT:    call void @_ZN2STIdE3fooERPd(ptr noundef nonnull align 4 dereferenceable(8) [[A]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG_ADDR]])
 // SIMD-ONLY13-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 4
-// SIMD-ONLY13-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1
+// SIMD-ONLY13-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP0]], i32 1
 // SIMD-ONLY13-NEXT:    store ptr [[INCDEC_PTR]], ptr [[ARG_ADDR]], align 4
 // SIMD-ONLY13-NEXT:    ret void
 //
@@ -5323,21 +5323,21 @@ void bar() {
 // SIMD-ONLY13-NEXT:    store ptr null, ptr [[LA]], align 4
 // SIMD-ONLY13-NEXT:    [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0
 // SIMD-ONLY13-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[A]], align 4
-// SIMD-ONLY13-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1
+// SIMD-ONLY13-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP0]], i32 1
 // SIMD-ONLY13-NEXT:    store ptr [[INCDEC_PTR]], ptr [[A]], align 4
 // SIMD-ONLY13-NEXT:    [[B:%.*]] = getelementptr inbounds nuw [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1
 // SIMD-ONLY13-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[B]], align 4
 // SIMD-ONLY13-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4
-// SIMD-ONLY13-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 1
+// SIMD-ONLY13-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds nuw double, ptr [[TMP2]], i32 1
 // SIMD-ONLY13-NEXT:    store ptr [[INCDEC_PTR2]], ptr [[TMP1]], align 4
 // SIMD-ONLY13-NEXT:    [[A3:%.*]] = getelementptr inbounds nuw [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 0
 // SIMD-ONLY13-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[A3]], align 4
-// SIMD-ONLY13-NEXT:    [[INCDEC_PTR4:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 1
+// SIMD-ONLY13-NEXT:    [[INCDEC_PTR4:%.*]] = getelementptr inbounds nuw double, ptr [[TMP3]], i32 1
 // SIMD-ONLY13-NEXT:    store ptr [[INCDEC_PTR4]], ptr [[A3]], align 4
 // SIMD-ONLY13-NEXT:    [[B5:%.*]] = getelementptr inbounds nuw [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1
 // SIMD-ONLY13-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[B5]], align 4
 // SIMD-ONLY13-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4
-// SIMD-ONLY13-NEXT:    [[INCDEC_PTR6:%.*]] = getelementptr inbounds double, ptr [[TMP5]], i32 1
+// SIMD-ONLY13-NEXT:    [[INCDEC_PTR6:%.*]] = getelementptr inbounds nuw double, ptr [[TMP5]], i32 1
 // SIMD-ONLY13-NEXT:    store ptr [[INCDEC_PTR6]], ptr [[TMP4]], align 4
 // SIMD-ONLY13-NEXT:    ret void
 //
diff --git clang/test/OpenMP/target_map_both_pointer_pointee_codegen.cpp clang/test/OpenMP/target_map_both_pointer_pointee_codegen.cpp
index fcaceac7d346..87fa7fe462da 100644
--- clang/test/OpenMP/target_map_both_pointer_pointee_codegen.cpp
+++ clang/test/OpenMP/target_map_both_pointer_pointee_codegen.cpp
@@ -45,7 +45,7 @@ void foo() {
 // CHECK-NEXT:    store ptr [[CALL]], ptr [[PTR]], align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[PTR]], align 8
 // CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[PTR]], align 8
-// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 0
+// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i64 0
 // CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK-NEXT:    store ptr [[PTR]], ptr [[TMP2]], align 8
 // CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
diff --git clang/test/OpenMP/target_map_codegen_01.cpp clang/test/OpenMP/target_map_codegen_01.cpp
index d112500eb5fd..9f3553d2377c 100644
--- clang/test/OpenMP/target_map_codegen_01.cpp
+++ clang/test/OpenMP/target_map_codegen_01.cpp
@@ -108,6 +108,6 @@ void implicit_maps_reference (int a, int *b){
 // CK2: store ptr [[ADDR]], ptr [[REF]],
 // CK2: [[T:%.+]] = load ptr, ptr [[REF]],
 // CK2: [[TT:%.+]] = load ptr, ptr [[T]],
-// CK2: getelementptr inbounds i32, ptr [[TT]], i32 1
+// CK2: getelementptr inbounds nuw i32, ptr [[TT]], i32 1
 #endif // CK2
 #endif
diff --git clang/test/OpenMP/target_map_codegen_21.cpp clang/test/OpenMP/target_map_codegen_21.cpp
index a1419b7d4beb..f5c517692d8c 100644
--- clang/test/OpenMP/target_map_codegen_21.cpp
+++ clang/test/OpenMP/target_map_codegen_21.cpp
@@ -185,7 +185,7 @@ int explicit_maps_globals(void){
 // CK22-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
 // CK22-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
 // CK22-DAG: store ptr @c, ptr [[BP0]]
-// CK22-DAG: store ptr getelementptr inbounds ([100 x i32], ptr @c, i{{.+}} 0, i{{.+}} 1), ptr [[P0]]
+// CK22-DAG: store ptr getelementptr inbounds nuw ([100 x i32], ptr @c, i{{.+}} 0, i{{.+}} 1), ptr [[P0]]
 
 // CK22: call void [[CALL03:@.+]](ptr {{[^,]+}})
 #pragma omp target map(c [1:4])
@@ -277,7 +277,7 @@ int explicit_maps_globals(void){
 // CK22-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
 // CK22-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
 // CK22-DAG: store ptr @sc, ptr [[BP0]]
-// CK22-DAG: store ptr getelementptr inbounds ([100 x [[ST]]], ptr @sc, i{{.+}} 0, i{{.+}} 1), ptr [[P0]]
+// CK22-DAG: store ptr getelementptr inbounds nuw ([100 x [[ST]]], ptr @sc, i{{.+}} 0, i{{.+}} 1), ptr [[P0]]
 
 // CK22: call void [[CALL08:@.+]](ptr {{[^,]+}})
 #pragma omp target map(sc [1:4])
@@ -369,7 +369,7 @@ int explicit_maps_globals(void){
 // CK22-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
 // CK22-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
 // CK22-DAG: store ptr @stc, ptr [[BP0]]
-// CK22-DAG: store ptr getelementptr inbounds ([100 x [[STT]]], ptr @stc, i{{.+}} 0, i{{.+}} 1),  ptr [[P0]]
+// CK22-DAG: store ptr getelementptr inbounds nuw ([100 x [[STT]]], ptr @stc, i{{.+}} 0, i{{.+}} 1),  ptr [[P0]]
 
 // CK22: call void [[CALL13:@.+]](ptr {{[^,]+}})
 #pragma omp target map(stc [1:4])
diff --git clang/test/OpenMP/target_map_codegen_27.cpp clang/test/OpenMP/target_map_codegen_27.cpp
index fe7ae12e00d1..bfe75bca481b 100644
--- clang/test/OpenMP/target_map_codegen_27.cpp
+++ clang/test/OpenMP/target_map_codegen_27.cpp
@@ -82,7 +82,7 @@ void explicit_maps_pointer_references (int *p){
 // CK28-DAG: store ptr [[VAR1:%.+]], ptr [[P0]]
 // CK28-DAG: [[VAR0]] = load ptr, ptr [[VAR00:%.+]],
 // CK28-DAG: [[VAR00]] = load ptr, ptr [[VAR000:%.+]],
-// CK28-DAG: [[VAR1]] = getelementptr inbounds i32, ptr [[VAR11:%.+]], i{{64|32}} 2
+// CK28-DAG: [[VAR1]] = getelementptr inbounds nuw i32, ptr [[VAR11:%.+]], i{{64|32}} 2
 // CK28-DAG: [[VAR11]] = load ptr, ptr [[VAR111:%.+]],
 // CK28-DAG: [[VAR111]] = load ptr, ptr [[VAR1111:%.+]],
 
diff --git clang/test/OpenMP/target_map_codegen_28.cpp clang/test/OpenMP/target_map_codegen_28.cpp
index e92f7e4773ec..67ea72d791d0 100644
--- clang/test/OpenMP/target_map_codegen_28.cpp
+++ clang/test/OpenMP/target_map_codegen_28.cpp
@@ -89,7 +89,7 @@ struct SSB{
 // CK29-DAG: store ptr [[VAR1:%.+]], ptr [[BP2]]
 // CK29-DAG: store ptr [[VAR2:%.+]], ptr [[P2]]
 // CK29-DAG: [[VAR1]] = getelementptr inbounds nuw [[SSA]], ptr %{{.+}}, i32 0, i32 1
-// CK29-DAG: [[VAR2]] = getelementptr inbounds double, ptr [[VAR22:%.+]], i{{.+}} 0
+// CK29-DAG: [[VAR2]] = getelementptr inbounds nuw double, ptr [[VAR22:%.+]], i{{.+}} 0
 // CK29-DAG: [[VAR22]] = load ptr, ptr %{{.+}},
 
 // CK29: call void [[CALL00:@.+]](ptr {{[^,]+}})
@@ -129,7 +129,7 @@ struct SSB{
 // CK29-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2
 // CK29-DAG: store ptr [[VAR1]], ptr [[BP2]]
 // CK29-DAG: store ptr [[VAR2:%.+]], ptr [[P2]]
-// CK29-DAG: [[VAR2]] = getelementptr inbounds double, ptr [[VAR22:%.+]], i{{.+}} 0
+// CK29-DAG: [[VAR2]] = getelementptr inbounds nuw double, ptr [[VAR22:%.+]], i{{.+}} 0
 // CK29-DAG: [[VAR22]] = load ptr, ptr %{{.+}},
 
 // CK29: call void [[CALL00:@.+]](ptr {{[^,]+}})
@@ -164,7 +164,7 @@ struct SSB{
 // CK29-DAG: store ptr [[VAR1:%.+]], ptr [[BP2]]
 // CK29-DAG: store ptr [[VAR2:%.+]], ptr [[P2]]
 // CK29-DAG: [[VAR1]] = getelementptr inbounds nuw [[SSA]], ptr %{{.+}}, i32 0, i32 1
-// CK29-DAG: [[VAR2]] = getelementptr inbounds double, ptr [[VAR22:%.+]], i{{.+}} 0
+// CK29-DAG: [[VAR2]] = getelementptr inbounds nuw double, ptr [[VAR22:%.+]], i{{.+}} 0
 // CK29-DAG: [[VAR22]] = load ptr, ptr %{{.+}},
 
 // CK29: call void [[CALL00:@.+]](ptr {{[^,]+}})
diff --git clang/test/OpenMP/target_map_codegen_29.cpp clang/test/OpenMP/target_map_codegen_29.cpp
index 936a01573c2d..3ca7b228d26c 100644
--- clang/test/OpenMP/target_map_codegen_29.cpp
+++ clang/test/OpenMP/target_map_codegen_29.cpp
@@ -89,7 +89,7 @@ typedef struct StructWithPtrTag : public Base {
 // CK30-DAG: [[PTR:%.+]] = getelementptr inbounds [4 x ptr], ptr [[PTRS]], i32 0, i32 2
 // CK30-DAG: store ptr [[S_PTR1_BEGIN:%.+]], ptr [[PTR]],
 // CK30-DAG: [[S_PTR1]] = getelementptr inbounds nuw [[STRUCT]], ptr [[S]], i32 0, i32 4
-// CK30-DAG: [[S_PTR1_BEGIN]] = getelementptr inbounds i32, ptr [[S_PTR1_BEGIN_REF:%.+]], i{{64|32}} 0
+// CK30-DAG: [[S_PTR1_BEGIN]] = getelementptr inbounds nuw i32, ptr [[S_PTR1_BEGIN_REF:%.+]], i{{64|32}} 0
 // CK30-DAG: [[S_PTR1_BEGIN_REF]] = load ptr, ptr [[S_PTR1:%.+]],
 // CK30-DAG: [[S_PTR1]] = getelementptr inbounds nuw [[STRUCT]], ptr [[S]], i32 0, i32 4
 
@@ -98,7 +98,7 @@ typedef struct StructWithPtrTag : public Base {
 // CK30-DAG: [[PTR:%.+]] = getelementptr inbounds [4 x ptr], ptr [[PTRS]], i32 0, i32 3
 // CK30-DAG: store ptr [[S_PTRBASE1_BEGIN:%.+]], ptr [[PTR]],
 // CK30-DAG: [[S_PTRBASE1]] = getelementptr inbounds nuw [[BASE]], ptr [[S_BASE:%.+]], i32 0, i32 2
-// CK30-DAG: [[S_PTRBASE1_BEGIN]] = getelementptr inbounds i32, ptr [[S_PTRBASE1_BEGIN_REF:%.+]], i{{64|32}} 0
+// CK30-DAG: [[S_PTRBASE1_BEGIN]] = getelementptr inbounds nuw i32, ptr [[S_PTRBASE1_BEGIN_REF:%.+]], i{{64|32}} 0
 // CK30-DAG: [[S_PTRBASE1_BEGIN_REF]] = load ptr, ptr [[S_PTRBASE1:%.+]],
 // CK30-DAG: [[S_PTRBASE1]] = getelementptr inbounds nuw [[BASE]], ptr [[S_BASE:%.+]], i32 0, i32 2
 void map_with_deep_copy() {
diff --git clang/test/OpenMP/target_map_deref_array_codegen.cpp clang/test/OpenMP/target_map_deref_array_codegen.cpp
index 9d395b0ab8cd..e61fc7296332 100644
--- clang/test/OpenMP/target_map_deref_array_codegen.cpp
+++ clang/test/OpenMP/target_map_deref_array_codegen.cpp
@@ -75,7 +75,7 @@ void foo(int **t1d)
 // CHECK-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[T1D_ADDR]], align 8
 // CHECK-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[T1D_ADDR]], align 8
 // CHECK-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 0
+// CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP10]], i64 0
 // CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK-NEXT:    store ptr [[TMP7]], ptr [[TMP11]], align 8
 // CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
diff --git clang/test/OpenMP/target_map_member_expr_array_section_codegen.cpp clang/test/OpenMP/target_map_member_expr_array_section_codegen.cpp
index 7a0da002fb94..692e3a4214c9 100644
--- clang/test/OpenMP/target_map_member_expr_array_section_codegen.cpp
+++ clang/test/OpenMP/target_map_member_expr_array_section_codegen.cpp
@@ -28,12 +28,12 @@ struct maptest {
     // CHECK: getelementptr inbounds
     // CHECK: [[S_ADDR:%.+]] = getelementptr inbounds nuw %struct.maptest, ptr [[THIS:%.+]], i32 0, i32 0
     // CHECK: [[S_DATA_ADDR:%.+]] = getelementptr inbounds nuw %struct.S, ptr [[S_ADDR]], i32 0, i32 0
-    // CHECK: [[S_DATA_0_ADDR:%.+]] = getelementptr inbounds [6 x float], ptr [[S_DATA_ADDR]], i64 0, i64 0
+    // CHECK: [[S_DATA_0_ADDR:%.+]] = getelementptr inbounds nuw [6 x float], ptr [[S_DATA_ADDR]], i64 0, i64 0
 
     // SZ = &this->s.data[6]-&this->s.data[0]
     // CHECK: [[S_ADDR:%.+]] = getelementptr inbounds nuw %struct.maptest, ptr [[THIS]], i32 0, i32 0
     // CHECK: [[S_DATA_ADDR:%.+]] = getelementptr inbounds nuw %struct.S, ptr [[S_ADDR]], i32 0, i32 0
-    // CHECK: [[S_DATA_5_ADDR:%.+]] = getelementptr inbounds [6 x float], ptr [[S_DATA_ADDR]], i64 0, i64 5
+    // CHECK: [[S_DATA_5_ADDR:%.+]] = getelementptr inbounds nuw [6 x float], ptr [[S_DATA_ADDR]], i64 0, i64 5
     // CHECK: [[S_DATA_6_ADDR:%.+]] = getelementptr float, ptr [[S_DATA_5_ADDR]], i32 1
     // CHECK: [[END_BC:%.+]] = ptrtoint ptr [[S_DATA_6_ADDR]] to i64
     // CHECK: [[BEG_BC:%.+]] = ptrtoint ptr [[S_DATA_0_ADDR]] to i64
@@ -64,12 +64,12 @@ struct maptest {
     // CHECK: [[SIZE:%.+]] = alloca [2 x i64],
     // CHECK: [[S_ADDR:%.+]] = getelementptr inbounds nuw %struct.maptest, ptr [[THIS:%.+]], i32 0, i32 0
     // CHECK: [[S_DATA_ADDR:%.+]] = getelementptr inbounds nuw %struct.S, ptr [[S_ADDR]], i32 0, i32 0
-    // CHECK: [[S_DATA_0_ADDR:%.+]] = getelementptr inbounds [6 x float], ptr [[S_DATA_ADDR]], i64 0, i64 0
+    // CHECK: [[S_DATA_0_ADDR:%.+]] = getelementptr inbounds nuw [6 x float], ptr [[S_DATA_ADDR]], i64 0, i64 0
 
     // SZ = &this->s.data[6]-&this->s.data[0]
     // CHECK: [[S_ADDR:%.+]] = getelementptr inbounds nuw %struct.maptest, ptr [[THIS]], i32 0, i32 0
     // CHECK: [[S_DATA_ADDR:%.+]] = getelementptr inbounds nuw %struct.S, ptr [[S_ADDR]], i32 0, i32 0
-    // CHECK: [[S_DATA_5_ADDR:%.+]] = getelementptr inbounds [6 x float], ptr [[S_DATA_ADDR]], i64 0, i64 5
+    // CHECK: [[S_DATA_5_ADDR:%.+]] = getelementptr inbounds nuw [6 x float], ptr [[S_DATA_ADDR]], i64 0, i64 5
     // CHECK: [[S_DATA_6_ADDR:%.+]] = getelementptr float, ptr [[S_DATA_5_ADDR]], i32 1
     // CHECK: [[END_BC:%.+]] = ptrtoint ptr [[S_DATA_6_ADDR]] to i64
     // CHECK: [[BEG_BC:%.+]] = ptrtoint ptr [[S_DATA_0_ADDR]] to i64
diff --git clang/test/OpenMP/target_map_member_expr_codegen.cpp clang/test/OpenMP/target_map_member_expr_codegen.cpp
index 9b64647928a2..fb36ba7b78d5 100644
--- clang/test/OpenMP/target_map_member_expr_codegen.cpp
+++ clang/test/OpenMP/target_map_member_expr_codegen.cpp
@@ -223,7 +223,7 @@ void foo() {
 // CHECK-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[D_ADDR]], align 8
 // CHECK-NEXT:    [[A4:%.*]] = getelementptr inbounds nuw [[STRUCT_DESCRIPTOR]], ptr [[TMP10]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[A4]], align 8
-// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 0
+// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 0
 // CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr [[ASIZE]], align 4
 // CHECK-NEXT:    [[CONV:%.*]] = zext i32 [[TMP12]] to i64
 // CHECK-NEXT:    [[TMP13:%.*]] = mul nuw i64 [[CONV]], 4
@@ -233,7 +233,7 @@ void foo() {
 // CHECK-NEXT:    [[TMP16:%.*]] = load ptr, ptr [[D_ADDR]], align 8
 // CHECK-NEXT:    [[C5:%.*]] = getelementptr inbounds nuw [[STRUCT_DESCRIPTOR]], ptr [[TMP16]], i32 0, i32 1
 // CHECK-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[C5]], align 8
-// CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 0
+// CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i64 0
 // CHECK-NEXT:    [[TMP18:%.*]] = load i32, ptr [[CSIZE]], align 4
 // CHECK-NEXT:    [[CONV7:%.*]] = zext i32 [[TMP18]] to i64
 // CHECK-NEXT:    [[TMP19:%.*]] = mul nuw i64 [[CONV7]], 4
@@ -343,7 +343,7 @@ void foo() {
 // CHECK-NEXT:    [[TMP79:%.*]] = load ptr, ptr [[_TMP12]], align 8
 // CHECK-NEXT:    [[C15:%.*]] = getelementptr inbounds nuw [[STRUCT_DESCRIPTOR]], ptr [[TMP79]], i32 0, i32 1
 // CHECK-NEXT:    [[TMP80:%.*]] = load ptr, ptr [[C15]], align 8
-// CHECK-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i64 0
+// CHECK-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds nuw float, ptr [[TMP80]], i64 0
 // CHECK-NEXT:    [[TMP81:%.*]] = load i32, ptr [[CSIZE]], align 4
 // CHECK-NEXT:    [[CONV17:%.*]] = zext i32 [[TMP81]] to i64
 // CHECK-NEXT:    [[TMP82:%.*]] = mul nuw i64 [[CONV17]], 4
diff --git clang/test/OpenMP/target_map_nest_defalut_mapper_codegen.cpp clang/test/OpenMP/target_map_nest_defalut_mapper_codegen.cpp
index ffb145d8e50f..775f0b296b1b 100644
--- clang/test/OpenMP/target_map_nest_defalut_mapper_codegen.cpp
+++ clang/test/OpenMP/target_map_nest_defalut_mapper_codegen.cpp
@@ -45,7 +45,7 @@ void foo() {
 // CHECK-NEXT:    [[F:%.*]] = getelementptr inbounds nuw [[STRUCT_D]], ptr [[ARRAYIDX1]], i32 0, i32 1
 // CHECK-NEXT:    [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_C:%.*]], ptr [[F]], i32 0, i32 0
 // CHECK-NEXT:    store i32 222, ptr [[A]], align 4
-// CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [10 x %struct.D], ptr [[SA]], i64 0, i64 0
+// CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [10 x %struct.D], ptr [[SA]], i64 0, i64 0
 // CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK-NEXT:    store ptr [[SA]], ptr [[TMP0]], align 8
 // CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
diff --git clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp
index 3d0710acf0ee..5cce677e8857 100644
--- clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp
+++ clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp
@@ -96,16 +96,16 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[ARGC1]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP1]], i64 0
 // CHECK1-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 0
 // CHECK1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
 // CHECK1-NEXT:    [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP4]]
 // CHECK1-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i64 9
+// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP5]], i64 9
 // CHECK1-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 [[LB_ADD_LEN]]
+// CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP6]], i64 [[LB_ADD_LEN]]
 // CHECK1-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64
 // CHECK1-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64
 // CHECK1-NEXT:    [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]]
@@ -135,7 +135,7 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP20]]
 // CHECK1-NEXT:    store ptr [[_TMP6]], ptr [[_TMP5]], align 8
 // CHECK1-NEXT:    store ptr [[TMP21]], ptr [[_TMP6]], align 8
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0
 // CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[ARGC1]], ptr [[TMP22]], align 8
 // CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1
@@ -150,19 +150,19 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    store ptr @.red_comb., ptr [[TMP27]], align 8
 // CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6
 // CHECK1-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP28]], i8 0, i64 4, i1 false)
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_7:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_7:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1
 // CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP30:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds ptr, ptr [[TMP30]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP30]], i64 0
 // CHECK1-NEXT:    [[TMP31:%.*]] = load ptr, ptr [[ARRAYIDX8]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, ptr [[TMP31]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP31]], i64 0
 // CHECK1-NEXT:    [[TMP32:%.*]] = load i32, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP33:%.*]] = sext i32 [[TMP32]] to i64
 // CHECK1-NEXT:    [[LB_ADD_LEN10:%.*]] = add nsw i64 -1, [[TMP33]]
 // CHECK1-NEXT:    [[TMP34:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds ptr, ptr [[TMP34]], i64 9
+// CHECK1-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP34]], i64 9
 // CHECK1-NEXT:    [[TMP35:%.*]] = load ptr, ptr [[ARRAYIDX11]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 [[LB_ADD_LEN10]]
+// CHECK1-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP35]], i64 [[LB_ADD_LEN10]]
 // CHECK1-NEXT:    store ptr [[VLA]], ptr [[TMP29]], align 8
 // CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 1
 // CHECK1-NEXT:    store ptr [[ARRAYIDX9]], ptr [[TMP36]], align 8
@@ -483,9 +483,9 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]]
 // CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2
 // CHECK1-NEXT:    [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9
+// CHECK1-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP26]], i64 9
 // CHECK1-NEXT:    [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]]
+// CHECK1-NEXT:    [[ARRAYIDX3_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]]
 // CHECK1-NEXT:    [[TMP28:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64
 // CHECK1-NEXT:    [[TMP29:%.*]] = ptrtoint ptr [[TMP20]] to i64
 // CHECK1-NEXT:    [[TMP30:%.*]] = sub i64 [[TMP28]], [[TMP29]]
diff --git clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp
index 28d63dbf8c4a..c0bb4a6d6cc8 100644
--- clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp
+++ clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp
@@ -85,16 +85,16 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[ARGC1]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP1]], i64 0
 // CHECK1-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 0
 // CHECK1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
 // CHECK1-NEXT:    [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP4]]
 // CHECK1-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i64 9
+// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP5]], i64 9
 // CHECK1-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 [[LB_ADD_LEN]]
+// CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP6]], i64 [[LB_ADD_LEN]]
 // CHECK1-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64
 // CHECK1-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64
 // CHECK1-NEXT:    [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]]
@@ -124,7 +124,7 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP20]]
 // CHECK1-NEXT:    store ptr [[_TMP5]], ptr [[TMP]], align 8
 // CHECK1-NEXT:    store ptr [[TMP21]], ptr [[_TMP5]], align 8
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0
 // CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[ARGC1]], ptr [[TMP22]], align 8
 // CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1
@@ -139,19 +139,19 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    store ptr @.red_comb., ptr [[TMP27]], align 8
 // CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6
 // CHECK1-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP28]], i8 0, i64 4, i1 false)
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1
 // CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP30:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[TMP30]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP30]], i64 0
 // CHECK1-NEXT:    [[TMP31:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP31]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP31]], i64 0
 // CHECK1-NEXT:    [[TMP32:%.*]] = load i32, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP33:%.*]] = sext i32 [[TMP32]] to i64
 // CHECK1-NEXT:    [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP33]]
 // CHECK1-NEXT:    [[TMP34:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr [[TMP34]], i64 9
+// CHECK1-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP34]], i64 9
 // CHECK1-NEXT:    [[TMP35:%.*]] = load ptr, ptr [[ARRAYIDX10]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 [[LB_ADD_LEN9]]
+// CHECK1-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP35]], i64 [[LB_ADD_LEN9]]
 // CHECK1-NEXT:    store ptr [[VLA]], ptr [[TMP29]], align 8
 // CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 1
 // CHECK1-NEXT:    store ptr [[ARRAYIDX8]], ptr [[TMP36]], align 8
@@ -429,9 +429,9 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]]
 // CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2
 // CHECK1-NEXT:    [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9
+// CHECK1-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP26]], i64 9
 // CHECK1-NEXT:    [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]]
+// CHECK1-NEXT:    [[ARRAYIDX3_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]]
 // CHECK1-NEXT:    [[TMP28:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64
 // CHECK1-NEXT:    [[TMP29:%.*]] = ptrtoint ptr [[TMP20]] to i64
 // CHECK1-NEXT:    [[TMP30:%.*]] = sub i64 [[TMP28]], [[TMP29]]
diff --git clang/test/OpenMP/target_task_affinity_codegen.cpp clang/test/OpenMP/target_task_affinity_codegen.cpp
index 85c5d63a6cd9..53960cee4b73 100644
--- clang/test/OpenMP/target_task_affinity_codegen.cpp
+++ clang/test/OpenMP/target_task_affinity_codegen.cpp
@@ -76,7 +76,7 @@ int main() {
 // CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A]], align 8
 // CHECK1-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[A]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP2]], i64 0
 // CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[TMP1]], ptr [[TMP3]], align 8
 // CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
@@ -102,7 +102,7 @@ int main() {
 // CHECK1-NEXT:    [[TMP16:%.*]] = load ptr, ptr [[TMP0]], align 8
 // CHECK1-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[B]], align 8
 // CHECK1-NEXT:    [[TMP18:%.*]] = load ptr, ptr [[B]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP18]], i64 0
 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[TMP17]], ptr [[TMP19]], align 8
 // CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
@@ -174,9 +174,9 @@ int main() {
 // CHECK1-NEXT:    [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 48, i64 8, ptr @.omp_task_entry.)
 // CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x %struct.kmp_task_affinity_info_t], ptr [[DOTAFFS_ARR_ADDR]], i64 0, i64 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP4]], i64 0
 // CHECK1-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 1023
+// CHECK1-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP5]], i64 1023
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr i32, ptr [[ARRAYIDX1]], i32 1
 // CHECK1-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64
 // CHECK1-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP6]] to i64
@@ -299,7 +299,7 @@ int main() {
 // CHECK3-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK3-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A]], align 4
 // CHECK3-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[A]], align 4
-// CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0
+// CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP2]], i32 0
 // CHECK3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    store ptr [[TMP1]], ptr [[TMP3]], align 4
 // CHECK3-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
@@ -325,7 +325,7 @@ int main() {
 // CHECK3-NEXT:    [[TMP16:%.*]] = load ptr, ptr [[TMP0]], align 4
 // CHECK3-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[B]], align 4
 // CHECK3-NEXT:    [[TMP18:%.*]] = load ptr, ptr [[B]], align 4
-// CHECK3-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 0
+// CHECK3-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP18]], i32 0
 // CHECK3-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK3-NEXT:    store ptr [[TMP17]], ptr [[TMP19]], align 4
 // CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
@@ -397,9 +397,9 @@ int main() {
 // CHECK3-NEXT:    [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 24, i32 4, ptr @.omp_task_entry.)
 // CHECK3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x %struct.kmp_task_affinity_info_t], ptr [[DOTAFFS_ARR_ADDR]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0
+// CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP4]], i32 0
 // CHECK3-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK3-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 1023
+// CHECK3-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP5]], i32 1023
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr i32, ptr [[ARRAYIDX1]], i32 1
 // CHECK3-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i32
 // CHECK3-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP6]] to i32
@@ -587,9 +587,9 @@ int main() {
 // CHECK9-NEXT:    [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 48, i64 8, ptr @.omp_task_entry.)
 // CHECK9-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x %struct.kmp_task_affinity_info_t], ptr [[DOTAFFS_ARR_ADDR]], i64 0, i64 0
 // CHECK9-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK9-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 0
+// CHECK9-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP4]], i64 0
 // CHECK9-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK9-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 1023
+// CHECK9-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP5]], i64 1023
 // CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr i32, ptr [[ARRAYIDX1]], i32 1
 // CHECK9-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64
 // CHECK9-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP6]] to i64
@@ -709,9 +709,9 @@ int main() {
 // CHECK11-NEXT:    [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 24, i32 4, ptr @.omp_task_entry.)
 // CHECK11-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x %struct.kmp_task_affinity_info_t], ptr [[DOTAFFS_ARR_ADDR]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK11-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0
+// CHECK11-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP4]], i32 0
 // CHECK11-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK11-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 1023
+// CHECK11-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP5]], i32 1023
 // CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr i32, ptr [[ARRAYIDX1]], i32 1
 // CHECK11-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i32
 // CHECK11-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP6]] to i32
diff --git clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp
index 6f671dbb27ab..2c36b410af06 100644
--- clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp
+++ clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp
@@ -91,16 +91,16 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[ARGC1]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP1]], i64 0
 // CHECK1-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 0
 // CHECK1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
 // CHECK1-NEXT:    [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP4]]
 // CHECK1-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i64 9
+// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP5]], i64 9
 // CHECK1-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 [[LB_ADD_LEN]]
+// CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP6]], i64 [[LB_ADD_LEN]]
 // CHECK1-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64
 // CHECK1-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64
 // CHECK1-NEXT:    [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]]
@@ -130,7 +130,7 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP20]]
 // CHECK1-NEXT:    store ptr [[_TMP5]], ptr [[TMP]], align 8
 // CHECK1-NEXT:    store ptr [[TMP21]], ptr [[_TMP5]], align 8
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0
 // CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[ARGC1]], ptr [[TMP22]], align 8
 // CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1
@@ -145,19 +145,19 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    store ptr @.red_comb., ptr [[TMP27]], align 8
 // CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6
 // CHECK1-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP28]], i8 0, i64 4, i1 false)
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1
 // CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP30:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[TMP30]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP30]], i64 0
 // CHECK1-NEXT:    [[TMP31:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP31]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP31]], i64 0
 // CHECK1-NEXT:    [[TMP32:%.*]] = load i32, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP33:%.*]] = sext i32 [[TMP32]] to i64
 // CHECK1-NEXT:    [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP33]]
 // CHECK1-NEXT:    [[TMP34:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr [[TMP34]], i64 9
+// CHECK1-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP34]], i64 9
 // CHECK1-NEXT:    [[TMP35:%.*]] = load ptr, ptr [[ARRAYIDX10]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 [[LB_ADD_LEN9]]
+// CHECK1-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP35]], i64 [[LB_ADD_LEN9]]
 // CHECK1-NEXT:    store ptr [[VLA]], ptr [[TMP29]], align 8
 // CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 1
 // CHECK1-NEXT:    store ptr [[ARRAYIDX8]], ptr [[TMP36]], align 8
@@ -435,16 +435,16 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[ARGC1]], align 4
 // CHECK1-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP3]], i64 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 0
 // CHECK1-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
 // CHECK1-NEXT:    [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP6]]
 // CHECK1-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP7]], i64 9
+// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP7]], i64 9
 // CHECK1-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 [[LB_ADD_LEN]]
+// CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP8]], i64 [[LB_ADD_LEN]]
 // CHECK1-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64
 // CHECK1-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64
 // CHECK1-NEXT:    [[TMP11:%.*]] = sub i64 [[TMP9]], [[TMP10]]
@@ -474,7 +474,7 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP22]]
 // CHECK1-NEXT:    store ptr [[_TMP6]], ptr [[_TMP5]], align 8
 // CHECK1-NEXT:    store ptr [[TMP23]], ptr [[_TMP6]], align 8
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_]], i64 0, i64 0
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_]], i64 0, i64 0
 // CHECK1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[ARGC1]], ptr [[TMP24]], align 8
 // CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1
@@ -489,19 +489,19 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    store ptr @.red_comb..4, ptr [[TMP29]], align 8
 // CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6
 // CHECK1-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP30]], i8 0, i64 4, i1 false)
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_7:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_]], i64 0, i64 1
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_7:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_]], i64 0, i64 1
 // CHECK1-NEXT:    [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP32:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds ptr, ptr [[TMP32]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP32]], i64 0
 // CHECK1-NEXT:    [[TMP33:%.*]] = load ptr, ptr [[ARRAYIDX8]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, ptr [[TMP33]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP33]], i64 0
 // CHECK1-NEXT:    [[TMP34:%.*]] = load i32, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP35:%.*]] = sext i32 [[TMP34]] to i64
 // CHECK1-NEXT:    [[LB_ADD_LEN10:%.*]] = add nsw i64 -1, [[TMP35]]
 // CHECK1-NEXT:    [[TMP36:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds ptr, ptr [[TMP36]], i64 9
+// CHECK1-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP36]], i64 9
 // CHECK1-NEXT:    [[TMP37:%.*]] = load ptr, ptr [[ARRAYIDX11]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds i8, ptr [[TMP37]], i64 [[LB_ADD_LEN10]]
+// CHECK1-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP37]], i64 [[LB_ADD_LEN10]]
 // CHECK1-NEXT:    store ptr [[VLA]], ptr [[TMP31]], align 8
 // CHECK1-NEXT:    [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 1
 // CHECK1-NEXT:    store ptr [[ARRAYIDX9]], ptr [[TMP38]], align 8
@@ -822,9 +822,9 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]]
 // CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2
 // CHECK1-NEXT:    [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9
+// CHECK1-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP26]], i64 9
 // CHECK1-NEXT:    [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]]
+// CHECK1-NEXT:    [[ARRAYIDX3_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]]
 // CHECK1-NEXT:    [[TMP28:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64
 // CHECK1-NEXT:    [[TMP29:%.*]] = ptrtoint ptr [[TMP20]] to i64
 // CHECK1-NEXT:    [[TMP30:%.*]] = sub i64 [[TMP28]], [[TMP29]]
diff --git clang/test/OpenMP/target_update_codegen.cpp clang/test/OpenMP/target_update_codegen.cpp
index 5e038989ab6d..c8211f475c7f 100644
--- clang/test/OpenMP/target_update_codegen.cpp
+++ clang/test/OpenMP/target_update_codegen.cpp
@@ -1118,9 +1118,9 @@ struct ST {
 void foo(int arg) {
   ST arr[3][4];
   // CK20: [[DIMS:%.+]] = alloca [3 x [[STRUCT_DESCRIPTOR]]],
-  // CK20: [[ARRAY_IDX:%.+]] = getelementptr inbounds [3 x [4 x [[STRUCT_ST]]]], ptr [[ARR:%.+]], {{.+}} 0, {{.+}} 0
+  // CK20: [[ARRAY_IDX:%.+]] = getelementptr inbounds nuw [3 x [4 x [[STRUCT_ST]]]], ptr [[ARR:%.+]], {{.+}} 0, {{.+}} 0
   // CK20: [[ARRAY_DECAY:%.+]] = getelementptr inbounds [4 x [[STRUCT_ST]]], ptr [[ARRAY_IDX]], {{.+}} 0, {{.+}} 0
-  // CK20: [[ARRAY_IDX_1:%.+]] = getelementptr inbounds [[STRUCT_ST]], ptr [[ARRAY_DECAY]], {{.+}}
+  // CK20: [[ARRAY_IDX_1:%.+]] = getelementptr inbounds nuw [[STRUCT_ST]], ptr [[ARRAY_DECAY]], {{.+}}
   // CK20: [[BP0:%.+]] = getelementptr inbounds [1 x ptr], ptr [[BP:%.+]], {{.+}} 0, {{.+}} 0
   // CK20: store ptr [[ARR]], ptr [[BP0]],
   // CK20: [[P0:%.+]] = getelementptr inbounds [1 x ptr], ptr [[P:%.+]], {{.+}} 0, {{.+}} 0
@@ -1186,9 +1186,9 @@ struct ST {
   // CK21: _ZN2ST3fooEv
   void foo() {
     // CK21: [[DIMS:%.+]] = alloca [4 x [[STRUCT_DESCRIPTOR]]],
-    // CK21: [[ARRAY_IDX:%.+]] = getelementptr inbounds [10 x [10 x [10 x ptr]]], ptr [[DPTR:%.+]], {{.+}} 0, {{.+}} 0
+    // CK21: [[ARRAY_IDX:%.+]] = getelementptr inbounds nuw [10 x [10 x [10 x ptr]]], ptr [[DPTR:%.+]], {{.+}} 0, {{.+}} 0
     // CK21: [[ARRAY_DECAY:%.+]] = getelementptr inbounds [10 x [10 x ptr]], ptr [[ARRAY_IDX]], {{.+}} 0, {{.+}} 0
-    // CK21: [[ARRAY_IDX_1:%.+]] = getelementptr inbounds [10 x ptr], ptr [[ARRAY_DECAY]], {{.+}} 1
+    // CK21: [[ARRAY_IDX_1:%.+]] = getelementptr inbounds nuw [10 x ptr], ptr [[ARRAY_DECAY]], {{.+}} 1
     // CK21: [[ARRAY_DECAY_2:%.+]] = getelementptr inbounds [10 x ptr], ptr [[ARRAY_IDX_1]], {{.+}} 0, {{.+}} 0
     // CK21: [[ARRAY_IDX_3:%.+]] = getelementptr inbounds {{.+}}, ptr [[ARRAY_DECAY_2]], {{.+}} 0
     // CK21: [[BP0:%.+]] = getelementptr inbounds [2 x ptr], ptr [[BP:%.+]], {{.+}} 0, {{.+}} 0
@@ -1262,9 +1262,9 @@ struct ST {
   // CK22: _ZN2ST3fooEPA10_Pi
   void foo(int *arr[5][10]) {
     // CK22: [[DIMS:%.+]] = alloca [4 x [[STRUCT_DESCRIPTOR]]],
-    // CK22: [[ARRAY_IDX:%.+]] = getelementptr inbounds [10 x ptr], ptr [[ARR:%.+]], {{.+}} 0
+    // CK22: [[ARRAY_IDX:%.+]] = getelementptr inbounds nuw [10 x ptr], ptr [[ARR:%.+]], {{.+}} 0
     // CK22: [[ARRAY_DECAY:%.+]] = getelementptr inbounds [10 x ptr], ptr [[ARRAY_IDX]], {{.+}} 0, {{.+}} 0
-    // CK22: [[ARRAY_IDX_2:%.+]] = getelementptr inbounds ptr, ptr [[ARRAY_DECAY:%.+]], {{.+}} 1
+    // CK22: [[ARRAY_IDX_2:%.+]] = getelementptr inbounds nuw ptr, ptr [[ARRAY_DECAY:%.+]], {{.+}} 1
     // CK22: [[BP0:%.+]] = getelementptr inbounds [1 x ptr], ptr [[BP:%.+]], {{.+}} 0, {{.+}} 0
     // CK22: [[P0:%.+]] = getelementptr inbounds [1 x ptr], ptr [[P:%.+]], i{{.+}} 0, i{{.+}} 0
     // CK22: [[DIM_1:%.+]] = getelementptr inbounds [4 x [[STRUCT_DESCRIPTOR]]], ptr [[DIMS]], {{.+}} 0, {{.+}} 0
@@ -1338,11 +1338,11 @@ void foo(int arg) {
   float farr[5][5][5];
   // CK23: [[ARG_ADDR:%.+]] = alloca i32,
   // CK23: [[DIMS:%.+]] = alloca [4 x [[STRUCT_DESCRIPTOR]]],
-  // CK23: [[ARRAY_IDX:%.+]] = getelementptr inbounds [5 x [5 x [5 x float]]], ptr [[ARR:%.+]], {{.+}} 0, {{.+}} 0
+  // CK23: [[ARRAY_IDX:%.+]] = getelementptr inbounds nuw [5 x [5 x [5 x float]]], ptr [[ARR:%.+]], {{.+}} 0, {{.+}} 0
   // CK23: [[ARRAY_DECAY:%.+]] = getelementptr inbounds [5 x [5 x float]], ptr [[ARRAY_IDX]], {{.+}} 0, {{.+}} 0
-  // CK23: [[ARRAY_IDX_1:%.+]] = getelementptr inbounds [5 x float], ptr [[ARRAY_DECAY]], {{.+}}
+  // CK23: [[ARRAY_IDX_1:%.+]] = getelementptr inbounds nuw [5 x float], ptr [[ARRAY_DECAY]], {{.+}}
   // CK23: [[ARRAY_DECAY_2:%.+]] = getelementptr inbounds [5 x float], ptr [[ARRAY_IDX_1]], {{.+}} 0, {{.+}} 0
-  // CK23: [[ARRAY_IDX_2:%.+]] = getelementptr inbounds float, ptr [[ARRAY_DECAY_2]], {{.+}}
+  // CK23: [[ARRAY_IDX_2:%.+]] = getelementptr inbounds nuw float, ptr [[ARRAY_DECAY_2]], {{.+}}
   // CK23: [[MUL:%.+]] = mul nuw i64 4,
   // CK23: [[BP0:%.+]] = getelementptr inbounds [1 x ptr], ptr [[BP:%.+]], {{.+}} 0, {{.+}} 0
   // CK23: store ptr [[ARR]], ptr [[BP0]],
@@ -1411,11 +1411,11 @@ void foo(int arg) {
 void foo(int arg) {
   double darr[3][4][5];
   // CK24: [[DIMS:%.+]] = alloca [4 x [[STRUCT_DESCRIPTOR]]],
-  // CK24: [[ARRAY_IDX:%.+]] = getelementptr inbounds [3 x [4 x [5 x double]]], ptr [[ARR:%.+]], {{.+}} 0, {{.+}} 0
+  // CK24: [[ARRAY_IDX:%.+]] = getelementptr inbounds nuw [3 x [4 x [5 x double]]], ptr [[ARR:%.+]], {{.+}} 0, {{.+}} 0
   // CK24: [[ARRAY_DECAY:%.+]] = getelementptr inbounds [4 x [5 x double]], ptr [[ARRAY_IDX]], {{.+}} 0, {{.+}} 0
-  // CK24: [[ARRAY_IDX_1:%.+]] = getelementptr inbounds [5 x double], ptr [[ARRAY_DECAY]], {{.+}}
+  // CK24: [[ARRAY_IDX_1:%.+]] = getelementptr inbounds nuw [5 x double], ptr [[ARRAY_DECAY]], {{.+}}
   // CK24: [[ARRAY_DECAY_2:%.+]] = getelementptr inbounds [5 x double], ptr [[ARRAY_IDX_1]], {{.+}} 0, {{.+}} 0
-  // CK24: [[ARRAY_IDX_2:%.+]] = getelementptr inbounds double, ptr [[ARRAY_DECAY_2]], {{.+}}
+  // CK24: [[ARRAY_IDX_2:%.+]] = getelementptr inbounds nuw double, ptr [[ARRAY_DECAY_2]], {{.+}}
   // CK24: [[MUL:%.+]] = mul nuw i64 8,
   // CK24: [[SUB:%.+]] = sub nuw i64 4, [[ARG:%.+]]
   // CK24: [[LEN:%.+]] = udiv {{.+}} [[SUB]], 1
@@ -1488,15 +1488,15 @@ void foo(int arg) {
 
   // CK25: [[DIMS:%.+]] = alloca [4 x [[STRUCT_DESCRIPTOR]]],
   // CK25: [[DIMS_2:%.+]] = alloca [3 x [[STRUCT_DESCRIPTOR]]],
-  // CK25: [[ARRAY_IDX:%.+]] = getelementptr inbounds [3 x [4 x [5 x i32]]], ptr [[ARR:%.+]], {{.+}} 0, {{.+}} 0
+  // CK25: [[ARRAY_IDX:%.+]] = getelementptr inbounds nuw [3 x [4 x [5 x i32]]], ptr [[ARR:%.+]], {{.+}} 0, {{.+}} 0
   // CK25: [[ARRAY_DECAY:%.+]] = getelementptr inbounds [4 x [5 x i32]], ptr [[ARRAY_IDX]], {{.+}} 0, {{.+}} 0
-  // CK25: [[ARRAY_IDX_1:%.+]] = getelementptr inbounds [5 x i32], ptr [[ARRAY_DECAY]], {{.+}}
+  // CK25: [[ARRAY_IDX_1:%.+]] = getelementptr inbounds nuw [5 x i32], ptr [[ARRAY_DECAY]], {{.+}}
   // CK25: [[ARRAY_DECAY_2:%.+]] = getelementptr inbounds [5 x i32], ptr [[ARRAY_IDX_1]], {{.+}} 0, {{.+}} 0
-  // CK25: [[ARRAY_IDX_3:%.+]] = getelementptr inbounds {{.+}}, ptr [[ARRAY_DECAY_2]], {{.+}} 1
+  // CK25: [[ARRAY_IDX_3:%.+]] = getelementptr inbounds nuw {{.+}}, ptr [[ARRAY_DECAY_2]], {{.+}} 1
   // CK25: [[LEN:%.+]] = sub nuw i64 4, [[ARG_ADDR:%.+]]
-  // CK25: [[ARRAY_IDX_4:%.+]] = getelementptr inbounds [4 x [3 x float]], ptr [[FARR:%.+]], {{.+}} 0, {{.+}} 0
+  // CK25: [[ARRAY_IDX_4:%.+]] = getelementptr inbounds nuw [4 x [3 x float]], ptr [[FARR:%.+]], {{.+}} 0, {{.+}} 0
   // CK25: [[ARRAY_DECAY_5:%.+]] = getelementptr inbounds [3 x float], ptr [[ARRAY_IDX_4]], {{.+}} 0, {{.+}} 0
-  // CK25: [[ARRAY_IDX_6:%.+]] = getelementptr inbounds float, ptr [[ARRAY_DECAY_5:%.+]], {{.+}} 1
+  // CK25: [[ARRAY_IDX_6:%.+]] = getelementptr inbounds nuw float, ptr [[ARRAY_DECAY_5:%.+]], {{.+}} 1
   // CK25: [[BP0:%.+]] = getelementptr inbounds [3 x ptr], ptr [[BP:%.+]], i{{.+}} 0, i{{.+}} 0
   // CK25: [[P0:%.+]] = getelementptr inbounds [3 x ptr], ptr [[P:%.+]], i{{.+}} 0, i{{.+}} 0
   // CK25: [[DIM_1:%.+]] = getelementptr inbounds [4 x [[STRUCT_DESCRIPTOR]]], ptr [[DIMS]], {{.+}} 0, {{.+}} 0
diff --git clang/test/OpenMP/task_codegen.c clang/test/OpenMP/task_codegen.c
index 0d10cbce4aa8..d08eb3762d5c 100644
--- clang/test/OpenMP/task_codegen.c
+++ clang/test/OpenMP/task_codegen.c
@@ -183,7 +183,7 @@ for (int i = 0; i < 10; ++i)
   // CHECK: [[A:%.+]] = load ptr, ptr [[A_ADDR:%.+]],
   // CHECK: [[K:%.+]] = load i32, ptr [[K_ADDR]],
   // CHECK: [[IDX:%.+]] = zext i32 [[K]] to i64
-  // CHECK: [[AK_ADDR:%.+]] = getelementptr inbounds ptr, ptr [[A]], i64 [[IDX]]
+  // CHECK: [[AK_ADDR:%.+]] = getelementptr inbounds nuw ptr, ptr [[A]], i64 [[IDX]]
   // CHECK: [[AK:%.+]] = load ptr, ptr [[AK_ADDR]],
   // CHECK: [[I:%.+]] = load i32, ptr [[I_ADDR]],
   // CHECK: [[IDX:%.+]] = sext i32 [[I]] to i64
diff --git clang/test/OpenMP/task_codegen.cpp clang/test/OpenMP/task_codegen.cpp
index b256c41132ed..c3e6d9e6b1cf 100644
--- clang/test/OpenMP/task_codegen.cpp
+++ clang/test/OpenMP/task_codegen.cpp
@@ -309,9 +309,9 @@ void test_omp_all_memory()
 // CHECK1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP30]], i32 0, i32 2
 // CHECK1-NEXT:    store i8 1, ptr [[TMP33]], align 8
 // CHECK1-NEXT:    [[TMP34:%.*]] = mul nsw i64 0, [[TMP2]]
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP34]]
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP34]]
 // CHECK1-NEXT:    [[TMP35:%.*]] = mul nsw i64 9, [[TMP2]]
-// CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP35]]
+// CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP35]]
 // CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr i32, ptr [[ARRAYIDX2]], i32 1
 // CHECK1-NEXT:    [[TMP37:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64
 // CHECK1-NEXT:    [[TMP38:%.*]] = ptrtoint ptr [[TMP36]] to i64
@@ -346,13 +346,13 @@ void test_omp_all_memory()
 // CHECK1-NEXT:    [[TMP58:%.*]] = load i8, ptr [[B]], align 1
 // CHECK1-NEXT:    [[TMP59:%.*]] = sext i8 [[TMP58]] to i64
 // CHECK1-NEXT:    [[TMP60:%.*]] = mul nsw i64 4, [[TMP2]]
-// CHECK1-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP60]]
-// CHECK1-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX7]], i64 [[TMP59]]
+// CHECK1-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP60]]
+// CHECK1-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX7]], i64 [[TMP59]]
 // CHECK1-NEXT:    [[TMP61:%.*]] = load i8, ptr [[B]], align 1
 // CHECK1-NEXT:    [[TMP62:%.*]] = sext i8 [[TMP61]] to i64
 // CHECK1-NEXT:    [[TMP63:%.*]] = mul nsw i64 9, [[TMP2]]
-// CHECK1-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP63]]
-// CHECK1-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX9]], i64 [[TMP62]]
+// CHECK1-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP63]]
+// CHECK1-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX9]], i64 [[TMP62]]
 // CHECK1-NEXT:    [[TMP64:%.*]] = getelementptr i32, ptr [[ARRAYIDX10]], i32 1
 // CHECK1-NEXT:    [[TMP65:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64
 // CHECK1-NEXT:    [[TMP66:%.*]] = ptrtoint ptr [[TMP64]] to i64
@@ -384,13 +384,13 @@ void test_omp_all_memory()
 // CHECK1-NEXT:    [[TMP83:%.*]] = load i8, ptr [[B]], align 1
 // CHECK1-NEXT:    [[TMP84:%.*]] = sext i8 [[TMP83]] to i64
 // CHECK1-NEXT:    [[TMP85:%.*]] = mul nsw i64 4, [[TMP2]]
-// CHECK1-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP85]]
-// CHECK1-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX15]], i64 [[TMP84]]
+// CHECK1-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP85]]
+// CHECK1-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX15]], i64 [[TMP84]]
 // CHECK1-NEXT:    [[TMP86:%.*]] = load i8, ptr [[B]], align 1
 // CHECK1-NEXT:    [[TMP87:%.*]] = sext i8 [[TMP86]] to i64
 // CHECK1-NEXT:    [[TMP88:%.*]] = mul nsw i64 9, [[TMP2]]
-// CHECK1-NEXT:    [[ARRAYIDX17:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP88]]
-// CHECK1-NEXT:    [[ARRAYIDX18:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX17]], i64 [[TMP87]]
+// CHECK1-NEXT:    [[ARRAYIDX17:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP88]]
+// CHECK1-NEXT:    [[ARRAYIDX18:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX17]], i64 [[TMP87]]
 // CHECK1-NEXT:    [[TMP89:%.*]] = getelementptr i32, ptr [[ARRAYIDX18]], i32 1
 // CHECK1-NEXT:    [[TMP90:%.*]] = ptrtoint ptr [[ARRAYIDX16]] to i64
 // CHECK1-NEXT:    [[TMP91:%.*]] = ptrtoint ptr [[TMP89]] to i64
@@ -427,8 +427,8 @@ void test_omp_all_memory()
 // CHECK1-NEXT:    [[TMP111:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP108]], i32 0, i32 2
 // CHECK1-NEXT:    store i8 3, ptr [[TMP111]], align 8
 // CHECK1-NEXT:    [[TMP112:%.*]] = mul nsw i64 0, [[TMP2]]
-// CHECK1-NEXT:    [[ARRAYIDX23:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP112]]
-// CHECK1-NEXT:    [[ARRAYIDX24:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX23]], i64 3
+// CHECK1-NEXT:    [[ARRAYIDX23:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP112]]
+// CHECK1-NEXT:    [[ARRAYIDX24:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX23]], i64 3
 // CHECK1-NEXT:    [[TMP113:%.*]] = load i32, ptr @a, align 4
 // CHECK1-NEXT:    [[TMP114:%.*]] = sext i32 [[TMP113]] to i64
 // CHECK1-NEXT:    [[LEN_SUB_1:%.*]] = sub nsw i64 [[TMP114]], 1
@@ -436,8 +436,8 @@ void test_omp_all_memory()
 // CHECK1-NEXT:    [[TMP116:%.*]] = sext i32 [[TMP115]] to i64
 // CHECK1-NEXT:    [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP116]]
 // CHECK1-NEXT:    [[TMP117:%.*]] = mul nsw i64 [[LB_ADD_LEN]], [[TMP2]]
-// CHECK1-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP117]]
-// CHECK1-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX25]], i64 [[LEN_SUB_1]]
+// CHECK1-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP117]]
+// CHECK1-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX25]], i64 [[LEN_SUB_1]]
 // CHECK1-NEXT:    [[TMP118:%.*]] = getelementptr i32, ptr [[ARRAYIDX26]], i32 1
 // CHECK1-NEXT:    [[TMP119:%.*]] = ptrtoint ptr [[ARRAYIDX24]] to i64
 // CHECK1-NEXT:    [[TMP120:%.*]] = ptrtoint ptr [[TMP118]] to i64
@@ -1432,9 +1432,9 @@ void test_omp_all_memory()
 // CHECK1-51-NEXT:    [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP30]], i32 0, i32 2
 // CHECK1-51-NEXT:    store i8 1, ptr [[TMP33]], align 8
 // CHECK1-51-NEXT:    [[TMP34:%.*]] = mul nsw i64 0, [[TMP2]]
-// CHECK1-51-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP34]]
+// CHECK1-51-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP34]]
 // CHECK1-51-NEXT:    [[TMP35:%.*]] = mul nsw i64 9, [[TMP2]]
-// CHECK1-51-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP35]]
+// CHECK1-51-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP35]]
 // CHECK1-51-NEXT:    [[TMP36:%.*]] = getelementptr i32, ptr [[ARRAYIDX2]], i32 1
 // CHECK1-51-NEXT:    [[TMP37:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64
 // CHECK1-51-NEXT:    [[TMP38:%.*]] = ptrtoint ptr [[TMP36]] to i64
@@ -1469,13 +1469,13 @@ void test_omp_all_memory()
 // CHECK1-51-NEXT:    [[TMP58:%.*]] = load i8, ptr [[B]], align 1
 // CHECK1-51-NEXT:    [[TMP59:%.*]] = sext i8 [[TMP58]] to i64
 // CHECK1-51-NEXT:    [[TMP60:%.*]] = mul nsw i64 4, [[TMP2]]
-// CHECK1-51-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP60]]
-// CHECK1-51-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX7]], i64 [[TMP59]]
+// CHECK1-51-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP60]]
+// CHECK1-51-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX7]], i64 [[TMP59]]
 // CHECK1-51-NEXT:    [[TMP61:%.*]] = load i8, ptr [[B]], align 1
 // CHECK1-51-NEXT:    [[TMP62:%.*]] = sext i8 [[TMP61]] to i64
 // CHECK1-51-NEXT:    [[TMP63:%.*]] = mul nsw i64 9, [[TMP2]]
-// CHECK1-51-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP63]]
-// CHECK1-51-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX9]], i64 [[TMP62]]
+// CHECK1-51-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP63]]
+// CHECK1-51-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX9]], i64 [[TMP62]]
 // CHECK1-51-NEXT:    [[TMP64:%.*]] = getelementptr i32, ptr [[ARRAYIDX10]], i32 1
 // CHECK1-51-NEXT:    [[TMP65:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64
 // CHECK1-51-NEXT:    [[TMP66:%.*]] = ptrtoint ptr [[TMP64]] to i64
@@ -1507,13 +1507,13 @@ void test_omp_all_memory()
 // CHECK1-51-NEXT:    [[TMP83:%.*]] = load i8, ptr [[B]], align 1
 // CHECK1-51-NEXT:    [[TMP84:%.*]] = sext i8 [[TMP83]] to i64
 // CHECK1-51-NEXT:    [[TMP85:%.*]] = mul nsw i64 4, [[TMP2]]
-// CHECK1-51-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP85]]
-// CHECK1-51-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX15]], i64 [[TMP84]]
+// CHECK1-51-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP85]]
+// CHECK1-51-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX15]], i64 [[TMP84]]
 // CHECK1-51-NEXT:    [[TMP86:%.*]] = load i8, ptr [[B]], align 1
 // CHECK1-51-NEXT:    [[TMP87:%.*]] = sext i8 [[TMP86]] to i64
 // CHECK1-51-NEXT:    [[TMP88:%.*]] = mul nsw i64 9, [[TMP2]]
-// CHECK1-51-NEXT:    [[ARRAYIDX17:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP88]]
-// CHECK1-51-NEXT:    [[ARRAYIDX18:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX17]], i64 [[TMP87]]
+// CHECK1-51-NEXT:    [[ARRAYIDX17:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP88]]
+// CHECK1-51-NEXT:    [[ARRAYIDX18:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX17]], i64 [[TMP87]]
 // CHECK1-51-NEXT:    [[TMP89:%.*]] = getelementptr i32, ptr [[ARRAYIDX18]], i32 1
 // CHECK1-51-NEXT:    [[TMP90:%.*]] = ptrtoint ptr [[ARRAYIDX16]] to i64
 // CHECK1-51-NEXT:    [[TMP91:%.*]] = ptrtoint ptr [[TMP89]] to i64
@@ -1550,8 +1550,8 @@ void test_omp_all_memory()
 // CHECK1-51-NEXT:    [[TMP111:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP108]], i32 0, i32 2
 // CHECK1-51-NEXT:    store i8 3, ptr [[TMP111]], align 8
 // CHECK1-51-NEXT:    [[TMP112:%.*]] = mul nsw i64 0, [[TMP2]]
-// CHECK1-51-NEXT:    [[ARRAYIDX23:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP112]]
-// CHECK1-51-NEXT:    [[ARRAYIDX24:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX23]], i64 3
+// CHECK1-51-NEXT:    [[ARRAYIDX23:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP112]]
+// CHECK1-51-NEXT:    [[ARRAYIDX24:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX23]], i64 3
 // CHECK1-51-NEXT:    [[TMP113:%.*]] = load i32, ptr @a, align 4
 // CHECK1-51-NEXT:    [[TMP114:%.*]] = sext i32 [[TMP113]] to i64
 // CHECK1-51-NEXT:    [[LEN_SUB_1:%.*]] = sub nsw i64 [[TMP114]], 1
@@ -1559,8 +1559,8 @@ void test_omp_all_memory()
 // CHECK1-51-NEXT:    [[TMP116:%.*]] = sext i32 [[TMP115]] to i64
 // CHECK1-51-NEXT:    [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP116]]
 // CHECK1-51-NEXT:    [[TMP117:%.*]] = mul nsw i64 [[LB_ADD_LEN]], [[TMP2]]
-// CHECK1-51-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP117]]
-// CHECK1-51-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX25]], i64 [[LEN_SUB_1]]
+// CHECK1-51-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP117]]
+// CHECK1-51-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX25]], i64 [[LEN_SUB_1]]
 // CHECK1-51-NEXT:    [[TMP118:%.*]] = getelementptr i32, ptr [[ARRAYIDX26]], i32 1
 // CHECK1-51-NEXT:    [[TMP119:%.*]] = ptrtoint ptr [[ARRAYIDX24]] to i64
 // CHECK1-51-NEXT:    [[TMP120:%.*]] = ptrtoint ptr [[TMP118]] to i64
@@ -1595,8 +1595,8 @@ void test_omp_all_memory()
 // CHECK1-51-NEXT:    [[TMP139:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP136]], i32 0, i32 2
 // CHECK1-51-NEXT:    store i8 8, ptr [[TMP139]], align 8
 // CHECK1-51-NEXT:    [[TMP140:%.*]] = mul nsw i64 0, [[TMP2]]
-// CHECK1-51-NEXT:    [[ARRAYIDX31:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP140]]
-// CHECK1-51-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX31]], i64 3
+// CHECK1-51-NEXT:    [[ARRAYIDX31:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP140]]
+// CHECK1-51-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX31]], i64 3
 // CHECK1-51-NEXT:    [[TMP141:%.*]] = load i32, ptr @a, align 4
 // CHECK1-51-NEXT:    [[TMP142:%.*]] = sext i32 [[TMP141]] to i64
 // CHECK1-51-NEXT:    [[LEN_SUB_133:%.*]] = sub nsw i64 [[TMP142]], 1
@@ -1604,8 +1604,8 @@ void test_omp_all_memory()
 // CHECK1-51-NEXT:    [[TMP144:%.*]] = sext i32 [[TMP143]] to i64
 // CHECK1-51-NEXT:    [[LB_ADD_LEN34:%.*]] = add nsw i64 -1, [[TMP144]]
 // CHECK1-51-NEXT:    [[TMP145:%.*]] = mul nsw i64 [[LB_ADD_LEN34]], [[TMP2]]
-// CHECK1-51-NEXT:    [[ARRAYIDX35:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP145]]
-// CHECK1-51-NEXT:    [[ARRAYIDX36:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX35]], i64 [[LEN_SUB_133]]
+// CHECK1-51-NEXT:    [[ARRAYIDX35:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP145]]
+// CHECK1-51-NEXT:    [[ARRAYIDX36:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX35]], i64 [[LEN_SUB_133]]
 // CHECK1-51-NEXT:    [[TMP146:%.*]] = getelementptr i32, ptr [[ARRAYIDX36]], i32 1
 // CHECK1-51-NEXT:    [[TMP147:%.*]] = ptrtoint ptr [[ARRAYIDX32]] to i64
 // CHECK1-51-NEXT:    [[TMP148:%.*]] = ptrtoint ptr [[TMP146]] to i64
@@ -3040,9 +3040,9 @@ void test_omp_all_memory()
 // CHECK2-NEXT:    [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP30]], i32 0, i32 2
 // CHECK2-NEXT:    store i8 1, ptr [[TMP33]], align 8
 // CHECK2-NEXT:    [[TMP34:%.*]] = mul nsw i64 0, [[TMP2]]
-// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP34]]
+// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP34]]
 // CHECK2-NEXT:    [[TMP35:%.*]] = mul nsw i64 9, [[TMP2]]
-// CHECK2-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP35]]
+// CHECK2-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP35]]
 // CHECK2-NEXT:    [[TMP36:%.*]] = getelementptr i32, ptr [[ARRAYIDX2]], i32 1
 // CHECK2-NEXT:    [[TMP37:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64
 // CHECK2-NEXT:    [[TMP38:%.*]] = ptrtoint ptr [[TMP36]] to i64
@@ -3077,13 +3077,13 @@ void test_omp_all_memory()
 // CHECK2-NEXT:    [[TMP58:%.*]] = load i8, ptr [[B]], align 1
 // CHECK2-NEXT:    [[TMP59:%.*]] = sext i8 [[TMP58]] to i64
 // CHECK2-NEXT:    [[TMP60:%.*]] = mul nsw i64 4, [[TMP2]]
-// CHECK2-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP60]]
-// CHECK2-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX7]], i64 [[TMP59]]
+// CHECK2-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP60]]
+// CHECK2-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX7]], i64 [[TMP59]]
 // CHECK2-NEXT:    [[TMP61:%.*]] = load i8, ptr [[B]], align 1
 // CHECK2-NEXT:    [[TMP62:%.*]] = sext i8 [[TMP61]] to i64
 // CHECK2-NEXT:    [[TMP63:%.*]] = mul nsw i64 9, [[TMP2]]
-// CHECK2-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP63]]
-// CHECK2-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX9]], i64 [[TMP62]]
+// CHECK2-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP63]]
+// CHECK2-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX9]], i64 [[TMP62]]
 // CHECK2-NEXT:    [[TMP64:%.*]] = getelementptr i32, ptr [[ARRAYIDX10]], i32 1
 // CHECK2-NEXT:    [[TMP65:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64
 // CHECK2-NEXT:    [[TMP66:%.*]] = ptrtoint ptr [[TMP64]] to i64
@@ -3115,13 +3115,13 @@ void test_omp_all_memory()
 // CHECK2-NEXT:    [[TMP83:%.*]] = load i8, ptr [[B]], align 1
 // CHECK2-NEXT:    [[TMP84:%.*]] = sext i8 [[TMP83]] to i64
 // CHECK2-NEXT:    [[TMP85:%.*]] = mul nsw i64 4, [[TMP2]]
-// CHECK2-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP85]]
-// CHECK2-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX15]], i64 [[TMP84]]
+// CHECK2-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP85]]
+// CHECK2-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX15]], i64 [[TMP84]]
 // CHECK2-NEXT:    [[TMP86:%.*]] = load i8, ptr [[B]], align 1
 // CHECK2-NEXT:    [[TMP87:%.*]] = sext i8 [[TMP86]] to i64
 // CHECK2-NEXT:    [[TMP88:%.*]] = mul nsw i64 9, [[TMP2]]
-// CHECK2-NEXT:    [[ARRAYIDX17:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP88]]
-// CHECK2-NEXT:    [[ARRAYIDX18:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX17]], i64 [[TMP87]]
+// CHECK2-NEXT:    [[ARRAYIDX17:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP88]]
+// CHECK2-NEXT:    [[ARRAYIDX18:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX17]], i64 [[TMP87]]
 // CHECK2-NEXT:    [[TMP89:%.*]] = getelementptr i32, ptr [[ARRAYIDX18]], i32 1
 // CHECK2-NEXT:    [[TMP90:%.*]] = ptrtoint ptr [[ARRAYIDX16]] to i64
 // CHECK2-NEXT:    [[TMP91:%.*]] = ptrtoint ptr [[TMP89]] to i64
@@ -3158,8 +3158,8 @@ void test_omp_all_memory()
 // CHECK2-NEXT:    [[TMP111:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP108]], i32 0, i32 2
 // CHECK2-NEXT:    store i8 3, ptr [[TMP111]], align 8
 // CHECK2-NEXT:    [[TMP112:%.*]] = mul nsw i64 0, [[TMP2]]
-// CHECK2-NEXT:    [[ARRAYIDX23:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP112]]
-// CHECK2-NEXT:    [[ARRAYIDX24:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX23]], i64 3
+// CHECK2-NEXT:    [[ARRAYIDX23:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP112]]
+// CHECK2-NEXT:    [[ARRAYIDX24:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX23]], i64 3
 // CHECK2-NEXT:    [[TMP113:%.*]] = load i32, ptr @a, align 4
 // CHECK2-NEXT:    [[TMP114:%.*]] = sext i32 [[TMP113]] to i64
 // CHECK2-NEXT:    [[LEN_SUB_1:%.*]] = sub nsw i64 [[TMP114]], 1
@@ -3167,8 +3167,8 @@ void test_omp_all_memory()
 // CHECK2-NEXT:    [[TMP116:%.*]] = sext i32 [[TMP115]] to i64
 // CHECK2-NEXT:    [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP116]]
 // CHECK2-NEXT:    [[TMP117:%.*]] = mul nsw i64 [[LB_ADD_LEN]], [[TMP2]]
-// CHECK2-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP117]]
-// CHECK2-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX25]], i64 [[LEN_SUB_1]]
+// CHECK2-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP117]]
+// CHECK2-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX25]], i64 [[LEN_SUB_1]]
 // CHECK2-NEXT:    [[TMP118:%.*]] = getelementptr i32, ptr [[ARRAYIDX26]], i32 1
 // CHECK2-NEXT:    [[TMP119:%.*]] = ptrtoint ptr [[ARRAYIDX24]] to i64
 // CHECK2-NEXT:    [[TMP120:%.*]] = ptrtoint ptr [[TMP118]] to i64
@@ -4163,9 +4163,9 @@ void test_omp_all_memory()
 // CHECK2-51-NEXT:    [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP30]], i32 0, i32 2
 // CHECK2-51-NEXT:    store i8 1, ptr [[TMP33]], align 8
 // CHECK2-51-NEXT:    [[TMP34:%.*]] = mul nsw i64 0, [[TMP2]]
-// CHECK2-51-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP34]]
+// CHECK2-51-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP34]]
 // CHECK2-51-NEXT:    [[TMP35:%.*]] = mul nsw i64 9, [[TMP2]]
-// CHECK2-51-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP35]]
+// CHECK2-51-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP35]]
 // CHECK2-51-NEXT:    [[TMP36:%.*]] = getelementptr i32, ptr [[ARRAYIDX2]], i32 1
 // CHECK2-51-NEXT:    [[TMP37:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64
 // CHECK2-51-NEXT:    [[TMP38:%.*]] = ptrtoint ptr [[TMP36]] to i64
@@ -4200,13 +4200,13 @@ void test_omp_all_memory()
 // CHECK2-51-NEXT:    [[TMP58:%.*]] = load i8, ptr [[B]], align 1
 // CHECK2-51-NEXT:    [[TMP59:%.*]] = sext i8 [[TMP58]] to i64
 // CHECK2-51-NEXT:    [[TMP60:%.*]] = mul nsw i64 4, [[TMP2]]
-// CHECK2-51-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP60]]
-// CHECK2-51-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX7]], i64 [[TMP59]]
+// CHECK2-51-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP60]]
+// CHECK2-51-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX7]], i64 [[TMP59]]
 // CHECK2-51-NEXT:    [[TMP61:%.*]] = load i8, ptr [[B]], align 1
 // CHECK2-51-NEXT:    [[TMP62:%.*]] = sext i8 [[TMP61]] to i64
 // CHECK2-51-NEXT:    [[TMP63:%.*]] = mul nsw i64 9, [[TMP2]]
-// CHECK2-51-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP63]]
-// CHECK2-51-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX9]], i64 [[TMP62]]
+// CHECK2-51-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP63]]
+// CHECK2-51-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX9]], i64 [[TMP62]]
 // CHECK2-51-NEXT:    [[TMP64:%.*]] = getelementptr i32, ptr [[ARRAYIDX10]], i32 1
 // CHECK2-51-NEXT:    [[TMP65:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64
 // CHECK2-51-NEXT:    [[TMP66:%.*]] = ptrtoint ptr [[TMP64]] to i64
@@ -4238,13 +4238,13 @@ void test_omp_all_memory()
 // CHECK2-51-NEXT:    [[TMP83:%.*]] = load i8, ptr [[B]], align 1
 // CHECK2-51-NEXT:    [[TMP84:%.*]] = sext i8 [[TMP83]] to i64
 // CHECK2-51-NEXT:    [[TMP85:%.*]] = mul nsw i64 4, [[TMP2]]
-// CHECK2-51-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP85]]
-// CHECK2-51-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX15]], i64 [[TMP84]]
+// CHECK2-51-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP85]]
+// CHECK2-51-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX15]], i64 [[TMP84]]
 // CHECK2-51-NEXT:    [[TMP86:%.*]] = load i8, ptr [[B]], align 1
 // CHECK2-51-NEXT:    [[TMP87:%.*]] = sext i8 [[TMP86]] to i64
 // CHECK2-51-NEXT:    [[TMP88:%.*]] = mul nsw i64 9, [[TMP2]]
-// CHECK2-51-NEXT:    [[ARRAYIDX17:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP88]]
-// CHECK2-51-NEXT:    [[ARRAYIDX18:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX17]], i64 [[TMP87]]
+// CHECK2-51-NEXT:    [[ARRAYIDX17:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP88]]
+// CHECK2-51-NEXT:    [[ARRAYIDX18:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX17]], i64 [[TMP87]]
 // CHECK2-51-NEXT:    [[TMP89:%.*]] = getelementptr i32, ptr [[ARRAYIDX18]], i32 1
 // CHECK2-51-NEXT:    [[TMP90:%.*]] = ptrtoint ptr [[ARRAYIDX16]] to i64
 // CHECK2-51-NEXT:    [[TMP91:%.*]] = ptrtoint ptr [[TMP89]] to i64
@@ -4281,8 +4281,8 @@ void test_omp_all_memory()
 // CHECK2-51-NEXT:    [[TMP111:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP108]], i32 0, i32 2
 // CHECK2-51-NEXT:    store i8 3, ptr [[TMP111]], align 8
 // CHECK2-51-NEXT:    [[TMP112:%.*]] = mul nsw i64 0, [[TMP2]]
-// CHECK2-51-NEXT:    [[ARRAYIDX23:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP112]]
-// CHECK2-51-NEXT:    [[ARRAYIDX24:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX23]], i64 3
+// CHECK2-51-NEXT:    [[ARRAYIDX23:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP112]]
+// CHECK2-51-NEXT:    [[ARRAYIDX24:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX23]], i64 3
 // CHECK2-51-NEXT:    [[TMP113:%.*]] = load i32, ptr @a, align 4
 // CHECK2-51-NEXT:    [[TMP114:%.*]] = sext i32 [[TMP113]] to i64
 // CHECK2-51-NEXT:    [[LEN_SUB_1:%.*]] = sub nsw i64 [[TMP114]], 1
@@ -4290,8 +4290,8 @@ void test_omp_all_memory()
 // CHECK2-51-NEXT:    [[TMP116:%.*]] = sext i32 [[TMP115]] to i64
 // CHECK2-51-NEXT:    [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP116]]
 // CHECK2-51-NEXT:    [[TMP117:%.*]] = mul nsw i64 [[LB_ADD_LEN]], [[TMP2]]
-// CHECK2-51-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP117]]
-// CHECK2-51-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX25]], i64 [[LEN_SUB_1]]
+// CHECK2-51-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP117]]
+// CHECK2-51-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX25]], i64 [[LEN_SUB_1]]
 // CHECK2-51-NEXT:    [[TMP118:%.*]] = getelementptr i32, ptr [[ARRAYIDX26]], i32 1
 // CHECK2-51-NEXT:    [[TMP119:%.*]] = ptrtoint ptr [[ARRAYIDX24]] to i64
 // CHECK2-51-NEXT:    [[TMP120:%.*]] = ptrtoint ptr [[TMP118]] to i64
@@ -4326,8 +4326,8 @@ void test_omp_all_memory()
 // CHECK2-51-NEXT:    [[TMP139:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP136]], i32 0, i32 2
 // CHECK2-51-NEXT:    store i8 8, ptr [[TMP139]], align 8
 // CHECK2-51-NEXT:    [[TMP140:%.*]] = mul nsw i64 0, [[TMP2]]
-// CHECK2-51-NEXT:    [[ARRAYIDX31:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP140]]
-// CHECK2-51-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX31]], i64 3
+// CHECK2-51-NEXT:    [[ARRAYIDX31:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP140]]
+// CHECK2-51-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX31]], i64 3
 // CHECK2-51-NEXT:    [[TMP141:%.*]] = load i32, ptr @a, align 4
 // CHECK2-51-NEXT:    [[TMP142:%.*]] = sext i32 [[TMP141]] to i64
 // CHECK2-51-NEXT:    [[LEN_SUB_133:%.*]] = sub nsw i64 [[TMP142]], 1
@@ -4335,8 +4335,8 @@ void test_omp_all_memory()
 // CHECK2-51-NEXT:    [[TMP144:%.*]] = sext i32 [[TMP143]] to i64
 // CHECK2-51-NEXT:    [[LB_ADD_LEN34:%.*]] = add nsw i64 -1, [[TMP144]]
 // CHECK2-51-NEXT:    [[TMP145:%.*]] = mul nsw i64 [[LB_ADD_LEN34]], [[TMP2]]
-// CHECK2-51-NEXT:    [[ARRAYIDX35:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP145]]
-// CHECK2-51-NEXT:    [[ARRAYIDX36:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX35]], i64 [[LEN_SUB_133]]
+// CHECK2-51-NEXT:    [[ARRAYIDX35:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP145]]
+// CHECK2-51-NEXT:    [[ARRAYIDX36:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX35]], i64 [[LEN_SUB_133]]
 // CHECK2-51-NEXT:    [[TMP146:%.*]] = getelementptr i32, ptr [[ARRAYIDX36]], i32 1
 // CHECK2-51-NEXT:    [[TMP147:%.*]] = ptrtoint ptr [[ARRAYIDX32]] to i64
 // CHECK2-51-NEXT:    [[TMP148:%.*]] = ptrtoint ptr [[TMP146]] to i64
@@ -5773,9 +5773,9 @@ void test_omp_all_memory()
 // CHECK3-NEXT:    [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP29]], i32 0, i32 2
 // CHECK3-NEXT:    store i8 1, ptr [[TMP32]], align 8
 // CHECK3-NEXT:    [[TMP33:%.*]] = mul nsw i64 0, [[TMP1]]
-// CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP33]]
+// CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP33]]
 // CHECK3-NEXT:    [[TMP34:%.*]] = mul nsw i64 9, [[TMP1]]
-// CHECK3-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP34]]
+// CHECK3-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP34]]
 // CHECK3-NEXT:    [[TMP35:%.*]] = getelementptr i32, ptr [[ARRAYIDX4]], i32 1
 // CHECK3-NEXT:    [[TMP36:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64
 // CHECK3-NEXT:    [[TMP37:%.*]] = ptrtoint ptr [[TMP35]] to i64
@@ -5814,13 +5814,13 @@ void test_omp_all_memory()
 // CHECK3-NEXT:    [[TMP57:%.*]] = load i8, ptr [[B]], align 1
 // CHECK3-NEXT:    [[TMP58:%.*]] = sext i8 [[TMP57]] to i64
 // CHECK3-NEXT:    [[TMP59:%.*]] = mul nsw i64 4, [[TMP1]]
-// CHECK3-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP59]]
-// CHECK3-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX13]], i64 [[TMP58]]
+// CHECK3-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP59]]
+// CHECK3-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX13]], i64 [[TMP58]]
 // CHECK3-NEXT:    [[TMP60:%.*]] = load i8, ptr [[B]], align 1
 // CHECK3-NEXT:    [[TMP61:%.*]] = sext i8 [[TMP60]] to i64
 // CHECK3-NEXT:    [[TMP62:%.*]] = mul nsw i64 9, [[TMP1]]
-// CHECK3-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP62]]
-// CHECK3-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX15]], i64 [[TMP61]]
+// CHECK3-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP62]]
+// CHECK3-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX15]], i64 [[TMP61]]
 // CHECK3-NEXT:    [[TMP63:%.*]] = getelementptr i32, ptr [[ARRAYIDX16]], i32 1
 // CHECK3-NEXT:    [[TMP64:%.*]] = ptrtoint ptr [[ARRAYIDX14]] to i64
 // CHECK3-NEXT:    [[TMP65:%.*]] = ptrtoint ptr [[TMP63]] to i64
@@ -5854,13 +5854,13 @@ void test_omp_all_memory()
 // CHECK3-NEXT:    [[TMP82:%.*]] = load i8, ptr [[B]], align 1
 // CHECK3-NEXT:    [[TMP83:%.*]] = sext i8 [[TMP82]] to i64
 // CHECK3-NEXT:    [[TMP84:%.*]] = mul nsw i64 4, [[TMP1]]
-// CHECK3-NEXT:    [[ARRAYIDX23:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP84]]
-// CHECK3-NEXT:    [[ARRAYIDX24:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX23]], i64 [[TMP83]]
+// CHECK3-NEXT:    [[ARRAYIDX23:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP84]]
+// CHECK3-NEXT:    [[ARRAYIDX24:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX23]], i64 [[TMP83]]
 // CHECK3-NEXT:    [[TMP85:%.*]] = load i8, ptr [[B]], align 1
 // CHECK3-NEXT:    [[TMP86:%.*]] = sext i8 [[TMP85]] to i64
 // CHECK3-NEXT:    [[TMP87:%.*]] = mul nsw i64 9, [[TMP1]]
-// CHECK3-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP87]]
-// CHECK3-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX25]], i64 [[TMP86]]
+// CHECK3-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP87]]
+// CHECK3-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX25]], i64 [[TMP86]]
 // CHECK3-NEXT:    [[TMP88:%.*]] = getelementptr i32, ptr [[ARRAYIDX26]], i32 1
 // CHECK3-NEXT:    [[TMP89:%.*]] = ptrtoint ptr [[ARRAYIDX24]] to i64
 // CHECK3-NEXT:    [[TMP90:%.*]] = ptrtoint ptr [[TMP88]] to i64
@@ -5899,8 +5899,8 @@ void test_omp_all_memory()
 // CHECK3-NEXT:    [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP107]], i32 0, i32 2
 // CHECK3-NEXT:    store i8 3, ptr [[TMP110]], align 8
 // CHECK3-NEXT:    [[TMP111:%.*]] = mul nsw i64 0, [[TMP1]]
-// CHECK3-NEXT:    [[ARRAYIDX33:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP111]]
-// CHECK3-NEXT:    [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX33]], i64 3
+// CHECK3-NEXT:    [[ARRAYIDX33:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP111]]
+// CHECK3-NEXT:    [[ARRAYIDX34:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX33]], i64 3
 // CHECK3-NEXT:    [[TMP112:%.*]] = load i32, ptr @a, align 4
 // CHECK3-NEXT:    [[TMP113:%.*]] = sext i32 [[TMP112]] to i64
 // CHECK3-NEXT:    [[LEN_SUB_1:%.*]] = sub nsw i64 [[TMP113]], 1
@@ -5908,8 +5908,8 @@ void test_omp_all_memory()
 // CHECK3-NEXT:    [[TMP115:%.*]] = sext i32 [[TMP114]] to i64
 // CHECK3-NEXT:    [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP115]]
 // CHECK3-NEXT:    [[TMP116:%.*]] = mul nsw i64 [[LB_ADD_LEN]], [[TMP1]]
-// CHECK3-NEXT:    [[ARRAYIDX35:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP116]]
-// CHECK3-NEXT:    [[ARRAYIDX36:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX35]], i64 [[LEN_SUB_1]]
+// CHECK3-NEXT:    [[ARRAYIDX35:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP116]]
+// CHECK3-NEXT:    [[ARRAYIDX36:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX35]], i64 [[LEN_SUB_1]]
 // CHECK3-NEXT:    [[TMP117:%.*]] = getelementptr i32, ptr [[ARRAYIDX36]], i32 1
 // CHECK3-NEXT:    [[TMP118:%.*]] = ptrtoint ptr [[ARRAYIDX34]] to i64
 // CHECK3-NEXT:    [[TMP119:%.*]] = ptrtoint ptr [[TMP117]] to i64
@@ -6789,9 +6789,9 @@ void test_omp_all_memory()
 // CHECK4-NEXT:    [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP29]], i32 0, i32 2
 // CHECK4-NEXT:    store i8 1, ptr [[TMP32]], align 8
 // CHECK4-NEXT:    [[TMP33:%.*]] = mul nsw i64 0, [[TMP1]]
-// CHECK4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP33]]
+// CHECK4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP33]]
 // CHECK4-NEXT:    [[TMP34:%.*]] = mul nsw i64 9, [[TMP1]]
-// CHECK4-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP34]]
+// CHECK4-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP34]]
 // CHECK4-NEXT:    [[TMP35:%.*]] = getelementptr i32, ptr [[ARRAYIDX4]], i32 1
 // CHECK4-NEXT:    [[TMP36:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64
 // CHECK4-NEXT:    [[TMP37:%.*]] = ptrtoint ptr [[TMP35]] to i64
@@ -6830,13 +6830,13 @@ void test_omp_all_memory()
 // CHECK4-NEXT:    [[TMP57:%.*]] = load i8, ptr [[B]], align 1
 // CHECK4-NEXT:    [[TMP58:%.*]] = sext i8 [[TMP57]] to i64
 // CHECK4-NEXT:    [[TMP59:%.*]] = mul nsw i64 4, [[TMP1]]
-// CHECK4-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP59]]
-// CHECK4-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX13]], i64 [[TMP58]]
+// CHECK4-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP59]]
+// CHECK4-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX13]], i64 [[TMP58]]
 // CHECK4-NEXT:    [[TMP60:%.*]] = load i8, ptr [[B]], align 1
 // CHECK4-NEXT:    [[TMP61:%.*]] = sext i8 [[TMP60]] to i64
 // CHECK4-NEXT:    [[TMP62:%.*]] = mul nsw i64 9, [[TMP1]]
-// CHECK4-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP62]]
-// CHECK4-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX15]], i64 [[TMP61]]
+// CHECK4-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP62]]
+// CHECK4-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX15]], i64 [[TMP61]]
 // CHECK4-NEXT:    [[TMP63:%.*]] = getelementptr i32, ptr [[ARRAYIDX16]], i32 1
 // CHECK4-NEXT:    [[TMP64:%.*]] = ptrtoint ptr [[ARRAYIDX14]] to i64
 // CHECK4-NEXT:    [[TMP65:%.*]] = ptrtoint ptr [[TMP63]] to i64
@@ -6870,13 +6870,13 @@ void test_omp_all_memory()
 // CHECK4-NEXT:    [[TMP82:%.*]] = load i8, ptr [[B]], align 1
 // CHECK4-NEXT:    [[TMP83:%.*]] = sext i8 [[TMP82]] to i64
 // CHECK4-NEXT:    [[TMP84:%.*]] = mul nsw i64 4, [[TMP1]]
-// CHECK4-NEXT:    [[ARRAYIDX23:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP84]]
-// CHECK4-NEXT:    [[ARRAYIDX24:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX23]], i64 [[TMP83]]
+// CHECK4-NEXT:    [[ARRAYIDX23:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP84]]
+// CHECK4-NEXT:    [[ARRAYIDX24:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX23]], i64 [[TMP83]]
 // CHECK4-NEXT:    [[TMP85:%.*]] = load i8, ptr [[B]], align 1
 // CHECK4-NEXT:    [[TMP86:%.*]] = sext i8 [[TMP85]] to i64
 // CHECK4-NEXT:    [[TMP87:%.*]] = mul nsw i64 9, [[TMP1]]
-// CHECK4-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP87]]
-// CHECK4-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX25]], i64 [[TMP86]]
+// CHECK4-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP87]]
+// CHECK4-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX25]], i64 [[TMP86]]
 // CHECK4-NEXT:    [[TMP88:%.*]] = getelementptr i32, ptr [[ARRAYIDX26]], i32 1
 // CHECK4-NEXT:    [[TMP89:%.*]] = ptrtoint ptr [[ARRAYIDX24]] to i64
 // CHECK4-NEXT:    [[TMP90:%.*]] = ptrtoint ptr [[TMP88]] to i64
@@ -6915,8 +6915,8 @@ void test_omp_all_memory()
 // CHECK4-NEXT:    [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP107]], i32 0, i32 2
 // CHECK4-NEXT:    store i8 3, ptr [[TMP110]], align 8
 // CHECK4-NEXT:    [[TMP111:%.*]] = mul nsw i64 0, [[TMP1]]
-// CHECK4-NEXT:    [[ARRAYIDX33:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP111]]
-// CHECK4-NEXT:    [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX33]], i64 3
+// CHECK4-NEXT:    [[ARRAYIDX33:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP111]]
+// CHECK4-NEXT:    [[ARRAYIDX34:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX33]], i64 3
 // CHECK4-NEXT:    [[TMP112:%.*]] = load i32, ptr @a, align 4
 // CHECK4-NEXT:    [[TMP113:%.*]] = sext i32 [[TMP112]] to i64
 // CHECK4-NEXT:    [[LEN_SUB_1:%.*]] = sub nsw i64 [[TMP113]], 1
@@ -6924,8 +6924,8 @@ void test_omp_all_memory()
 // CHECK4-NEXT:    [[TMP115:%.*]] = sext i32 [[TMP114]] to i64
 // CHECK4-NEXT:    [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP115]]
 // CHECK4-NEXT:    [[TMP116:%.*]] = mul nsw i64 [[LB_ADD_LEN]], [[TMP1]]
-// CHECK4-NEXT:    [[ARRAYIDX35:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP116]]
-// CHECK4-NEXT:    [[ARRAYIDX36:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX35]], i64 [[LEN_SUB_1]]
+// CHECK4-NEXT:    [[ARRAYIDX35:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP116]]
+// CHECK4-NEXT:    [[ARRAYIDX36:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX35]], i64 [[LEN_SUB_1]]
 // CHECK4-NEXT:    [[TMP117:%.*]] = getelementptr i32, ptr [[ARRAYIDX36]], i32 1
 // CHECK4-NEXT:    [[TMP118:%.*]] = ptrtoint ptr [[ARRAYIDX34]] to i64
 // CHECK4-NEXT:    [[TMP119:%.*]] = ptrtoint ptr [[TMP117]] to i64
@@ -7808,9 +7808,9 @@ void test_omp_all_memory()
 // CHECK3-51-NEXT:    [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP29]], i32 0, i32 2
 // CHECK3-51-NEXT:    store i8 1, ptr [[TMP32]], align 8
 // CHECK3-51-NEXT:    [[TMP33:%.*]] = mul nsw i64 0, [[TMP1]]
-// CHECK3-51-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP33]]
+// CHECK3-51-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP33]]
 // CHECK3-51-NEXT:    [[TMP34:%.*]] = mul nsw i64 9, [[TMP1]]
-// CHECK3-51-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP34]]
+// CHECK3-51-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP34]]
 // CHECK3-51-NEXT:    [[TMP35:%.*]] = getelementptr i32, ptr [[ARRAYIDX4]], i32 1
 // CHECK3-51-NEXT:    [[TMP36:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64
 // CHECK3-51-NEXT:    [[TMP37:%.*]] = ptrtoint ptr [[TMP35]] to i64
@@ -7849,13 +7849,13 @@ void test_omp_all_memory()
 // CHECK3-51-NEXT:    [[TMP57:%.*]] = load i8, ptr [[B]], align 1
 // CHECK3-51-NEXT:    [[TMP58:%.*]] = sext i8 [[TMP57]] to i64
 // CHECK3-51-NEXT:    [[TMP59:%.*]] = mul nsw i64 4, [[TMP1]]
-// CHECK3-51-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP59]]
-// CHECK3-51-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX13]], i64 [[TMP58]]
+// CHECK3-51-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP59]]
+// CHECK3-51-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX13]], i64 [[TMP58]]
 // CHECK3-51-NEXT:    [[TMP60:%.*]] = load i8, ptr [[B]], align 1
 // CHECK3-51-NEXT:    [[TMP61:%.*]] = sext i8 [[TMP60]] to i64
 // CHECK3-51-NEXT:    [[TMP62:%.*]] = mul nsw i64 9, [[TMP1]]
-// CHECK3-51-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP62]]
-// CHECK3-51-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX15]], i64 [[TMP61]]
+// CHECK3-51-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP62]]
+// CHECK3-51-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX15]], i64 [[TMP61]]
 // CHECK3-51-NEXT:    [[TMP63:%.*]] = getelementptr i32, ptr [[ARRAYIDX16]], i32 1
 // CHECK3-51-NEXT:    [[TMP64:%.*]] = ptrtoint ptr [[ARRAYIDX14]] to i64
 // CHECK3-51-NEXT:    [[TMP65:%.*]] = ptrtoint ptr [[TMP63]] to i64
@@ -7889,13 +7889,13 @@ void test_omp_all_memory()
 // CHECK3-51-NEXT:    [[TMP82:%.*]] = load i8, ptr [[B]], align 1
 // CHECK3-51-NEXT:    [[TMP83:%.*]] = sext i8 [[TMP82]] to i64
 // CHECK3-51-NEXT:    [[TMP84:%.*]] = mul nsw i64 4, [[TMP1]]
-// CHECK3-51-NEXT:    [[ARRAYIDX23:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP84]]
-// CHECK3-51-NEXT:    [[ARRAYIDX24:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX23]], i64 [[TMP83]]
+// CHECK3-51-NEXT:    [[ARRAYIDX23:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP84]]
+// CHECK3-51-NEXT:    [[ARRAYIDX24:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX23]], i64 [[TMP83]]
 // CHECK3-51-NEXT:    [[TMP85:%.*]] = load i8, ptr [[B]], align 1
 // CHECK3-51-NEXT:    [[TMP86:%.*]] = sext i8 [[TMP85]] to i64
 // CHECK3-51-NEXT:    [[TMP87:%.*]] = mul nsw i64 9, [[TMP1]]
-// CHECK3-51-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP87]]
-// CHECK3-51-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX25]], i64 [[TMP86]]
+// CHECK3-51-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP87]]
+// CHECK3-51-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX25]], i64 [[TMP86]]
 // CHECK3-51-NEXT:    [[TMP88:%.*]] = getelementptr i32, ptr [[ARRAYIDX26]], i32 1
 // CHECK3-51-NEXT:    [[TMP89:%.*]] = ptrtoint ptr [[ARRAYIDX24]] to i64
 // CHECK3-51-NEXT:    [[TMP90:%.*]] = ptrtoint ptr [[TMP88]] to i64
@@ -7934,8 +7934,8 @@ void test_omp_all_memory()
 // CHECK3-51-NEXT:    [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP107]], i32 0, i32 2
 // CHECK3-51-NEXT:    store i8 3, ptr [[TMP110]], align 8
 // CHECK3-51-NEXT:    [[TMP111:%.*]] = mul nsw i64 0, [[TMP1]]
-// CHECK3-51-NEXT:    [[ARRAYIDX33:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP111]]
-// CHECK3-51-NEXT:    [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX33]], i64 3
+// CHECK3-51-NEXT:    [[ARRAYIDX33:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP111]]
+// CHECK3-51-NEXT:    [[ARRAYIDX34:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX33]], i64 3
 // CHECK3-51-NEXT:    [[TMP112:%.*]] = load i32, ptr @a, align 4
 // CHECK3-51-NEXT:    [[TMP113:%.*]] = sext i32 [[TMP112]] to i64
 // CHECK3-51-NEXT:    [[LEN_SUB_1:%.*]] = sub nsw i64 [[TMP113]], 1
@@ -7943,8 +7943,8 @@ void test_omp_all_memory()
 // CHECK3-51-NEXT:    [[TMP115:%.*]] = sext i32 [[TMP114]] to i64
 // CHECK3-51-NEXT:    [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP115]]
 // CHECK3-51-NEXT:    [[TMP116:%.*]] = mul nsw i64 [[LB_ADD_LEN]], [[TMP1]]
-// CHECK3-51-NEXT:    [[ARRAYIDX35:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP116]]
-// CHECK3-51-NEXT:    [[ARRAYIDX36:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX35]], i64 [[LEN_SUB_1]]
+// CHECK3-51-NEXT:    [[ARRAYIDX35:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP116]]
+// CHECK3-51-NEXT:    [[ARRAYIDX36:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX35]], i64 [[LEN_SUB_1]]
 // CHECK3-51-NEXT:    [[TMP117:%.*]] = getelementptr i32, ptr [[ARRAYIDX36]], i32 1
 // CHECK3-51-NEXT:    [[TMP118:%.*]] = ptrtoint ptr [[ARRAYIDX34]] to i64
 // CHECK3-51-NEXT:    [[TMP119:%.*]] = ptrtoint ptr [[TMP117]] to i64
@@ -7981,8 +7981,8 @@ void test_omp_all_memory()
 // CHECK3-51-NEXT:    [[TMP138:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP135]], i32 0, i32 2
 // CHECK3-51-NEXT:    store i8 8, ptr [[TMP138]], align 8
 // CHECK3-51-NEXT:    [[TMP139:%.*]] = mul nsw i64 0, [[TMP1]]
-// CHECK3-51-NEXT:    [[ARRAYIDX43:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP139]]
-// CHECK3-51-NEXT:    [[ARRAYIDX44:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX43]], i64 3
+// CHECK3-51-NEXT:    [[ARRAYIDX43:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP139]]
+// CHECK3-51-NEXT:    [[ARRAYIDX44:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX43]], i64 3
 // CHECK3-51-NEXT:    [[TMP140:%.*]] = load i32, ptr @a, align 4
 // CHECK3-51-NEXT:    [[TMP141:%.*]] = sext i32 [[TMP140]] to i64
 // CHECK3-51-NEXT:    [[LEN_SUB_145:%.*]] = sub nsw i64 [[TMP141]], 1
@@ -7990,8 +7990,8 @@ void test_omp_all_memory()
 // CHECK3-51-NEXT:    [[TMP143:%.*]] = sext i32 [[TMP142]] to i64
 // CHECK3-51-NEXT:    [[LB_ADD_LEN46:%.*]] = add nsw i64 -1, [[TMP143]]
 // CHECK3-51-NEXT:    [[TMP144:%.*]] = mul nsw i64 [[LB_ADD_LEN46]], [[TMP1]]
-// CHECK3-51-NEXT:    [[ARRAYIDX47:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP144]]
-// CHECK3-51-NEXT:    [[ARRAYIDX48:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX47]], i64 [[LEN_SUB_145]]
+// CHECK3-51-NEXT:    [[ARRAYIDX47:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP144]]
+// CHECK3-51-NEXT:    [[ARRAYIDX48:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX47]], i64 [[LEN_SUB_145]]
 // CHECK3-51-NEXT:    [[TMP145:%.*]] = getelementptr i32, ptr [[ARRAYIDX48]], i32 1
 // CHECK3-51-NEXT:    [[TMP146:%.*]] = ptrtoint ptr [[ARRAYIDX44]] to i64
 // CHECK3-51-NEXT:    [[TMP147:%.*]] = ptrtoint ptr [[TMP145]] to i64
@@ -9323,9 +9323,9 @@ void test_omp_all_memory()
 // CHECK4-51-NEXT:    [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP29]], i32 0, i32 2
 // CHECK4-51-NEXT:    store i8 1, ptr [[TMP32]], align 8
 // CHECK4-51-NEXT:    [[TMP33:%.*]] = mul nsw i64 0, [[TMP1]]
-// CHECK4-51-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP33]]
+// CHECK4-51-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP33]]
 // CHECK4-51-NEXT:    [[TMP34:%.*]] = mul nsw i64 9, [[TMP1]]
-// CHECK4-51-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP34]]
+// CHECK4-51-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP34]]
 // CHECK4-51-NEXT:    [[TMP35:%.*]] = getelementptr i32, ptr [[ARRAYIDX4]], i32 1
 // CHECK4-51-NEXT:    [[TMP36:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64
 // CHECK4-51-NEXT:    [[TMP37:%.*]] = ptrtoint ptr [[TMP35]] to i64
@@ -9364,13 +9364,13 @@ void test_omp_all_memory()
 // CHECK4-51-NEXT:    [[TMP57:%.*]] = load i8, ptr [[B]], align 1
 // CHECK4-51-NEXT:    [[TMP58:%.*]] = sext i8 [[TMP57]] to i64
 // CHECK4-51-NEXT:    [[TMP59:%.*]] = mul nsw i64 4, [[TMP1]]
-// CHECK4-51-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP59]]
-// CHECK4-51-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX13]], i64 [[TMP58]]
+// CHECK4-51-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP59]]
+// CHECK4-51-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX13]], i64 [[TMP58]]
 // CHECK4-51-NEXT:    [[TMP60:%.*]] = load i8, ptr [[B]], align 1
 // CHECK4-51-NEXT:    [[TMP61:%.*]] = sext i8 [[TMP60]] to i64
 // CHECK4-51-NEXT:    [[TMP62:%.*]] = mul nsw i64 9, [[TMP1]]
-// CHECK4-51-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP62]]
-// CHECK4-51-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX15]], i64 [[TMP61]]
+// CHECK4-51-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP62]]
+// CHECK4-51-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX15]], i64 [[TMP61]]
 // CHECK4-51-NEXT:    [[TMP63:%.*]] = getelementptr i32, ptr [[ARRAYIDX16]], i32 1
 // CHECK4-51-NEXT:    [[TMP64:%.*]] = ptrtoint ptr [[ARRAYIDX14]] to i64
 // CHECK4-51-NEXT:    [[TMP65:%.*]] = ptrtoint ptr [[TMP63]] to i64
@@ -9404,13 +9404,13 @@ void test_omp_all_memory()
 // CHECK4-51-NEXT:    [[TMP82:%.*]] = load i8, ptr [[B]], align 1
 // CHECK4-51-NEXT:    [[TMP83:%.*]] = sext i8 [[TMP82]] to i64
 // CHECK4-51-NEXT:    [[TMP84:%.*]] = mul nsw i64 4, [[TMP1]]
-// CHECK4-51-NEXT:    [[ARRAYIDX23:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP84]]
-// CHECK4-51-NEXT:    [[ARRAYIDX24:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX23]], i64 [[TMP83]]
+// CHECK4-51-NEXT:    [[ARRAYIDX23:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP84]]
+// CHECK4-51-NEXT:    [[ARRAYIDX24:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX23]], i64 [[TMP83]]
 // CHECK4-51-NEXT:    [[TMP85:%.*]] = load i8, ptr [[B]], align 1
 // CHECK4-51-NEXT:    [[TMP86:%.*]] = sext i8 [[TMP85]] to i64
 // CHECK4-51-NEXT:    [[TMP87:%.*]] = mul nsw i64 9, [[TMP1]]
-// CHECK4-51-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP87]]
-// CHECK4-51-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX25]], i64 [[TMP86]]
+// CHECK4-51-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP87]]
+// CHECK4-51-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX25]], i64 [[TMP86]]
 // CHECK4-51-NEXT:    [[TMP88:%.*]] = getelementptr i32, ptr [[ARRAYIDX26]], i32 1
 // CHECK4-51-NEXT:    [[TMP89:%.*]] = ptrtoint ptr [[ARRAYIDX24]] to i64
 // CHECK4-51-NEXT:    [[TMP90:%.*]] = ptrtoint ptr [[TMP88]] to i64
@@ -9449,8 +9449,8 @@ void test_omp_all_memory()
 // CHECK4-51-NEXT:    [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP107]], i32 0, i32 2
 // CHECK4-51-NEXT:    store i8 3, ptr [[TMP110]], align 8
 // CHECK4-51-NEXT:    [[TMP111:%.*]] = mul nsw i64 0, [[TMP1]]
-// CHECK4-51-NEXT:    [[ARRAYIDX33:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP111]]
-// CHECK4-51-NEXT:    [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX33]], i64 3
+// CHECK4-51-NEXT:    [[ARRAYIDX33:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP111]]
+// CHECK4-51-NEXT:    [[ARRAYIDX34:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX33]], i64 3
 // CHECK4-51-NEXT:    [[TMP112:%.*]] = load i32, ptr @a, align 4
 // CHECK4-51-NEXT:    [[TMP113:%.*]] = sext i32 [[TMP112]] to i64
 // CHECK4-51-NEXT:    [[LEN_SUB_1:%.*]] = sub nsw i64 [[TMP113]], 1
@@ -9458,8 +9458,8 @@ void test_omp_all_memory()
 // CHECK4-51-NEXT:    [[TMP115:%.*]] = sext i32 [[TMP114]] to i64
 // CHECK4-51-NEXT:    [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP115]]
 // CHECK4-51-NEXT:    [[TMP116:%.*]] = mul nsw i64 [[LB_ADD_LEN]], [[TMP1]]
-// CHECK4-51-NEXT:    [[ARRAYIDX35:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP116]]
-// CHECK4-51-NEXT:    [[ARRAYIDX36:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX35]], i64 [[LEN_SUB_1]]
+// CHECK4-51-NEXT:    [[ARRAYIDX35:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP116]]
+// CHECK4-51-NEXT:    [[ARRAYIDX36:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX35]], i64 [[LEN_SUB_1]]
 // CHECK4-51-NEXT:    [[TMP117:%.*]] = getelementptr i32, ptr [[ARRAYIDX36]], i32 1
 // CHECK4-51-NEXT:    [[TMP118:%.*]] = ptrtoint ptr [[ARRAYIDX34]] to i64
 // CHECK4-51-NEXT:    [[TMP119:%.*]] = ptrtoint ptr [[TMP117]] to i64
diff --git clang/test/OpenMP/task_in_reduction_codegen.cpp clang/test/OpenMP/task_in_reduction_codegen.cpp
index aa2a47813799..29dc12978d7d 100644
--- clang/test/OpenMP/task_in_reduction_codegen.cpp
+++ clang/test/OpenMP/task_in_reduction_codegen.cpp
@@ -90,7 +90,7 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[VLA:%.*]] = alloca i16, i64 [[TMP2]], align 16
 // CHECK1-NEXT:    store i64 [[TMP2]], ptr [[__VLA_EXPR0]], align 8
 // CHECK1-NEXT:    call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]])
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds nuw [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[A]], ptr [[TMP4]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1
@@ -105,7 +105,7 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    store ptr @.red_comb., ptr [[TMP9]], align 8
 // CHECK1-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6
 // CHECK1-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP10]], i8 0, i64 4, i1 false)
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_1:%.*]] = getelementptr inbounds [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_1:%.*]] = getelementptr inbounds nuw [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1
 // CHECK1-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_1]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[B]], ptr [[TMP11]], align 8
 // CHECK1-NEXT:    [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_1]], i32 0, i32 1
@@ -120,7 +120,7 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    store ptr @.red_comb..2, ptr [[TMP16]], align 8
 // CHECK1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_1]], i32 0, i32 6
 // CHECK1-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP17]], i8 0, i64 4, i1 false)
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_2:%.*]] = getelementptr inbounds [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 2
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_2:%.*]] = getelementptr inbounds nuw [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 2
 // CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_2]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[ARGC_ADDR]], ptr [[TMP18]], align 8
 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_2]], i32 0, i32 1
@@ -138,7 +138,7 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP25:%.*]] = call ptr @__kmpc_taskred_init(i32 [[TMP0]], i32 3, ptr [[DOTRD_INPUT_]])
 // CHECK1-NEXT:    store ptr [[TMP25]], ptr [[DOTTASK_RED_]], align 8
 // CHECK1-NEXT:    call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]])
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_4:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_3]], i64 0, i64 0
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_4:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_3]], i64 0, i64 0
 // CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0:%.*]], ptr [[DOTRD_INPUT_GEP_4]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[C]], ptr [[TMP26]], align 8
 // CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_4]], i32 0, i32 1
@@ -153,7 +153,7 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    store ptr @.red_comb..6, ptr [[TMP31]], align 8
 // CHECK1-NEXT:    [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_4]], i32 0, i32 6
 // CHECK1-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP32]], i8 0, i64 4, i1 false)
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_5:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_3]], i64 0, i64 1
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_5:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_3]], i64 0, i64 1
 // CHECK1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[VLA]], ptr [[TMP33]], align 8
 // CHECK1-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 1
diff --git clang/test/OpenMP/taskgroup_task_reduction_codegen.cpp clang/test/OpenMP/taskgroup_task_reduction_codegen.cpp
index faf86479dfda..a8577c7a1357 100644
--- clang/test/OpenMP/taskgroup_task_reduction_codegen.cpp
+++ clang/test/OpenMP/taskgroup_task_reduction_codegen.cpp
@@ -67,7 +67,7 @@ int main(int argc, char **argv) {
 // CHECK-DAG:   [[A_REF]] = getelementptr inbounds nuw [[T1]], ptr [[GEPA:%[^,]+]], i32 0, i32 0
 // CHECK-DAG:   store ptr [[A]], ptr [[A_REF:[^,]+]],
 // CHECK-DAG:   [[A_REF]] = getelementptr inbounds nuw [[T1]], ptr [[GEPA]], i32 0, i32 1
-// CHECK-DAG:   [[GEPA]] = getelementptr inbounds [3 x [[T1]]], ptr [[RD_IN1]], i64 0, i64
+// CHECK-DAG:   [[GEPA]] = getelementptr inbounds nuw [3 x [[T1]]], ptr [[RD_IN1]], i64 0, i64
 // CHECK-DAG:   [[TMP6:%.+]] = getelementptr inbounds nuw [[T1]], ptr [[GEPA]], i32 0, i32 2
 // CHECK-DAG:   store i64 4, ptr [[TMP6]],
 // CHECK-DAG:   [[TMP7:%.+]] = getelementptr inbounds nuw [[T1]], ptr [[GEPA]], i32 0, i32 3
@@ -82,7 +82,7 @@ int main(int argc, char **argv) {
 // CHECK-DAG:   [[TMP12]] = getelementptr inbounds nuw [[T1]], ptr [[GEPB:%[^,]+]], i32 0, i32 0
 // CHECK-DAG:   store ptr [[B]], ptr [[TMP12:%[^,]+]],
 // CHECK-DAG:   [[TMP12]] = getelementptr inbounds nuw [[T1]], ptr [[GEPB]], i32 0, i32 1
-// CHECK-DAG:   [[GEPB]] = getelementptr inbounds [3 x [[T1]]], ptr [[RD_IN1]], i64 0, i64
+// CHECK-DAG:   [[GEPB]] = getelementptr inbounds nuw [3 x [[T1]]], ptr [[RD_IN1]], i64 0, i64
 // CHECK-DAG:   [[TMP14:%.+]] = getelementptr inbounds nuw [[T1]], ptr [[GEPB]], i32 0, i32 2
 // CHECK-DAG:   store i64 4, ptr [[TMP14]],
 // CHECK-DAG:   [[TMP15:%.+]] = getelementptr inbounds nuw [[T1]], ptr [[GEPB]], i32 0, i32 3
@@ -97,7 +97,7 @@ int main(int argc, char **argv) {
 // CHECK-DAG:   [[TMP20]] = getelementptr inbounds nuw [[T1]], ptr [[GEPARGC:%[^,]+]], i32 0, i32 0
 // CHECK-DAG:   store ptr [[ARGC_ADDR]], ptr [[TMP20:%[^,]+]],
 // CHECK-DAG:   [[TMP20]] = getelementptr inbounds nuw [[T1]], ptr [[GEPARGC]], i32 0, i32 1
-// CHECK-DAG:   [[GEPARGC]] = getelementptr inbounds [3 x [[T1]]], ptr [[RD_IN1]], i64 0, i64
+// CHECK-DAG:   [[GEPARGC]] = getelementptr inbounds nuw [3 x [[T1]]], ptr [[RD_IN1]], i64 0, i64
 // CHECK-DAG:   [[TMP22:%.+]] = getelementptr inbounds nuw [[T1]], ptr [[GEPARGC]], i32 0, i32 2
 // CHECK-DAG:   store i64 4, ptr [[TMP22]],
 // CHECK-DAG:   [[TMP23:%.+]] = getelementptr inbounds nuw [[T1]], ptr [[GEPARGC]], i32 0, i32 3
@@ -116,7 +116,7 @@ int main(int argc, char **argv) {
 // CHECK-DAG:   [[TMP30]] = getelementptr inbounds nuw [[T2]], ptr [[GEPC:%[^,]+]], i32 0, i32 0
 // CHECK-DAG:   store ptr [[C]], ptr [[TMP30:%[^,]+]],
 // CHECK-DAG:   [[TMP30]] = getelementptr inbounds nuw [[T2]], ptr [[GEPC]], i32 0, i32 1
-// CHECK-DAG:   [[GEPC]] = getelementptr inbounds [2 x [[T2]]], ptr [[RD_IN2]], i64 0, i64
+// CHECK-DAG:   [[GEPC]] = getelementptr inbounds nuw [2 x [[T2]]], ptr [[RD_IN2]], i64 0, i64
 // CHECK-DAG:   [[TMP32:%.+]] = getelementptr inbounds nuw [[T2]], ptr [[GEPC]], i32 0, i32 2
 // CHECK-DAG:   store i64 20, ptr [[TMP32]],
 // CHECK-DAG:   [[TMP33:%.+]] = getelementptr inbounds nuw [[T2]], ptr [[GEPC]], i32 0, i32 3
@@ -131,7 +131,7 @@ int main(int argc, char **argv) {
 // CHECK-DAG:   [[TMP38]] = getelementptr inbounds nuw [[T2]], ptr [[GEPVLA:%[^,]+]], i32 0, i32 0
 // CHECK-DAG:   store ptr [[VLA]], ptr [[TMP38:%[^,]+]],
 // CHECK-DAG:   [[TMP38]] = getelementptr inbounds nuw [[T2]], ptr [[GEPVLA]], i32 0, i32 1
-// CHECK-DAG:   [[GEPVLA]] = getelementptr inbounds [2 x [[T2]]], ptr [[RD_IN2]], i64 0, i64
+// CHECK-DAG:   [[GEPVLA]] = getelementptr inbounds nuw [2 x [[T2]]], ptr [[RD_IN2]], i64 0, i64
 // CHECK-DAG:   [[TMP40:%.+]] = mul nuw i64 [[VLA_SIZE]], 2
 // CHECK-DAG:   [[TMP41:%.+]] = udiv exact i64 [[TMP40]], ptrtoint (ptr getelementptr (i16, ptr null, i32 1) to i64)
 // CHECK-DAG:   [[TMP42:%.+]] = getelementptr inbounds nuw [[T2]], ptr [[GEPVLA]], i32 0, i32 2
diff --git clang/test/OpenMP/taskloop_in_reduction_codegen.cpp clang/test/OpenMP/taskloop_in_reduction_codegen.cpp
index ae0d00775614..87b4cd2caf18 100644
--- clang/test/OpenMP/taskloop_in_reduction_codegen.cpp
+++ clang/test/OpenMP/taskloop_in_reduction_codegen.cpp
@@ -76,7 +76,7 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[VLA:%.*]] = alloca i16, i64 [[TMP2]], align 16
 // CHECK1-NEXT:    store i64 [[TMP2]], ptr [[__VLA_EXPR0]], align 8
 // CHECK1-NEXT:    call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]])
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds nuw [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[A]], ptr [[TMP4]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1
@@ -91,7 +91,7 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    store ptr @.red_comb., ptr [[TMP9]], align 8
 // CHECK1-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6
 // CHECK1-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP10]], i8 0, i64 4, i1 false)
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_1:%.*]] = getelementptr inbounds [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_1:%.*]] = getelementptr inbounds nuw [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1
 // CHECK1-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_1]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[B]], ptr [[TMP11]], align 8
 // CHECK1-NEXT:    [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_1]], i32 0, i32 1
@@ -106,7 +106,7 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    store ptr @.red_comb..2, ptr [[TMP16]], align 8
 // CHECK1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_1]], i32 0, i32 6
 // CHECK1-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP17]], i8 0, i64 4, i1 false)
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_2:%.*]] = getelementptr inbounds [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 2
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_2:%.*]] = getelementptr inbounds nuw [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 2
 // CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_2]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[ARGC_ADDR]], ptr [[TMP18]], align 8
 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_2]], i32 0, i32 1
@@ -124,7 +124,7 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP25:%.*]] = call ptr @__kmpc_taskred_init(i32 [[TMP0]], i32 3, ptr [[DOTRD_INPUT_]])
 // CHECK1-NEXT:    store ptr [[TMP25]], ptr [[DOTTASK_RED_]], align 8
 // CHECK1-NEXT:    call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]])
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_4:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_3]], i64 0, i64 0
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_4:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_3]], i64 0, i64 0
 // CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0:%.*]], ptr [[DOTRD_INPUT_GEP_4]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[C]], ptr [[TMP26]], align 8
 // CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_4]], i32 0, i32 1
@@ -139,7 +139,7 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    store ptr @.red_comb..6, ptr [[TMP31]], align 8
 // CHECK1-NEXT:    [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_4]], i32 0, i32 6
 // CHECK1-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP32]], i8 0, i64 4, i1 false)
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_5:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_3]], i64 0, i64 1
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_5:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_3]], i64 0, i64 1
 // CHECK1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[VLA]], ptr [[TMP33]], align 8
 // CHECK1-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 1
diff --git clang/test/OpenMP/taskloop_reduction_codegen.cpp clang/test/OpenMP/taskloop_reduction_codegen.cpp
index 3cdc88ba20b7..6eca033eca55 100644
--- clang/test/OpenMP/taskloop_reduction_codegen.cpp
+++ clang/test/OpenMP/taskloop_reduction_codegen.cpp
@@ -83,9 +83,9 @@ sum = 0.0;
 // CHECK-DAG:    store ptr @[[RED_COMB1:.+]], ptr [[TMP25]],
 // CHECK-DAG:    [[TMP26:%.*]] = getelementptr inbounds nuw %struct.kmp_taskred_input_t, ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6
 // CHECK-DAG:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP26]], i8 0, i64 4, i1 false)
-// CHECK-DAG:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x %struct.S], ptr [[C]], i64 0, i64 0
+// CHECK-DAG:    [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw [100 x %struct.S], ptr [[C]], i64 0, i64 0
 // CHECK-DAG:    [[LB_ADD_LEN:%.*]] = add nsw i64 -1, %
-// CHECK-DAG:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [100 x %struct.S], ptr [[C]], i64 0, i64 [[LB_ADD_LEN]]
+// CHECK-DAG:    [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw [100 x %struct.S], ptr [[C]], i64 0, i64 [[LB_ADD_LEN]]
 // CHECK-DAG:    store ptr [[ARRAYIDX5]], ptr [[TMP28:%[^,]+]],
 // CHECK-DAG:    [[TMP28]] = getelementptr inbounds nuw %struct.kmp_taskred_input_t, ptr [[DOTRD_INPUT_GEP_4:%.+]], i32 0, i32 0
 // CHECK-DAG:    store ptr [[ARRAYIDX5]], ptr [[TMP28:%[^,]+]],
@@ -137,10 +137,10 @@ sum = 0.0;
 // CHECK-DAG:    store ptr @[[RED_COMB4:.+]], ptr [[TMP59]],
 // CHECK-DAG:    [[TMP60:%.*]] = getelementptr inbounds nuw %struct.kmp_taskred_input_t, ptr [[DOTRD_INPUT_GEP_8]], i32 0, i32 6
 // CHECK-DAG:    store i32 1, ptr [[TMP60]],
-// CHECK-DAG:    [[DOTRD_INPUT_GEP_]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64
-// CHECK-DAG:    [[DOTRD_INPUT_GEP_4]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64
-// CHECK-DAG:    [[DOTRD_INPUT_GEP_7]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64
-// CHECK-DAG:    [[DOTRD_INPUT_GEP_8]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_]] = getelementptr inbounds nuw [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_4]] = getelementptr inbounds nuw [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_7]] = getelementptr inbounds nuw [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_8]] = getelementptr inbounds nuw [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64
 // CHECK:    [[TMP62:%.*]] = call ptr @__kmpc_taskred_init(i32 [[TMP0]], i32 4, ptr [[DOTRD_INPUT_]])
 // CHECK:    [[TMP63:%.*]] = load i32, ptr [[N]],
 // CHECK:    store i32 [[TMP63]], ptr [[DOTCAPTURE_EXPR_]],
diff --git clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp
index 6da28d2d973c..9e4e51a44274 100644
--- clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp
+++ clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp
@@ -76,7 +76,7 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[VLA:%.*]] = alloca i16, i64 [[TMP2]], align 16
 // CHECK1-NEXT:    store i64 [[TMP2]], ptr [[__VLA_EXPR0]], align 8
 // CHECK1-NEXT:    call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]])
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds nuw [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[A]], ptr [[TMP4]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1
@@ -91,7 +91,7 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    store ptr @.red_comb., ptr [[TMP9]], align 8
 // CHECK1-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6
 // CHECK1-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP10]], i8 0, i64 4, i1 false)
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_1:%.*]] = getelementptr inbounds [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_1:%.*]] = getelementptr inbounds nuw [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1
 // CHECK1-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_1]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[B]], ptr [[TMP11]], align 8
 // CHECK1-NEXT:    [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_1]], i32 0, i32 1
@@ -106,7 +106,7 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    store ptr @.red_comb..2, ptr [[TMP16]], align 8
 // CHECK1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_1]], i32 0, i32 6
 // CHECK1-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP17]], i8 0, i64 4, i1 false)
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_2:%.*]] = getelementptr inbounds [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 2
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_2:%.*]] = getelementptr inbounds nuw [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 2
 // CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_2]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[ARGC_ADDR]], ptr [[TMP18]], align 8
 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_2]], i32 0, i32 1
@@ -124,7 +124,7 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP25:%.*]] = call ptr @__kmpc_taskred_init(i32 [[TMP0]], i32 3, ptr [[DOTRD_INPUT_]])
 // CHECK1-NEXT:    store ptr [[TMP25]], ptr [[DOTTASK_RED_]], align 8
 // CHECK1-NEXT:    call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]])
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_4:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_3]], i64 0, i64 0
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_4:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_3]], i64 0, i64 0
 // CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0:%.*]], ptr [[DOTRD_INPUT_GEP_4]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[C]], ptr [[TMP26]], align 8
 // CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_4]], i32 0, i32 1
@@ -139,7 +139,7 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    store ptr @.red_comb..6, ptr [[TMP31]], align 8
 // CHECK1-NEXT:    [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_4]], i32 0, i32 6
 // CHECK1-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP32]], i8 0, i64 4, i1 false)
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_5:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_3]], i64 0, i64 1
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_5:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_3]], i64 0, i64 1
 // CHECK1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[VLA]], ptr [[TMP33]], align 8
 // CHECK1-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 1
diff --git clang/test/OpenMP/taskloop_simd_reduction_codegen.cpp clang/test/OpenMP/taskloop_simd_reduction_codegen.cpp
index d6e40831484a..83ae053cfd9b 100644
--- clang/test/OpenMP/taskloop_simd_reduction_codegen.cpp
+++ clang/test/OpenMP/taskloop_simd_reduction_codegen.cpp
@@ -80,9 +80,9 @@ sum = 0.0;
 // CHECK-DAG:    store ptr @[[RED_COMB1:.+]], ptr [[TMP25]],
 // CHECK-DAG:    [[TMP26:%.*]] = getelementptr inbounds nuw %struct.kmp_taskred_input_t, ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6
 // CHECK-DAG:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP26]], i8 0, i64 4, i1 false)
-// CHECK-DAG:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x %struct.S], ptr [[C]], i64 0, i64 0
+// CHECK-DAG:    [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw [100 x %struct.S], ptr [[C]], i64 0, i64 0
 // CHECK-DAG:    [[LB_ADD_LEN:%.*]] = add nsw i64 -1, %
-// CHECK-DAG:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [100 x %struct.S], ptr [[C]], i64 0, i64 [[LB_ADD_LEN]]
+// CHECK-DAG:    [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw [100 x %struct.S], ptr [[C]], i64 0, i64 [[LB_ADD_LEN]]
 // CHECK-DAG:    store ptr [[ARRAYIDX5]], ptr [[TMP28:%[^,]+]],
 // CHECK-DAG:    [[TMP28]] = getelementptr inbounds nuw %struct.kmp_taskred_input_t, ptr [[DOTRD_INPUT_GEP_4:%.+]], i32 0, i32 0
 // CHECK-DAG:    store ptr [[ARRAYIDX5]], ptr [[TMP28:%[^,]+]],
@@ -134,10 +134,10 @@ sum = 0.0;
 // CHECK-DAG:    store ptr @[[RED_COMB4:.+]], ptr [[TMP59]],
 // CHECK-DAG:    [[TMP60:%.*]] = getelementptr inbounds nuw %struct.kmp_taskred_input_t, ptr [[DOTRD_INPUT_GEP_8]], i32 0, i32 6
 // CHECK-DAG:    store i32 1, ptr [[TMP60]],
-// CHECK-DAG:    [[DOTRD_INPUT_GEP_]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64
-// CHECK-DAG:    [[DOTRD_INPUT_GEP_4]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64
-// CHECK-DAG:    [[DOTRD_INPUT_GEP_7]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64
-// CHECK-DAG:    [[DOTRD_INPUT_GEP_8]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_]] = getelementptr inbounds nuw [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_4]] = getelementptr inbounds nuw [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_7]] = getelementptr inbounds nuw [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_8]] = getelementptr inbounds nuw [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64
 // CHECK:    [[TMP62:%.*]] = call ptr @__kmpc_taskred_init(i32 [[TMP0]], i32 4, ptr [[DOTRD_INPUT_]])
 // CHECK:    [[TMP63:%.*]] = load i32, ptr [[N]],
 // CHECK:    store i32 [[TMP63]], ptr [[DOTCAPTURE_EXPR_]],
diff --git clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp
index be499e0b3654..7987c2de7dd8 100644
--- clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp
+++ clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp
@@ -100,16 +100,16 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[ARGC1]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP1]], i64 0
 // CHECK1-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 0
 // CHECK1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
 // CHECK1-NEXT:    [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP4]]
 // CHECK1-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i64 9
+// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP5]], i64 9
 // CHECK1-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 [[LB_ADD_LEN]]
+// CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP6]], i64 [[LB_ADD_LEN]]
 // CHECK1-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64
 // CHECK1-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64
 // CHECK1-NEXT:    [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]]
@@ -139,7 +139,7 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP20]]
 // CHECK1-NEXT:    store ptr [[_TMP5]], ptr [[TMP]], align 8
 // CHECK1-NEXT:    store ptr [[TMP21]], ptr [[_TMP5]], align 8
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0
 // CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[ARGC1]], ptr [[TMP22]], align 8
 // CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1
@@ -154,19 +154,19 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    store ptr @.red_comb., ptr [[TMP27]], align 8
 // CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6
 // CHECK1-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP28]], i8 0, i64 4, i1 false)
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1
 // CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP30:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[TMP30]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP30]], i64 0
 // CHECK1-NEXT:    [[TMP31:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP31]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP31]], i64 0
 // CHECK1-NEXT:    [[TMP32:%.*]] = load i32, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP33:%.*]] = sext i32 [[TMP32]] to i64
 // CHECK1-NEXT:    [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP33]]
 // CHECK1-NEXT:    [[TMP34:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr [[TMP34]], i64 9
+// CHECK1-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP34]], i64 9
 // CHECK1-NEXT:    [[TMP35:%.*]] = load ptr, ptr [[ARRAYIDX10]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 [[LB_ADD_LEN9]]
+// CHECK1-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP35]], i64 [[LB_ADD_LEN9]]
 // CHECK1-NEXT:    store ptr [[VLA]], ptr [[TMP29]], align 8
 // CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 1
 // CHECK1-NEXT:    store ptr [[ARRAYIDX8]], ptr [[TMP36]], align 8
@@ -444,16 +444,16 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[ARGC1]], align 4
 // CHECK1-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP3]], i64 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 0
 // CHECK1-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
 // CHECK1-NEXT:    [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP6]]
 // CHECK1-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP7]], i64 9
+// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP7]], i64 9
 // CHECK1-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 [[LB_ADD_LEN]]
+// CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP8]], i64 [[LB_ADD_LEN]]
 // CHECK1-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64
 // CHECK1-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64
 // CHECK1-NEXT:    [[TMP11:%.*]] = sub i64 [[TMP9]], [[TMP10]]
@@ -483,7 +483,7 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP22]]
 // CHECK1-NEXT:    store ptr [[_TMP6]], ptr [[_TMP5]], align 8
 // CHECK1-NEXT:    store ptr [[TMP23]], ptr [[_TMP6]], align 8
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_]], i64 0, i64 0
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_]], i64 0, i64 0
 // CHECK1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[ARGC1]], ptr [[TMP24]], align 8
 // CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1
@@ -498,19 +498,19 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    store ptr @.red_comb..4, ptr [[TMP29]], align 8
 // CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6
 // CHECK1-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP30]], i8 0, i64 4, i1 false)
-// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_7:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_]], i64 0, i64 1
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_7:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_]], i64 0, i64 1
 // CHECK1-NEXT:    [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP32:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds ptr, ptr [[TMP32]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP32]], i64 0
 // CHECK1-NEXT:    [[TMP33:%.*]] = load ptr, ptr [[ARRAYIDX8]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, ptr [[TMP33]], i64 0
+// CHECK1-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP33]], i64 0
 // CHECK1-NEXT:    [[TMP34:%.*]] = load i32, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP35:%.*]] = sext i32 [[TMP34]] to i64
 // CHECK1-NEXT:    [[LB_ADD_LEN10:%.*]] = add nsw i64 -1, [[TMP35]]
 // CHECK1-NEXT:    [[TMP36:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds ptr, ptr [[TMP36]], i64 9
+// CHECK1-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP36]], i64 9
 // CHECK1-NEXT:    [[TMP37:%.*]] = load ptr, ptr [[ARRAYIDX11]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds i8, ptr [[TMP37]], i64 [[LB_ADD_LEN10]]
+// CHECK1-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP37]], i64 [[LB_ADD_LEN10]]
 // CHECK1-NEXT:    store ptr [[VLA]], ptr [[TMP31]], align 8
 // CHECK1-NEXT:    [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 1
 // CHECK1-NEXT:    store ptr [[ARRAYIDX9]], ptr [[TMP38]], align 8
@@ -831,9 +831,9 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]]
 // CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2
 // CHECK1-NEXT:    [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9
+// CHECK1-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP26]], i64 9
 // CHECK1-NEXT:    [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]]
+// CHECK1-NEXT:    [[ARRAYIDX3_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]]
 // CHECK1-NEXT:    [[TMP28:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64
 // CHECK1-NEXT:    [[TMP29:%.*]] = ptrtoint ptr [[TMP20]] to i64
 // CHECK1-NEXT:    [[TMP30:%.*]] = sub i64 [[TMP28]], [[TMP29]]
diff --git clang/test/Parser/cxx11-user-defined-literals.cpp clang/test/Parser/cxx11-user-defined-literals.cpp
index 1a7e78058822..cdd06729efc3 100644
--- clang/test/Parser/cxx11-user-defined-literals.cpp
+++ clang/test/Parser/cxx11-user-defined-literals.cpp
@@ -21,7 +21,8 @@ int f() {
   asm("mov %eax, %rdx"_foo); // expected-error {{user-defined suffix cannot be used here}}
 }
 
-static_assert(true, "foo"_bar); // expected-error {{user-defined suffix cannot be used here}}
+static_assert(true, "foo"_bar); // expected-error {{no matching literal operator for call to 'operator""_bar'}}
+// expected-warning@-1 {{'static_assert' with a user-generated message is a C++26 extension}}
 
 int cake() __attribute__((availability(macosx, unavailable, message = "is a lie"_x))); // expected-error {{user-defined suffix cannot be used here}}
 
diff --git clang/test/Parser/cxx1z-class-template-argument-deduction.cpp clang/test/Parser/cxx1z-class-template-argument-deduction.cpp
index 2dd61baac31b..a1594333abae 100644
--- clang/test/Parser/cxx1z-class-template-argument-deduction.cpp
+++ clang/test/Parser/cxx1z-class-template-argument-deduction.cpp
@@ -255,3 +255,15 @@ void f() {
   GH57495::vector.d; // expected-error {{cannot use dot operator on a type}}
 }
 }
+
+namespace GH107887 {
+
+namespace a {
+template <class> struct pair; // expected-note 3{{declared here}}
+}
+template <class T2> pair() -> pair<T2>;   // expected-error 2{{no template named 'pair'}} \
+                                          // expected-error {{deduction guide must be declared in the same scope}} \
+                                          // expected-error {{cannot be deduced}} \
+                                          // expected-note {{non-deducible template parameter 'T2'}}
+
+}
diff --git clang/test/ParserHLSL/hlsl_is_rov_attr.hlsl clang/test/ParserHLSL/hlsl_is_rov_attr.hlsl
index 29850828ad3b..cf21ec4d380d 100644
--- clang/test/ParserHLSL/hlsl_is_rov_attr.hlsl
+++ clang/test/ParserHLSL/hlsl_is_rov_attr.hlsl
@@ -1,9 +1,16 @@
 // RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -ast-dump -o - %s | FileCheck %s
 
-
-// CHECK: -HLSLROVAttr 0x{{[0-9a-f]+}} <col:10, col:16>
-struct [[hlsl::is_rov]] Eg1 {
-  int i;  
+// CHECK: CXXRecordDecl 0x{{[0-9a-f]+}} {{.*}} struct MyBuffer definition
+// CHECK: FieldDecl 0x{{[0-9a-f]+}} <line:6:3, col:68> col:68 h '__hlsl_resource_t {{\[\[}}hlsl::resource_class(UAV)]] {{\[\[}}hlsl::is_rov]]':'__hlsl_resource_t'
+struct MyBuffer {
+  __hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::is_rov]] h;
 };
 
-Eg1 e1;
+// CHECK: VarDecl 0x{{[0-9a-f]+}} <line:10:1, col:66> col:66 res '__hlsl_resource_t {{\[\[}}hlsl::resource_class(SRV)]] {{\[\[}}hlsl::is_rov]]':'__hlsl_resource_t'
+__hlsl_resource_t [[hlsl::is_rov]] [[hlsl::resource_class(SRV)]] res;
+
+// CHECK: FunctionDecl 0x{{[0-9a-f]+}} <line:14:1, line:16:1> line:14:6 f 'void ()
+// CHECK: VarDecl 0x{{[0-9a-f]+}} <col:3, col:72> col:72 r '__hlsl_resource_t {{\[\[}}hlsl::resource_class(Sampler)]] {{\[\[}}hlsl::is_rov]]':'__hlsl_resource_t'
+void f() {
+  __hlsl_resource_t [[hlsl::resource_class(Sampler)]] [[hlsl::is_rov]] r;
+}
diff --git clang/test/ParserHLSL/hlsl_is_rov_attr_error.hlsl clang/test/ParserHLSL/hlsl_is_rov_attr_error.hlsl
index a21fed22220b..15685bd1a3ba 100644
--- clang/test/ParserHLSL/hlsl_is_rov_attr_error.hlsl
+++ clang/test/ParserHLSL/hlsl_is_rov_attr_error.hlsl
@@ -1,15 +1,16 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -ast-dump -o - %s -verify
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -o - %s -verify
 
-// expected-error@+1{{'is_rov' attribute takes no arguments}}
-struct [[hlsl::is_rov(3)]] Eg1 {
-  int i;  
-};
+// expected-error@+1{{'is_rov' attribute cannot be applied to a declaration}}
+[[hlsl::is_rov]] __hlsl_resource_t res0;
 
-Eg1 e1;
+// expected-error@+1{{HLSL resource needs to have [[hlsl::resource_class()]] attribute}}
+__hlsl_resource_t [[hlsl::is_rov]] res1;
 
+// expected-error@+1{{'is_rov' attribute takes no arguments}}
+__hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::is_rov(3)]] res2;
+  
 // expected-error@+1{{use of undeclared identifier 'gibberish'}}
-struct [[hlsl::is_rov(gibberish)]] Eg2 {
-  int i;  
-};
+__hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::is_rov(gibberish)]] res3;
 
-Eg2 e2;
+// expected-warning@+1{{attribute 'is_rov' is already applied}}
+__hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::is_rov]] [[hlsl::is_rov]] res4;
diff --git clang/test/ParserHLSL/hlsl_resource_class_attr.hlsl clang/test/ParserHLSL/hlsl_resource_class_attr.hlsl
index 4b002e2d8900..f11a64d33839 100644
--- clang/test/ParserHLSL/hlsl_resource_class_attr.hlsl
+++ clang/test/ParserHLSL/hlsl_resource_class_attr.hlsl
@@ -1,32 +1,32 @@
 // RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -ast-dump -o - %s | FileCheck %s
 
-
-// CHECK: -HLSLResourceClassAttr 0x{{[0-9a-f]+}} <col:26> SRV
-struct Eg1 {
-  [[hlsl::resource_class(SRV)]] int i;  
+// CHECK: CXXRecordDecl 0x{{[0-9a-f]+}} {{.*}} struct MyBuffer definition
+// CHECK: FieldDecl 0x{{[0-9a-f]+}} <line:6:3, col:51> col:51 h '__hlsl_resource_t {{\[\[}}hlsl::resource_class(UAV)]]':'__hlsl_resource_t'
+struct MyBuffer {
+  __hlsl_resource_t [[hlsl::resource_class(UAV)]] h;
 };
 
-Eg1 e1;
-
-// CHECK: -CXXRecordDecl 0x{{[0-9a-f]+}} <line:13:1, line:15:1> line:13:8 referenced struct Eg2 definition
-// CHECK: -HLSLResourceClassAttr 0x{{[0-9a-f]+}} <col:26> UAV
-struct Eg2 {
-  [[hlsl::resource_class(UAV)]] int i;
-};
-Eg2 e2;
+// CHECK: VarDecl 0x{{[0-9a-f]+}} <line:10:1, col:49> col:49 res '__hlsl_resource_t {{\[\[}}hlsl::resource_class(SRV)]]':'__hlsl_resource_t'
+__hlsl_resource_t [[hlsl::resource_class(SRV)]] res;
 
-// CHECK: -CXXRecordDecl 0x{{[0-9a-f]+}} <line:20:1, line:22:1> line:20:8 referenced struct Eg3 definition
-// CHECK: -HLSLResourceClassAttr 0x{{[0-9a-f]+}} <col:26> CBuffer
-struct Eg3 {
-  [[hlsl::resource_class(CBuffer)]] int i;
-}; 
-Eg3 e3;
+// CHECK: FunctionDecl 0x{{[0-9a-f]+}} <line:14:1, line:16:1> line:14:6 f 'void ()
+// CHECK: VarDecl 0x{{[0-9a-f]+}} <col:3, col:55> col:55 r '__hlsl_resource_t {{\[\[}}hlsl::resource_class(Sampler)]]':'__hlsl_resource_t'
+void f() {
+  __hlsl_resource_t [[hlsl::resource_class(Sampler)]] r;
+}
 
-// CHECK: -CXXRecordDecl 0x{{[0-9a-f]+}} <line:27:1, line:29:1> line:27:8 referenced struct Eg4 definition
-// CHECK: -HLSLResourceClassAttr 0x{{[0-9a-f]+}} <col:26> Sampler
-struct Eg4 {
-  [[hlsl::resource_class(Sampler)]] int i;
+// CHECK: ClassTemplateDecl 0x{{[0-9a-f]+}} <line:23:1, line:25:1> line:23:29 MyBuffer2
+// CHECK: TemplateTypeParmDecl 0x{{[0-9a-f]+}} <col:10, col:19> col:19 typename depth 0 index 0 T
+// CHECK: CXXRecordDecl 0x{{[0-9a-f]+}} <col:22, line:25:1> line:23:29 struct MyBuffer2 definition
+// CHECK: CXXRecordDecl 0x{{[0-9a-f]+}} <col:22, col:29> col:29 implicit struct MyBuffer2
+// CHECK: FieldDecl 0x{{[0-9a-f]+}} <line:24:3, col:51> col:51 h '__hlsl_resource_t {{\[\[}}hlsl::resource_class(UAV)]]':'__hlsl_resource_t'
+template<typename T> struct MyBuffer2 {
+  __hlsl_resource_t [[hlsl::resource_class(UAV)]] h;
 };
-Eg4 e4;
 
-RWBuffer<int> In : register(u1);
+// CHECK: ClassTemplateSpecializationDecl 0x{{[0-9a-f]+}} <line:23:1, line:25:1> line:23:29 struct MyBuffer2 definition implicit_instantiation
+// CHECK: TemplateArgument type 'float'
+// CHECK: BuiltinType 0x{{[0-9a-f]+}} 'float'
+// CHECK: CXXRecordDecl 0x{{[0-9a-f]+}} <col:22, col:29> col:29 implicit struct MyBuffer2
+// CHECK: FieldDecl 0x{{[0-9a-f]+}} <line:24:3, col:51> col:51 h '__hlsl_resource_t {{\[\[}}hlsl::resource_class(UAV)]]':'__hlsl_resource_t'
+MyBuffer2<float> myBuffer2;
diff --git clang/test/ParserHLSL/hlsl_resource_class_attr_error.hlsl clang/test/ParserHLSL/hlsl_resource_class_attr_error.hlsl
index 76bed2f06078..01ff1c007e2b 100644
--- clang/test/ParserHLSL/hlsl_resource_class_attr_error.hlsl
+++ clang/test/ParserHLSL/hlsl_resource_class_attr_error.hlsl
@@ -1,22 +1,19 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -ast-dump -o - %s -verify
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -o - %s -verify
 
-struct Eg1 {
-// expected-error@+1{{'resource_class' attribute takes one argument}}
-  [[hlsl::resource_class()]] int i;  
-};
+// expected-error@+1{{'resource_class' attribute cannot be applied to a declaration}}
+[[hlsl::resource_class(UAV)]] __hlsl_resource_t e0;
 
-Eg1 e1;
+// expected-error@+1{{'resource_class' attribute takes one argument}}
+__hlsl_resource_t [[hlsl::resource_class()]] e1;
 
-struct Eg2 {
 // expected-warning@+1{{ResourceClass attribute argument not supported: gibberish}}
-  [[hlsl::resource_class(gibberish)]] int i;  
-};
+__hlsl_resource_t [[hlsl::resource_class(gibberish)]] e2;
 
-Eg2 e2;
+// expected-warning@+1{{attribute 'resource_class' is already applied with different arguments}}
+__hlsl_resource_t [[hlsl::resource_class(SRV)]] [[hlsl::resource_class(UAV)]] e3;
 
-// expected-warning@+1{{'resource_class' attribute only applies to non-static data members}}
-struct [[hlsl::resource_class(SRV)]] Eg3 {
-  int i;  
-};
+// expected-warning@+1{{attribute 'resource_class' is already applied}}
+__hlsl_resource_t [[hlsl::resource_class(SRV)]] [[hlsl::resource_class(SRV)]] e4;
 
-Eg3 e3;
+// expected-error@+1{{'resource_class' attribute takes one argument}}
+__hlsl_resource_t [[hlsl::resource_class(SRV, "aa")]] e5;
diff --git clang/test/ParserHLSL/hlsl_resource_handle_attrs.hlsl clang/test/ParserHLSL/hlsl_resource_handle_attrs.hlsl
index 320d1160e761..7c3830a29197 100644
--- clang/test/ParserHLSL/hlsl_resource_handle_attrs.hlsl
+++ clang/test/ParserHLSL/hlsl_resource_handle_attrs.hlsl
@@ -1,15 +1,16 @@
 // RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -ast-dump -o - %s | FileCheck %s
 
 // CHECK: -ClassTemplateSpecializationDecl 0x{{[0-9a-f]+}} <<invalid sloc>> <invalid sloc> class RWBuffer definition implicit_instantiation
-// CHECK: -FieldDecl 0x{{[0-9a-f]+}} <<invalid sloc>> <invalid sloc> implicit referenced h 'float *'
-// CHECK: -HLSLResourceClassAttr 0x{{[0-9a-f]+}} <<invalid sloc>> Implicit UAV
+// CHECK: -TemplateArgument type 'float'
+// CHECK: `-BuiltinType 0x{{[0-9a-f]+}} 'float'
+// CHECK: -FieldDecl 0x{{[0-9a-f]+}} <<invalid sloc>> <invalid sloc> implicit referenced h 'float * {{\[\[}}hlsl::resource_class(UAV)]]':'float *'
 // CHECK: -HLSLResourceAttr 0x{{[0-9a-f]+}} <<invalid sloc>> Implicit TypedBuffer
 RWBuffer<float> Buffer1;
 
-// CHECK: -ClassTemplateDecl 0x{{[0-9a-f]+}} <<invalid sloc>> <invalid sloc> implicit RasterizerOrderedBuffer
-// CHECK: -CXXRecordDecl 0x{{[0-9a-f]+}} <<invalid sloc>> <invalid sloc> implicit class RasterizerOrderedBuffer definition
-// CHECK: -FieldDecl 0x{{[0-9a-f]+}} <<invalid sloc>> <invalid sloc> implicit h 'element_type *'
-// CHECK: -HLSLResourceClassAttr 0x{{[0-9a-f]+}} <<invalid sloc>> Implicit UAV
+// CHECK: -ClassTemplateSpecializationDecl 0x{{[0-9a-f]+}} <<invalid sloc>> <invalid sloc> class RasterizerOrderedBuffer definition implicit_instantiation
+// CHECK: -TemplateArgument type 'vector<float, 4>'
+// CHECK: `-ExtVectorType 0x{{[0-9a-f]+}} 'vector<float, 4>' 4
+// CHECK: `-BuiltinType 0x{{[0-9a-f]+}} 'float'
+// CHECK: -FieldDecl 0x{{[0-9a-f]+}} <<invalid sloc>> <invalid sloc> implicit referenced h 'vector<float *, 4> {{\[\[}}hlsl::resource_class(UAV)]] {{\[\[}}hlsl::is_rov]]':'vector<float *, 4>'
 // CHECK: -HLSLResourceAttr 0x{{[0-9a-f]+}} <<invalid sloc>> Implicit TypedBuffer
-// CHECK: -HLSLROVAttr 0x{{[0-9a-f]+}} <<invalid sloc>> Implicit
-RasterizerOrderedBuffer<vector<float, 4> > BufferArray3[4] : register(u4, space1);
+RasterizerOrderedBuffer<vector<float, 4> > BufferArray3[4];
diff --git clang/test/Preprocessor/predefined-arch-macros.c clang/test/Preprocessor/predefined-arch-macros.c
index 6f470d85ca56..49646d94d920 100644
--- clang/test/Preprocessor/predefined-arch-macros.c
+++ clang/test/Preprocessor/predefined-arch-macros.c
@@ -4053,6 +4053,20 @@
 
 // Begin SystemZ/GCC/Linux tests ----------------
 
+// RUN: %clang -E -dM %s -o - 2>&1 \
+// RUN:     -target s390x-ibm-zos \
+// RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_SYSTEMZ_ZOS
+// CHECK_SYSTEMZ_ZOS: #define __ARCH__ 10
+// CHECK_SYSTEMZ_ZOS: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1
+// CHECK_SYSTEMZ_ZOS: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 1
+// CHECK_SYSTEMZ_ZOS: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 1
+// CHECK_SYSTEMZ_ZOS: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 1
+// CHECK_SYSTEMZ_ZOS: #define __HTM__ 1
+// CHECK_SYSTEMZ_ZOS: #define __LONG_DOUBLE_128__ 1
+// CHECK_SYSTEMZ_ZOS: #define __s390__ 1
+// CHECK_SYSTEMZ_ZOS: #define __s390x__ 1
+// CHECK_SYSTEMZ_ZOS: #define __zarch__ 1
+
 // RUN: %clang -march=arch8 -E -dM %s -o - 2>&1 \
 // RUN:     -target s390x-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_SYSTEMZ_ARCH8
@@ -4064,6 +4078,7 @@
 // CHECK_SYSTEMZ_ARCH8: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 1
 // CHECK_SYSTEMZ_ARCH8: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 1
 // CHECK_SYSTEMZ_ARCH8: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 1
+// CHECK_SYSTEMZ_ARCH8-NOT: #define __HTM__ 1
 // CHECK_SYSTEMZ_ARCH8: #define __LONG_DOUBLE_128__ 1
 // CHECK_SYSTEMZ_ARCH8: #define __s390__ 1
 // CHECK_SYSTEMZ_ARCH8: #define __s390x__ 1
@@ -4080,6 +4095,7 @@
 // CHECK_SYSTEMZ_ARCH9: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 1
 // CHECK_SYSTEMZ_ARCH9: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 1
 // CHECK_SYSTEMZ_ARCH9: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 1
+// CHECK_SYSTEMZ_ARCH9-NOT: #define __HTM__ 1
 // CHECK_SYSTEMZ_ARCH9: #define __LONG_DOUBLE_128__ 1
 // CHECK_SYSTEMZ_ARCH9: #define __s390__ 1
 // CHECK_SYSTEMZ_ARCH9: #define __s390x__ 1
diff --git clang/test/Sema/aarch64-neon-bf16-ranges.c clang/test/Sema/aarch64-neon-bf16-ranges.c
deleted file mode 100644
index 88e6c50c5938..000000000000
--- clang/test/Sema/aarch64-neon-bf16-ranges.c
+++ /dev/null
@@ -1,49 +0,0 @@
-// RUN: %clang_cc1 -fsyntax-only -verify \
-// RUN: -triple aarch64 -target-feature +neon \
-// RUN: -target-feature +bf16 %s
-
-// REQUIRES: aarch64-registered-target || arm-registered-target
-
-#include <arm_neon.h>
-
-int x;
-
-void test_vcopy_lane_bf16(bfloat16x4_t a, bfloat16x8_t b) {
-  // 0 <= lane1 <= 3; 0 <= lane2 <= 3
-  (void)vcopy_lane_bf16(a, 3, a, 3);
-  (void)vcopy_lane_bf16(a, 0, a, 4);    // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  (void)vcopy_lane_bf16(a, 1, a, -1);   // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  (void)vcopy_lane_bf16(a, 4, a, 0);    // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  (void)vcopy_lane_bf16(a, -1, a, 1);   // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  (void)vcopy_lane_bf16(a, 0, a, x);    // expected-error-re {{argument {{.*}} must be a constant integer}}
-  (void)vcopy_lane_bf16(a, x, a, 0);    // expected-error-re {{argument {{.*}} must be a constant integer}}
-
-  // 0 <= lane1 <= 7; 0 <= lane2 <= 3
-  (void)vcopyq_lane_bf16(b, 7, a, 3);
-  (void)vcopyq_lane_bf16(b, 0, a, 4);   // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  (void)vcopyq_lane_bf16(b, 1, a, -1);  // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  (void)vcopyq_lane_bf16(b, 8, a, 0);   // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-  (void)vcopyq_lane_bf16(b, -1, a, 1);  // expected-error {{argument value -1 is outside the valid range [0, 7]}}
-  (void)vcopyq_lane_bf16(b, 0, a, x);   // expected-error-re {{argument {{.*}} must be a constant integer}}
-  (void)vcopyq_lane_bf16(b, x, a, 0);   // expected-error-re {{argument {{.*}} must be a constant integer}}
-
-  // 0 <= lane1 <= 3; 0 <= lane2 <= 7
-  (void)vcopy_laneq_bf16(a, 3, b, 7);
-  (void)vcopy_laneq_bf16(a, 0, b, 8);   // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-  (void)vcopy_laneq_bf16(a, 1, b, -1);  // expected-error {{argument value -1 is outside the valid range [0, 7]}}
-  (void)vcopy_laneq_bf16(a, 4, b, 0);   // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  (void)vcopy_laneq_bf16(a, -1, b, 1);  // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  (void)vcopy_laneq_bf16(a, 0, b, x);   // expected-error-re {{argument {{.*}} must be a constant integer}}
-  (void)vcopy_laneq_bf16(a, x, b, 0);   // expected-error-re {{argument {{.*}} must be a constant integer}}
-
-
-  // 0 <= lane1 <= 7; 0 <= lane2 <= 7
-  (void)vcopyq_laneq_bf16(b, 7, b, 7);
-  (void)vcopyq_laneq_bf16(b, 0, b, 8);  // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-  (void)vcopyq_laneq_bf16(b, 1, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 7]}}
-  (void)vcopyq_laneq_bf16(b, 8, b, 0);  // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-  (void)vcopyq_laneq_bf16(b, -1, b, 1); // expected-error {{argument value -1 is outside the valid range [0, 7]}}
-  (void)vcopyq_laneq_bf16(b, 0, b, x);  // expected-error-re {{argument {{.*}} must be a constant integer}}
-  (void)vcopyq_laneq_bf16(b, x, b, 0);  // expected-error-re {{argument {{.*}} must be a constant integer}}
-}
-
diff --git clang/test/Sema/aarch64-neon-faminmax-no-faminmax.c clang/test/Sema/aarch64-neon-faminmax-no-faminmax.c
new file mode 100644
index 000000000000..588f69cc7750
--- /dev/null
+++ clang/test/Sema/aarch64-neon-faminmax-no-faminmax.c
@@ -0,0 +1,35 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -emit-llvm -verify %s -o /dev/null
+
+// REQUIRES: aarch64-registered-target
+
+#include <arm_neon.h>
+
+float16x4_t a16x4, b16x4;
+float16x8_t a16x8, b16x8;
+float32x2_t a32x2, b32x2;
+float32x4_t a32x4, b32x4;
+float64x2_t a64x2, b64x2;
+
+
+void test() {
+  (void) vamin_f16 (a16x4, b16x4);
+// expected-error@-1 {{always_inline function 'vamin_f16' requires target feature 'faminmax'}}
+  (void) vaminq_f16(a16x8, b16x8);
+// expected-error@-1 {{always_inline function 'vaminq_f16' requires target feature 'faminmax'}}
+  (void) vamin_f32 (a32x2, b32x2);
+// expected-error@-1 {{always_inline function 'vamin_f32' requires target feature 'faminmax'}}
+  (void) vaminq_f32(a32x4, b32x4);
+// expected-error@-1 {{always_inline function 'vaminq_f32' requires target feature 'faminmax'}}
+  (void) vaminq_f64(a64x2, b64x2);
+// expected-error@-1 {{always_inline function 'vaminq_f64' requires target feature 'faminmax'}}
+  (void) vamax_f16 (a16x4, b16x4);
+// expected-error@-1 {{always_inline function 'vamax_f16' requires target feature 'faminmax'}}
+  (void) vamaxq_f16(a16x8, b16x8);
+// expected-error@-1 {{always_inline function 'vamaxq_f16' requires target feature 'faminmax'}}
+  (void) vamax_f32 (a32x2, b32x2);
+// expected-error@-1 {{always_inline function 'vamax_f32' requires target feature 'faminmax'}}
+  (void) vamaxq_f32(a32x4, b32x4);
+// expected-error@-1 {{always_inline function 'vamaxq_f32' requires target feature 'faminmax'}}
+  (void) vamaxq_f64(a64x2, b64x2);
+// expected-error@-1 {{always_inline function 'vamaxq_f64' requires target feature 'faminmax'}}
+}
diff --git clang/test/Sema/aarch64-neon-faminmax-no-neon.c clang/test/Sema/aarch64-neon-faminmax-no-neon.c
new file mode 100644
index 000000000000..a210e8398d9b
--- /dev/null
+++ clang/test/Sema/aarch64-neon-faminmax-no-neon.c
@@ -0,0 +1,34 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +faminmax -emit-llvm -verify %s -o /dev/null
+
+// REQUIRES: aarch64-registered-target
+
+#include <arm_neon.h>
+
+float16x4_t a16x4, b16x4;
+float16x8_t a16x8, b16x8;
+float32x2_t a32x2, b32x2;
+float32x4_t a32x4, b32x4;
+float64x2_t a64x2, b64x2;
+
+void test() {
+  (void) vamin_f16 (a16x4, b16x4);
+// expected-error@-1 {{always_inline function 'vamin_f16' requires target feature 'neon'}}
+  (void) vaminq_f16(a16x8, b16x8);
+// expected-error@-1 {{always_inline function 'vaminq_f16' requires target feature 'neon'}}
+  (void) vamin_f32 (a32x2, b32x2);
+// expected-error@-1 {{always_inline function 'vamin_f32' requires target feature 'neon'}}
+  (void) vaminq_f32(a32x4, b32x4);
+// expected-error@-1 {{always_inline function 'vaminq_f32' requires target feature 'neon'}}
+  (void) vaminq_f64(a64x2, b64x2);
+// expected-error@-1 {{always_inline function 'vaminq_f64' requires target feature 'neon'}}
+  (void) vamax_f16 (a16x4, b16x4);
+// expected-error@-1 {{always_inline function 'vamax_f16' requires target feature 'neon'}}
+  (void) vamaxq_f16(a16x8, b16x8);
+// expected-error@-1 {{always_inline function 'vamaxq_f16' requires target feature 'neon'}}
+  (void) vamax_f32 (a32x2, b32x2);
+// expected-error@-1 {{always_inline function 'vamax_f32' requires target feature 'neon'}}
+  (void) vamaxq_f32(a32x4, b32x4);
+// expected-error@-1 {{always_inline function 'vamaxq_f32' requires target feature 'neon'}}
+  (void) vamaxq_f64(a64x2, b64x2);
+// expected-error@-1 {{always_inline function 'vamaxq_f64' requires target feature 'neon'}}
+}
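
The two files above split the negative cases by feature: the first enables 'neon' but not 'faminmax', the second enables 'faminmax' but not 'neon', and -verify requires every call to raise the corresponding always_inline target-feature error. As an illustrative sketch only (not part of this patch, and the positive coverage may well live elsewhere), a companion test that enables both features would assert a clean compile with expected-no-diagnostics:

    // Hypothetical positive test; this file is an assumption, not in the patch.
    // RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon \
    // RUN:   -target-feature +faminmax -emit-llvm -verify %s -o /dev/null
    // REQUIRES: aarch64-registered-target
    // expected-no-diagnostics

    #include <arm_neon.h>

    // With both 'neon' and 'faminmax' enabled, the always_inline intrinsic can
    // be inlined into the caller, so no target-feature diagnostic is emitted.
    float16x4_t test_both_features(float16x4_t a, float16x4_t b) {
      return vamin_f16(a, b);
    }
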
diff --git clang/test/Sema/aarch64-neon-fp16-ranges.c clang/test/Sema/aarch64-neon-fp16-ranges.c
deleted file mode 100644
index cb273eb56160..000000000000
--- clang/test/Sema/aarch64-neon-fp16-ranges.c
+++ /dev/null
@@ -1,66 +0,0 @@
-// RUN: %clang_cc1 -triple arm64-linux-gnu -target-feature +neon -target-feature +fullfp16 -ffreestanding -fsyntax-only -verify %s
-// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +fullfp16 -target-feature +neon -ffreestanding -fsyntax-only -verify %s
-
-// REQUIRES: aarch64-registered-target || arm-registered-target
-
-#include <arm_neon.h>
-#include <arm_fp16.h>
-
-void test_vcvt_f16_16(int16_t a){
-  vcvth_n_f16_s16(a, 1);
-  vcvth_n_f16_s16(a, 16);
-  vcvth_n_f16_s16(a, 0);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vcvth_n_f16_s16(a, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-
-  vcvth_n_f16_u16(a, 1);
-  vcvth_n_f16_u16(a, 16);
-  vcvth_n_f16_u16(a, 0);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vcvth_n_f16_u16(a, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-}
-
-void test_vcvt_f16_32(int32_t a){
-  vcvth_n_f16_u32(a, 1);
-  vcvth_n_f16_u32(a, 16);
-  vcvth_n_f16_u32(a, 0);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vcvth_n_f16_u32(a, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-
-  vcvth_n_f16_s32(a, 1);
-  vcvth_n_f16_s32(a, 16);
-  vcvth_n_f16_s32(a, 0);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vcvth_n_f16_s32(a, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-}
-
-void test_vcvt_f16_64(int64_t a){
-  vcvth_n_f16_s64(a, 1);
-  vcvth_n_f16_s64(a, 16);
-  vcvth_n_f16_s64(a, 0);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vcvth_n_f16_s64(a, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-}
-
-
-void test_vcvt_su_f(float16_t a){
-  vcvth_n_s16_f16(a, 1);
-  vcvth_n_s16_f16(a, 16);
-  vcvth_n_s16_f16(a, 0);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vcvth_n_s16_f16(a, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-
-  vcvth_n_s32_f16(a, 1);
-  vcvth_n_s32_f16(a, 16);
-  vcvth_n_s32_f16(a, 0);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vcvth_n_s32_f16(a, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-
-  vcvth_n_s64_f16(a, 1);
-  vcvth_n_s64_f16(a, 16);
-  vcvth_n_s64_f16(a, 0);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vcvth_n_s64_f16(a, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-
-  vcvth_n_u16_f16(a, 1);
-  vcvth_n_u16_f16(a, 16);
-  vcvth_n_u16_f16(a, 0);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vcvth_n_u16_f16(a, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-
-  vcvth_n_u32_f16(a, 1);
-  vcvth_n_u32_f16(a, 16);
-  vcvth_n_u32_f16(a, 0);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vcvth_n_u32_f16(a, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-}
diff --git clang/test/Sema/aarch64-neon-immediate-ranges/bfloat16.c clang/test/Sema/aarch64-neon-immediate-ranges/bfloat16.c
new file mode 100644
index 000000000000..485219a9f897
--- /dev/null
+++ clang/test/Sema/aarch64-neon-immediate-ranges/bfloat16.c
@@ -0,0 +1,237 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -target-feature +bf16 -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+#include <arm_bf16.h>
+// REQUIRES: aarch64-registered-target
+
+
+void test_set_all_lanes_to_the_same_value_bf16(bfloat16x8_t arg_b16x8, bfloat16x4_t arg_b16x4) {
+	vdup_lane_bf16(arg_b16x4, 0);
+	vdup_lane_bf16(arg_b16x4, 3);
+	vdup_lane_bf16(arg_b16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_lane_bf16(arg_b16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_lane_bf16(arg_b16x4, 0);
+	vdupq_lane_bf16(arg_b16x4, 3);
+	vdupq_lane_bf16(arg_b16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_lane_bf16(arg_b16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdup_laneq_bf16(arg_b16x8, 0);
+	vdup_laneq_bf16(arg_b16x8, 7);
+	vdup_laneq_bf16(arg_b16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_laneq_bf16(arg_b16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_laneq_bf16(arg_b16x8, 0);
+	vdupq_laneq_bf16(arg_b16x8, 7);
+	vdupq_laneq_bf16(arg_b16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_laneq_bf16(arg_b16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vduph_lane_bf16(arg_b16x4, 0);
+	vduph_lane_bf16(arg_b16x4, 3);
+	vduph_lane_bf16(arg_b16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vduph_lane_bf16(arg_b16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vduph_laneq_bf16(arg_b16x8, 0);
+	vduph_laneq_bf16(arg_b16x8, 7);
+	vduph_laneq_bf16(arg_b16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vduph_laneq_bf16(arg_b16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_split_vectors_bf16(bfloat16x8_t arg_b16x8, bfloat16x4_t arg_b16x4) {
+	vget_lane_bf16(arg_b16x4, 0);
+	vget_lane_bf16(arg_b16x4, 3);
+	vget_lane_bf16(arg_b16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vget_lane_bf16(arg_b16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vgetq_lane_bf16(arg_b16x8, 0);
+	vgetq_lane_bf16(arg_b16x8, 7);
+	vgetq_lane_bf16(arg_b16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vgetq_lane_bf16(arg_b16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_vector_lane_bf16(bfloat16x8_t arg_b16x8, bfloat16x4_t arg_b16x4, bfloat16_t arg_b16) {
+	vset_lane_bf16(arg_b16, arg_b16x4, 0);
+	vset_lane_bf16(arg_b16, arg_b16x4, 3);
+	vset_lane_bf16(arg_b16, arg_b16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vset_lane_bf16(arg_b16, arg_b16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsetq_lane_bf16(arg_b16, arg_b16x8, 0);
+	vsetq_lane_bf16(arg_b16, arg_b16x8, 7);
+	vsetq_lane_bf16(arg_b16, arg_b16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsetq_lane_bf16(arg_b16, arg_b16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_copy_vector_lane_bf16(bfloat16x8_t arg_b16x8, bfloat16x4_t arg_b16x4) {
+	vcopy_lane_bf16(arg_b16x4, 0, arg_b16x4, 0);
+	vcopy_lane_bf16(arg_b16x4, 3, arg_b16x4, 0);
+	vcopy_lane_bf16(arg_b16x4, -1, arg_b16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_bf16(arg_b16x4, 4, arg_b16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_bf16(arg_b16x4, 0, arg_b16x4, 3);
+	vcopy_lane_bf16(arg_b16x4, 0, arg_b16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_bf16(arg_b16x4, 0, arg_b16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_bf16(arg_b16x8, 0, arg_b16x4, 0);
+	vcopyq_lane_bf16(arg_b16x8, 7, arg_b16x4, 0);
+	vcopyq_lane_bf16(arg_b16x8, -1, arg_b16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_bf16(arg_b16x8, 8, arg_b16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_bf16(arg_b16x8, 0, arg_b16x4, 3);
+	vcopyq_lane_bf16(arg_b16x8, 0, arg_b16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_bf16(arg_b16x8, 0, arg_b16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_bf16(arg_b16x4, 0, arg_b16x8, 0);
+	vcopy_laneq_bf16(arg_b16x4, 3, arg_b16x8, 0);
+	vcopy_laneq_bf16(arg_b16x4, -1, arg_b16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_bf16(arg_b16x4, 4, arg_b16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_bf16(arg_b16x4, 0, arg_b16x8, 7);
+	vcopy_laneq_bf16(arg_b16x4, 0, arg_b16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_bf16(arg_b16x4, 0, arg_b16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_bf16(arg_b16x8, 0, arg_b16x8, 0);
+	vcopyq_laneq_bf16(arg_b16x8, 7, arg_b16x8, 0);
+	vcopyq_laneq_bf16(arg_b16x8, -1, arg_b16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_bf16(arg_b16x8, 8, arg_b16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_bf16(arg_b16x8, 0, arg_b16x8, 7);
+	vcopyq_laneq_bf16(arg_b16x8, 0, arg_b16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_bf16(arg_b16x8, 0, arg_b16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_load_bf16(bfloat16x4_t arg_b16x4, bfloat16x4x4_t arg_b16x4x4, bfloat16x8x4_t arg_b16x8x4,
+					bfloat16x4x3_t arg_b16x4x3, bfloat16x8_t arg_b16x8, bfloat16_t* arg_b16_ptr,
+					bfloat16x8x3_t arg_b16x8x3, bfloat16x4x2_t arg_b16x4x2, bfloat16x8x2_t arg_b16x8x2) {
+	vld1_lane_bf16(arg_b16_ptr, arg_b16x4, 0);
+	vld1_lane_bf16(arg_b16_ptr, arg_b16x4, 3);
+	vld1_lane_bf16(arg_b16_ptr, arg_b16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1_lane_bf16(arg_b16_ptr, arg_b16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld1q_lane_bf16(arg_b16_ptr, arg_b16x8, 0);
+	vld1q_lane_bf16(arg_b16_ptr, arg_b16x8, 7);
+	vld1q_lane_bf16(arg_b16_ptr, arg_b16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1q_lane_bf16(arg_b16_ptr, arg_b16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2_lane_bf16(arg_b16_ptr, arg_b16x4x2, 0);
+	vld2_lane_bf16(arg_b16_ptr, arg_b16x4x2, 3);
+	vld2_lane_bf16(arg_b16_ptr, arg_b16x4x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2_lane_bf16(arg_b16_ptr, arg_b16x4x2, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2q_lane_bf16(arg_b16_ptr, arg_b16x8x2, 0);
+	vld2q_lane_bf16(arg_b16_ptr, arg_b16x8x2, 7);
+	vld2q_lane_bf16(arg_b16_ptr, arg_b16x8x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2q_lane_bf16(arg_b16_ptr, arg_b16x8x2, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3_lane_bf16(arg_b16_ptr, arg_b16x4x3, 0);
+	vld3_lane_bf16(arg_b16_ptr, arg_b16x4x3, 3);
+	vld3_lane_bf16(arg_b16_ptr, arg_b16x4x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3_lane_bf16(arg_b16_ptr, arg_b16x4x3, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3q_lane_bf16(arg_b16_ptr, arg_b16x8x3, 0);
+	vld3q_lane_bf16(arg_b16_ptr, arg_b16x8x3, 7);
+	vld3q_lane_bf16(arg_b16_ptr, arg_b16x8x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3q_lane_bf16(arg_b16_ptr, arg_b16x8x3, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4_lane_bf16(arg_b16_ptr, arg_b16x4x4, 0);
+	vld4_lane_bf16(arg_b16_ptr, arg_b16x4x4, 3);
+	vld4_lane_bf16(arg_b16_ptr, arg_b16x4x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4_lane_bf16(arg_b16_ptr, arg_b16x4x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4q_lane_bf16(arg_b16_ptr, arg_b16x8x4, 0);
+	vld4q_lane_bf16(arg_b16_ptr, arg_b16x8x4, 7);
+	vld4q_lane_bf16(arg_b16_ptr, arg_b16x8x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4q_lane_bf16(arg_b16_ptr, arg_b16x8x4, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_store_bf16(bfloat16x4_t arg_b16x4, bfloat16x4x4_t arg_b16x4x4, bfloat16x8x4_t arg_b16x8x4,
+					 bfloat16x4x3_t arg_b16x4x3, bfloat16x8_t arg_b16x8, bfloat16_t* arg_b16_ptr,
+					 bfloat16x8x3_t arg_b16x8x3, bfloat16x4x2_t arg_b16x4x2, bfloat16x8x2_t arg_b16x8x2) {
+	vst1_lane_bf16(arg_b16_ptr, arg_b16x4, 0);
+	vst1_lane_bf16(arg_b16_ptr, arg_b16x4, 3);
+	vst1_lane_bf16(arg_b16_ptr, arg_b16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1_lane_bf16(arg_b16_ptr, arg_b16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst1q_lane_bf16(arg_b16_ptr, arg_b16x8, 0);
+	vst1q_lane_bf16(arg_b16_ptr, arg_b16x8, 7);
+	vst1q_lane_bf16(arg_b16_ptr, arg_b16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1q_lane_bf16(arg_b16_ptr, arg_b16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2_lane_bf16(arg_b16_ptr, arg_b16x4x2, 0);
+	vst2_lane_bf16(arg_b16_ptr, arg_b16x4x2, 3);
+	vst2_lane_bf16(arg_b16_ptr, arg_b16x4x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2_lane_bf16(arg_b16_ptr, arg_b16x4x2, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2q_lane_bf16(arg_b16_ptr, arg_b16x8x2, 0);
+	vst2q_lane_bf16(arg_b16_ptr, arg_b16x8x2, 7);
+	vst2q_lane_bf16(arg_b16_ptr, arg_b16x8x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2q_lane_bf16(arg_b16_ptr, arg_b16x8x2, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3_lane_bf16(arg_b16_ptr, arg_b16x4x3, 0);
+	vst3_lane_bf16(arg_b16_ptr, arg_b16x4x3, 3);
+	vst3_lane_bf16(arg_b16_ptr, arg_b16x4x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3_lane_bf16(arg_b16_ptr, arg_b16x4x3, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3q_lane_bf16(arg_b16_ptr, arg_b16x8x3, 0);
+	vst3q_lane_bf16(arg_b16_ptr, arg_b16x8x3, 7);
+	vst3q_lane_bf16(arg_b16_ptr, arg_b16x8x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3q_lane_bf16(arg_b16_ptr, arg_b16x8x3, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4_lane_bf16(arg_b16_ptr, arg_b16x4x4, 0);
+	vst4_lane_bf16(arg_b16_ptr, arg_b16x4x4, 3);
+	vst4_lane_bf16(arg_b16_ptr, arg_b16x4x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4_lane_bf16(arg_b16_ptr, arg_b16x4x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4q_lane_bf16(arg_b16_ptr, arg_b16x8x4, 0);
+	vst4q_lane_bf16(arg_b16_ptr, arg_b16x8x4, 7);
+	vst4q_lane_bf16(arg_b16_ptr, arg_b16x8x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4q_lane_bf16(arg_b16_ptr, arg_b16x8x4, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_dot_product_f32(bfloat16x8_t arg_b16x8, bfloat16x4_t arg_b16x4, float32x2_t arg_f32x2, float32x4_t arg_f32x4) {
+	vbfdot_lane_f32(arg_f32x2, arg_b16x4, arg_b16x4, 0);
+	vbfdot_lane_f32(arg_f32x2, arg_b16x4, arg_b16x4, 1);
+	vbfdot_lane_f32(arg_f32x2, arg_b16x4, arg_b16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vbfdot_lane_f32(arg_f32x2, arg_b16x4, arg_b16x4, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vbfdotq_laneq_f32(arg_f32x4, arg_b16x8, arg_b16x8, 0);
+	vbfdotq_laneq_f32(arg_f32x4, arg_b16x8, arg_b16x8, 3);
+	vbfdotq_laneq_f32(arg_f32x4, arg_b16x8, arg_b16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vbfdotq_laneq_f32(arg_f32x4, arg_b16x8, arg_b16x8, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vbfdot_laneq_f32(arg_f32x2, arg_b16x4, arg_b16x8, 0);
+	vbfdot_laneq_f32(arg_f32x2, arg_b16x4, arg_b16x8, 3);
+	vbfdot_laneq_f32(arg_f32x2, arg_b16x4, arg_b16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vbfdot_laneq_f32(arg_f32x2, arg_b16x4, arg_b16x8, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vbfdotq_lane_f32(arg_f32x4, arg_b16x8, arg_b16x4, 0);
+	vbfdotq_lane_f32(arg_f32x4, arg_b16x8, arg_b16x4, 1);
+	vbfdotq_lane_f32(arg_f32x4, arg_b16x8, arg_b16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vbfdotq_lane_f32(arg_f32x4, arg_b16x8, arg_b16x4, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_multiply_accumulate_by_scalar_f32(bfloat16x8_t arg_b16x8, bfloat16x4_t arg_b16x4, float32x4_t arg_f32x4) {
+	vbfmlalbq_lane_f32(arg_f32x4, arg_b16x8, arg_b16x4, 0);
+	vbfmlalbq_lane_f32(arg_f32x4, arg_b16x8, arg_b16x4, 3);
+	vbfmlalbq_lane_f32(arg_f32x4, arg_b16x8, arg_b16x4, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vbfmlalbq_lane_f32(arg_f32x4, arg_b16x8, arg_b16x4, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vbfmlalbq_laneq_f32(arg_f32x4, arg_b16x8, arg_b16x8, 0);
+	vbfmlalbq_laneq_f32(arg_f32x4, arg_b16x8, arg_b16x8, 7);
+	vbfmlalbq_laneq_f32(arg_f32x4, arg_b16x8, arg_b16x8, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vbfmlalbq_laneq_f32(arg_f32x4, arg_b16x8, arg_b16x8, 8); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vbfmlaltq_lane_f32(arg_f32x4, arg_b16x8, arg_b16x4, 0);
+	vbfmlaltq_lane_f32(arg_f32x4, arg_b16x8, arg_b16x4, 3);
+	vbfmlaltq_lane_f32(arg_f32x4, arg_b16x8, arg_b16x4, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vbfmlaltq_lane_f32(arg_f32x4, arg_b16x8, arg_b16x4, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vbfmlaltq_laneq_f32(arg_f32x4, arg_b16x8, arg_b16x8, 0);
+	vbfmlaltq_laneq_f32(arg_f32x4, arg_b16x8, arg_b16x8, 7);
+	vbfmlaltq_laneq_f32(arg_f32x4, arg_b16x8, arg_b16x8, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vbfmlaltq_laneq_f32(arg_f32x4, arg_b16x8, arg_b16x8, 8); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+}
+
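
Each block above pairs the two accepted boundary values with the two just outside them, and expected-error-re keeps the inner {{.*}} as a regex block so the directive does not hard-code the offending value printed in the diagnostic. One failure mode not repeated here, which the deleted aarch64-neon-bf16-ranges.c exercised explicitly, is passing a non-constant lane index; a minimal sketch of it, assuming the same RUN line as the file above:

    // Illustration only, not part of the patch.
    #include <arm_neon.h>

    bfloat16_t pick_lane(bfloat16x4_t v, int lane) {
      // A runtime lane index is rejected with a "must be a constant integer"
      // style error before any range checking applies.
      return vduph_lane_bf16(v, lane);
      // return vduph_lane_bf16(v, 2);   // fine: 2 is a constant in [0, 3]
    }
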
diff --git clang/test/Sema/aarch64-neon-immediate-ranges/conversions.c clang/test/Sema/aarch64-neon-immediate-ranges/conversions.c
new file mode 100644
index 000000000000..30ae7f739242
--- /dev/null
+++ clang/test/Sema/aarch64-neon-immediate-ranges/conversions.c
@@ -0,0 +1,144 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+
+void test_conversions_f32(float32_t arg_f32, float32x2_t arg_f32x2, float32x4_t arg_f32x4) {
+	vcvt_n_s32_f32(arg_f32x2, 1);
+	vcvt_n_s32_f32(arg_f32x2, 32);
+	vcvt_n_s32_f32(arg_f32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvt_n_s32_f32(arg_f32x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvtq_n_s32_f32(arg_f32x4, 1);
+	vcvtq_n_s32_f32(arg_f32x4, 32);
+	vcvtq_n_s32_f32(arg_f32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvtq_n_s32_f32(arg_f32x4, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvt_n_u32_f32(arg_f32x2, 1);
+	vcvt_n_u32_f32(arg_f32x2, 32);
+	vcvt_n_u32_f32(arg_f32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvt_n_u32_f32(arg_f32x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvtq_n_u32_f32(arg_f32x4, 1);
+	vcvtq_n_u32_f32(arg_f32x4, 32);
+	vcvtq_n_u32_f32(arg_f32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvtq_n_u32_f32(arg_f32x4, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvts_n_s32_f32(arg_f32, 1);
+	vcvts_n_s32_f32(arg_f32, 32);
+	vcvts_n_s32_f32(arg_f32, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvts_n_s32_f32(arg_f32, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvts_n_u32_f32(arg_f32, 1);
+	vcvts_n_u32_f32(arg_f32, 32);
+	vcvts_n_u32_f32(arg_f32, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvts_n_u32_f32(arg_f32, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_conversions_f64(float64x1_t arg_f64x1, float64x2_t arg_f64x2, float64_t arg_f64) {
+	vcvt_n_s64_f64(arg_f64x1, 1);
+	vcvt_n_s64_f64(arg_f64x1, 64);
+	vcvt_n_s64_f64(arg_f64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvt_n_s64_f64(arg_f64x1, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvtq_n_s64_f64(arg_f64x2, 1);
+	vcvtq_n_s64_f64(arg_f64x2, 64);
+	vcvtq_n_s64_f64(arg_f64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvtq_n_s64_f64(arg_f64x2, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvt_n_u64_f64(arg_f64x1, 1);
+	vcvt_n_u64_f64(arg_f64x1, 64);
+	vcvt_n_u64_f64(arg_f64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvt_n_u64_f64(arg_f64x1, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvtq_n_u64_f64(arg_f64x2, 1);
+	vcvtq_n_u64_f64(arg_f64x2, 64);
+	vcvtq_n_u64_f64(arg_f64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvtq_n_u64_f64(arg_f64x2, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvtd_n_s64_f64(arg_f64, 1);
+	vcvtd_n_s64_f64(arg_f64, 64);
+	vcvtd_n_s64_f64(arg_f64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvtd_n_s64_f64(arg_f64, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvtd_n_u64_f64(arg_f64, 1);
+	vcvtd_n_u64_f64(arg_f64, 64);
+	vcvtd_n_u64_f64(arg_f64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvtd_n_u64_f64(arg_f64, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_conversions_s32(int32_t arg_i32, int32x2_t arg_i32x2, int32x4_t arg_i32x4) {
+	vcvt_n_f32_s32(arg_i32x2, 1);
+	vcvt_n_f32_s32(arg_i32x2, 32);
+	vcvt_n_f32_s32(arg_i32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvt_n_f32_s32(arg_i32x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvtq_n_f32_s32(arg_i32x4, 1);
+	vcvtq_n_f32_s32(arg_i32x4, 32);
+	vcvtq_n_f32_s32(arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvtq_n_f32_s32(arg_i32x4, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvts_n_f32_s32(arg_i32, 1);
+	vcvts_n_f32_s32(arg_i32, 32);
+	vcvts_n_f32_s32(arg_i32, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvts_n_f32_s32(arg_i32, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_conversions_u32(uint32x4_t arg_u32x4, uint32x2_t arg_u32x2, uint32_t arg_u32) {
+	vcvt_n_f32_u32(arg_u32x2, 1);
+	vcvt_n_f32_u32(arg_u32x2, 32);
+	vcvt_n_f32_u32(arg_u32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvt_n_f32_u32(arg_u32x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvtq_n_f32_u32(arg_u32x4, 1);
+	vcvtq_n_f32_u32(arg_u32x4, 32);
+	vcvtq_n_f32_u32(arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvtq_n_f32_u32(arg_u32x4, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvts_n_f32_u32(arg_u32, 1);
+	vcvts_n_f32_u32(arg_u32, 32);
+	vcvts_n_f32_u32(arg_u32, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvts_n_f32_u32(arg_u32, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_conversions_s64(int64x2_t arg_i64x2, int64x1_t arg_i64x1, int64_t arg_i64) {
+	vcvt_n_f64_s64(arg_i64x1, 1);
+	vcvt_n_f64_s64(arg_i64x1, 64);
+	vcvt_n_f64_s64(arg_i64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvt_n_f64_s64(arg_i64x1, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvtq_n_f64_s64(arg_i64x2, 1);
+	vcvtq_n_f64_s64(arg_i64x2, 64);
+	vcvtq_n_f64_s64(arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvtq_n_f64_s64(arg_i64x2, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvtd_n_f64_s64(arg_i64, 1);
+	vcvtd_n_f64_s64(arg_i64, 64);
+	vcvtd_n_f64_s64(arg_i64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvtd_n_f64_s64(arg_i64, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_conversions_u64(uint64x2_t arg_u64x2, uint64_t arg_u64, uint64x1_t arg_u64x1) {
+	vcvt_n_f64_u64(arg_u64x1, 1);
+	vcvt_n_f64_u64(arg_u64x1, 64);
+	vcvt_n_f64_u64(arg_u64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvt_n_f64_u64(arg_u64x1, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvtq_n_f64_u64(arg_u64x2, 1);
+	vcvtq_n_f64_u64(arg_u64x2, 64);
+	vcvtq_n_f64_u64(arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvtq_n_f64_u64(arg_u64x2, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvtd_n_f64_u64(arg_u64, 1);
+	vcvtd_n_f64_u64(arg_u64, 64);
+	vcvtd_n_f64_u64(arg_u64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvtd_n_f64_u64(arg_u64, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
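
Unlike the lane-index tests, the _n_ conversion intrinsics take a fixed-point fraction-bit count, so the accepted range runs from 1 up to the element width (1..32 for the 32-bit forms, 1..64 for the 64-bit forms) and 0 is rejected. A small usage sketch, for illustration only and assuming the same +neon RUN line as above:

    #include <arm_neon.h>

    // Treat each 32-bit lane of v as Q16.16 fixed point; the result lane is
    // v[i] / 2^16.  Any constant n in [1, 32] is valid here.
    float32x2_t q16_to_float(int32x2_t v) {
      return vcvt_n_f32_s32(v, 16);
    }
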
diff --git clang/test/Sema/aarch64-neon-immediate-ranges/copy-vector-lane.c clang/test/Sema/aarch64-neon-immediate-ranges/copy-vector-lane.c
new file mode 100644
index 000000000000..aafd36d1ccfe
--- /dev/null
+++ clang/test/Sema/aarch64-neon-immediate-ranges/copy-vector-lane.c
@@ -0,0 +1,498 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+
+void test_copy_vector_lane_s8(int8x16_t arg_i8x16, int8x8_t arg_i8x8) {
+	vcopy_lane_s8(arg_i8x8, 0, arg_i8x8, 0);
+
+	vcopy_lane_s8(arg_i8x8, 7, arg_i8x8, 0);
+	vcopy_lane_s8(arg_i8x8, -1, arg_i8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_s8(arg_i8x8, 8, arg_i8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_lane_s8(arg_i8x8, 0, arg_i8x8, 7);
+	vcopy_lane_s8(arg_i8x8, 0, arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_s8(arg_i8x8, 0, arg_i8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_s8(arg_i8x16, 0, arg_i8x8, 0);
+	vcopyq_lane_s8(arg_i8x16, 15, arg_i8x8, 0);
+	vcopyq_lane_s8(arg_i8x16, -1, arg_i8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_s8(arg_i8x16, 16, arg_i8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_s8(arg_i8x16, 0, arg_i8x8, 7);
+	vcopyq_lane_s8(arg_i8x16, 0, arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_s8(arg_i8x16, 0, arg_i8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_s8(arg_i8x8, 0, arg_i8x16, 0);
+	vcopy_laneq_s8(arg_i8x8, 7, arg_i8x16, 0);
+	vcopy_laneq_s8(arg_i8x8, -1, arg_i8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_s8(arg_i8x8, 8, arg_i8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_s8(arg_i8x8, 0, arg_i8x16, 15);
+	vcopy_laneq_s8(arg_i8x8, 0, arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_s8(arg_i8x8, 0, arg_i8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_s8(arg_i8x16, 0, arg_i8x16, 0);
+	vcopyq_laneq_s8(arg_i8x16, 15, arg_i8x16, 0);
+	vcopyq_laneq_s8(arg_i8x16, -1, arg_i8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_s8(arg_i8x16, 16, arg_i8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_s8(arg_i8x16, 0, arg_i8x16, 15);
+	vcopyq_laneq_s8(arg_i8x16, 0, arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_s8(arg_i8x16, 0, arg_i8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_copy_vector_lane_s16(int16x4_t arg_i16x4, int16x8_t arg_i16x8) {
+	vcopy_lane_s16(arg_i16x4, 0, arg_i16x4, 0);
+	vcopy_lane_s16(arg_i16x4, 3, arg_i16x4, 0);
+	vcopy_lane_s16(arg_i16x4, -1, arg_i16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_s16(arg_i16x4, 4, arg_i16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_lane_s16(arg_i16x4, 0, arg_i16x4, 3);
+	vcopy_lane_s16(arg_i16x4, 0, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_s16(arg_i16x4, 0, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_s16(arg_i16x8, 0, arg_i16x4, 0);
+	vcopyq_lane_s16(arg_i16x8, 7, arg_i16x4, 0);
+	vcopyq_lane_s16(arg_i16x8, -1, arg_i16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_s16(arg_i16x8, 8, arg_i16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_s16(arg_i16x8, 0, arg_i16x4, 3);
+	vcopyq_lane_s16(arg_i16x8, 0, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_s16(arg_i16x8, 0, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_s16(arg_i16x4, 0, arg_i16x8, 0);
+	vcopy_laneq_s16(arg_i16x4, 3, arg_i16x8, 0);
+	vcopy_laneq_s16(arg_i16x4, -1, arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_s16(arg_i16x4, 4, arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_s16(arg_i16x4, 0, arg_i16x8, 7);
+	vcopy_laneq_s16(arg_i16x4, 0, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_s16(arg_i16x4, 0, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_s16(arg_i16x8, 0, arg_i16x8, 0);
+	vcopyq_laneq_s16(arg_i16x8, 7, arg_i16x8, 0);
+	vcopyq_laneq_s16(arg_i16x8, -1, arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_s16(arg_i16x8, 8, arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_s16(arg_i16x8, 0, arg_i16x8, 7);
+	vcopyq_laneq_s16(arg_i16x8, 0, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_s16(arg_i16x8, 0, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_copy_vector_lane_s32(int32x2_t arg_i32x2, int32x4_t arg_i32x4) {
+	vcopy_lane_s32(arg_i32x2, 0, arg_i32x2, 0);
+	vcopy_lane_s32(arg_i32x2, 1, arg_i32x2, 0);
+	vcopy_lane_s32(arg_i32x2, -1, arg_i32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_s32(arg_i32x2, 2, arg_i32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_lane_s32(arg_i32x2, 0, arg_i32x2, 1);
+	vcopy_lane_s32(arg_i32x2, 0, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_s32(arg_i32x2, 0, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_s32(arg_i32x4, 0, arg_i32x2, 0);
+	vcopyq_lane_s32(arg_i32x4, 3, arg_i32x2, 0);
+	vcopyq_lane_s32(arg_i32x4, -1, arg_i32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_s32(arg_i32x4, 4, arg_i32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_s32(arg_i32x4, 0, arg_i32x2, 1);
+	vcopyq_lane_s32(arg_i32x4, 0, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_s32(arg_i32x4, 0, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_s32(arg_i32x2, 0, arg_i32x4, 0);
+	vcopy_laneq_s32(arg_i32x2, 1, arg_i32x4, 0);
+	vcopy_laneq_s32(arg_i32x2, -1, arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_s32(arg_i32x2, 2, arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_s32(arg_i32x2, 0, arg_i32x4, 3);
+	vcopy_laneq_s32(arg_i32x2, 0, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_s32(arg_i32x2, 0, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_s32(arg_i32x4, 0, arg_i32x4, 0);
+	vcopyq_laneq_s32(arg_i32x4, 3, arg_i32x4, 0);
+	vcopyq_laneq_s32(arg_i32x4, -1, arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_s32(arg_i32x4, 4, arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_s32(arg_i32x4, 0, arg_i32x4, 3);
+	vcopyq_laneq_s32(arg_i32x4, 0, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_s32(arg_i32x4, 0, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_copy_vector_lane_s64(int64x1_t arg_i64x1, int64x2_t arg_i64x2) {
+	vcopy_lane_s64(arg_i64x1, 0, arg_i64x1, 0);
+	vcopy_lane_s64(arg_i64x1, -1, arg_i64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_s64(arg_i64x1, 1, arg_i64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_s64(arg_i64x1, 0, arg_i64x1, 0);
+	vcopy_lane_s64(arg_i64x1, 0, arg_i64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_s64(arg_i64x1, 0, arg_i64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_s64(arg_i64x2, 0, arg_i64x1, 0);
+	vcopyq_lane_s64(arg_i64x2, 1, arg_i64x1, 0);
+	vcopyq_lane_s64(arg_i64x2, -1, arg_i64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_s64(arg_i64x2, 2, arg_i64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_s64(arg_i64x2, 0, arg_i64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_s64(arg_i64x2, 0, arg_i64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_s64(arg_i64x1, 0, arg_i64x2, 0);
+	vcopy_laneq_s64(arg_i64x1, -1, arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_s64(arg_i64x1, 1, arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_s64(arg_i64x1, 0, arg_i64x2, 1);
+	vcopy_laneq_s64(arg_i64x1, 0, arg_i64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_s64(arg_i64x1, 0, arg_i64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_s64(arg_i64x2, 0, arg_i64x2, 0);
+	vcopyq_laneq_s64(arg_i64x2, 1, arg_i64x2, 0);
+	vcopyq_laneq_s64(arg_i64x2, -1, arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_s64(arg_i64x2, 2, arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_s64(arg_i64x2, 0, arg_i64x2, 1);
+	vcopyq_laneq_s64(arg_i64x2, 0, arg_i64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_s64(arg_i64x2, 0, arg_i64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_copy_vector_lane_u8(uint8x8_t arg_u8x8, uint8x16_t arg_u8x16) {
+	vcopy_lane_u8(arg_u8x8, 0, arg_u8x8, 0);
+	vcopy_lane_u8(arg_u8x8, 7, arg_u8x8, 0);
+	vcopy_lane_u8(arg_u8x8, -1, arg_u8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_u8(arg_u8x8, 8, arg_u8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_lane_u8(arg_u8x8, 0, arg_u8x8, 7);
+	vcopy_lane_u8(arg_u8x8, 0, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_u8(arg_u8x8, 0, arg_u8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_u8(arg_u8x16, 0, arg_u8x8, 0);
+	vcopyq_lane_u8(arg_u8x16, 15, arg_u8x8, 0);
+	vcopyq_lane_u8(arg_u8x16, -1, arg_u8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_u8(arg_u8x16, 16, arg_u8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_u8(arg_u8x16, 0, arg_u8x8, 7);
+	vcopyq_lane_u8(arg_u8x16, 0, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_u8(arg_u8x16, 0, arg_u8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_u8(arg_u8x8, 0, arg_u8x16, 0);
+	vcopy_laneq_u8(arg_u8x8, 7, arg_u8x16, 0);
+	vcopy_laneq_u8(arg_u8x8, -1, arg_u8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_u8(arg_u8x8, 8, arg_u8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_u8(arg_u8x8, 0, arg_u8x16, 15);
+	vcopy_laneq_u8(arg_u8x8, 0, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_u8(arg_u8x8, 0, arg_u8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_u8(arg_u8x16, 0, arg_u8x16, 0);
+	vcopyq_laneq_u8(arg_u8x16, 15, arg_u8x16, 0);
+	vcopyq_laneq_u8(arg_u8x16, -1, arg_u8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_u8(arg_u8x16, 16, arg_u8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_u8(arg_u8x16, 0, arg_u8x16, 15);
+	vcopyq_laneq_u8(arg_u8x16, 0, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_u8(arg_u8x16, 0, arg_u8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_copy_vector_lane_u16(uint16x4_t arg_u16x4, uint16x8_t arg_u16x8) {
+	vcopy_lane_u16(arg_u16x4, 0, arg_u16x4, 0);
+	vcopy_lane_u16(arg_u16x4, 3, arg_u16x4, 0);
+	vcopy_lane_u16(arg_u16x4, -1, arg_u16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_u16(arg_u16x4, 4, arg_u16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_lane_u16(arg_u16x4, 0, arg_u16x4, 3);
+	vcopy_lane_u16(arg_u16x4, 0, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_u16(arg_u16x4, 0, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_u16(arg_u16x8, 0, arg_u16x4, 0);
+	vcopyq_lane_u16(arg_u16x8, 7, arg_u16x4, 0);
+	vcopyq_lane_u16(arg_u16x8, -1, arg_u16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_u16(arg_u16x8, 8, arg_u16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_u16(arg_u16x8, 0, arg_u16x4, 3);
+	vcopyq_lane_u16(arg_u16x8, 0, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_u16(arg_u16x8, 0, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_u16(arg_u16x4, 0, arg_u16x8, 0);
+	vcopy_laneq_u16(arg_u16x4, 3, arg_u16x8, 0);
+	vcopy_laneq_u16(arg_u16x4, -1, arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_u16(arg_u16x4, 4, arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_u16(arg_u16x4, 0, arg_u16x8, 7);
+	vcopy_laneq_u16(arg_u16x4, 0, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_u16(arg_u16x4, 0, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_u16(arg_u16x8, 0, arg_u16x8, 0);
+	vcopyq_laneq_u16(arg_u16x8, 7, arg_u16x8, 0);
+	vcopyq_laneq_u16(arg_u16x8, -1, arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_u16(arg_u16x8, 8, arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_u16(arg_u16x8, 0, arg_u16x8, 7);
+	vcopyq_laneq_u16(arg_u16x8, 0, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_u16(arg_u16x8, 0, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_copy_vector_lane_u32(uint32x2_t arg_u32x2, uint32x4_t arg_u32x4) {
+	vcopy_lane_u32(arg_u32x2, 0, arg_u32x2, 0);
+	vcopy_lane_u32(arg_u32x2, 1, arg_u32x2, 0);
+	vcopy_lane_u32(arg_u32x2, -1, arg_u32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_u32(arg_u32x2, 2, arg_u32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_lane_u32(arg_u32x2, 0, arg_u32x2, 1);
+	vcopy_lane_u32(arg_u32x2, 0, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_u32(arg_u32x2, 0, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_u32(arg_u32x4, 0, arg_u32x2, 0);
+	vcopyq_lane_u32(arg_u32x4, 3, arg_u32x2, 0);
+	vcopyq_lane_u32(arg_u32x4, -1, arg_u32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_u32(arg_u32x4, 4, arg_u32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_u32(arg_u32x4, 0, arg_u32x2, 1);
+	vcopyq_lane_u32(arg_u32x4, 0, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_u32(arg_u32x4, 0, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_u32(arg_u32x2, 0, arg_u32x4, 0);
+	vcopy_laneq_u32(arg_u32x2, 1, arg_u32x4, 0);
+	vcopy_laneq_u32(arg_u32x2, -1, arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_u32(arg_u32x2, 2, arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_u32(arg_u32x2, 0, arg_u32x4, 3);
+	vcopy_laneq_u32(arg_u32x2, 0, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_u32(arg_u32x2, 0, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_u32(arg_u32x4, 0, arg_u32x4, 0);
+	vcopyq_laneq_u32(arg_u32x4, 3, arg_u32x4, 0);
+	vcopyq_laneq_u32(arg_u32x4, -1, arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_u32(arg_u32x4, 4, arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_u32(arg_u32x4, 0, arg_u32x4, 3);
+	vcopyq_laneq_u32(arg_u32x4, 0, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_u32(arg_u32x4, 0, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_copy_vector_lane_u64(uint64x2_t arg_u64x2, uint64x1_t arg_u64x1) {
+	vcopy_lane_u64(arg_u64x1, 0, arg_u64x1, 0);
+	vcopy_lane_u64(arg_u64x1, -1, arg_u64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_u64(arg_u64x1, 1, arg_u64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_lane_u64(arg_u64x1, 0, arg_u64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_u64(arg_u64x1, 0, arg_u64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_u64(arg_u64x2, 0, arg_u64x1, 0);
+	vcopyq_lane_u64(arg_u64x2, 1, arg_u64x1, 0);
+	vcopyq_lane_u64(arg_u64x2, -1, arg_u64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_u64(arg_u64x2, 2, arg_u64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_u64(arg_u64x2, 0, arg_u64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_u64(arg_u64x2, 0, arg_u64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_u64(arg_u64x1, 0, arg_u64x2, 0);
+	vcopy_laneq_u64(arg_u64x1, -1, arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_u64(arg_u64x1, 1, arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_u64(arg_u64x1, 0, arg_u64x2, 1);
+	vcopy_laneq_u64(arg_u64x1, 0, arg_u64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_u64(arg_u64x1, 0, arg_u64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_u64(arg_u64x2, 0, arg_u64x2, 0);
+	vcopyq_laneq_u64(arg_u64x2, 1, arg_u64x2, 0);
+	vcopyq_laneq_u64(arg_u64x2, -1, arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_u64(arg_u64x2, 2, arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_u64(arg_u64x2, 0, arg_u64x2, 1);
+	vcopyq_laneq_u64(arg_u64x2, 0, arg_u64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_u64(arg_u64x2, 0, arg_u64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_copy_vector_lane_p64(poly64x1_t arg_p64x1, poly64x2_t arg_p64x2) {
+	vcopy_lane_p64(arg_p64x1, 0, arg_p64x1, 0);
+	vcopy_lane_p64(arg_p64x1, -1, arg_p64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_p64(arg_p64x1, 1, arg_p64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_lane_p64(arg_p64x1, 0, arg_p64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_p64(arg_p64x1, 0, arg_p64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_p64(arg_p64x2, 0, arg_p64x1, 0);
+	vcopyq_lane_p64(arg_p64x2, 1, arg_p64x1, 0);
+	vcopyq_lane_p64(arg_p64x2, -1, arg_p64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_p64(arg_p64x2, 2, arg_p64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_p64(arg_p64x2, 0, arg_p64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_p64(arg_p64x2, 0, arg_p64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_p64(arg_p64x1, 0, arg_p64x2, 0);
+	vcopy_laneq_p64(arg_p64x1, -1, arg_p64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_p64(arg_p64x1, 1, arg_p64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_p64(arg_p64x1, 0, arg_p64x2, 1);
+	vcopy_laneq_p64(arg_p64x1, 0, arg_p64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_p64(arg_p64x1, 0, arg_p64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_p64(arg_p64x2, 0, arg_p64x2, 0);
+	vcopyq_laneq_p64(arg_p64x2, 1, arg_p64x2, 0);
+	vcopyq_laneq_p64(arg_p64x2, -1, arg_p64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_p64(arg_p64x2, 2, arg_p64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_p64(arg_p64x2, 0, arg_p64x2, 1);
+	vcopyq_laneq_p64(arg_p64x2, 0, arg_p64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_p64(arg_p64x2, 0, arg_p64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_copy_vector_lane_f32(float32x2_t arg_f32x2, float32x4_t arg_f32x4) {
+	vcopy_lane_f32(arg_f32x2, 0, arg_f32x2, 0);
+	vcopy_lane_f32(arg_f32x2, 1, arg_f32x2, 0);
+	vcopy_lane_f32(arg_f32x2, -1, arg_f32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_f32(arg_f32x2, 2, arg_f32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_lane_f32(arg_f32x2, 0, arg_f32x2, 1);
+	vcopy_lane_f32(arg_f32x2, 0, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_f32(arg_f32x2, 0, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_f32(arg_f32x4, 0, arg_f32x2, 0);
+	vcopyq_lane_f32(arg_f32x4, 3, arg_f32x2, 0);
+	vcopyq_lane_f32(arg_f32x4, -1, arg_f32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_f32(arg_f32x4, 4, arg_f32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_f32(arg_f32x4, 0, arg_f32x2, 1);
+	vcopyq_lane_f32(arg_f32x4, 0, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_f32(arg_f32x4, 0, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_f32(arg_f32x2, 0, arg_f32x4, 0);
+	vcopy_laneq_f32(arg_f32x2, 1, arg_f32x4, 0);
+	vcopy_laneq_f32(arg_f32x2, -1, arg_f32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_f32(arg_f32x2, 2, arg_f32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_f32(arg_f32x2, 0, arg_f32x4, 3);
+	vcopy_laneq_f32(arg_f32x2, 0, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_f32(arg_f32x2, 0, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_f32(arg_f32x4, 0, arg_f32x4, 0);
+	vcopyq_laneq_f32(arg_f32x4, 3, arg_f32x4, 0);
+	vcopyq_laneq_f32(arg_f32x4, -1, arg_f32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_f32(arg_f32x4, 4, arg_f32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_f32(arg_f32x4, 0, arg_f32x4, 3);
+	vcopyq_laneq_f32(arg_f32x4, 0, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_f32(arg_f32x4, 0, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_copy_vector_lane_f64(float64x2_t arg_f64x2, float64x1_t arg_f64x1) {
+	vcopy_lane_f64(arg_f64x1, 0, arg_f64x1, 0);
+	vcopy_lane_f64(arg_f64x1, -1, arg_f64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_f64(arg_f64x1, 1, arg_f64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_lane_f64(arg_f64x1, 0, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_f64(arg_f64x1, 0, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_f64(arg_f64x2, 0, arg_f64x1, 0);
+	vcopyq_lane_f64(arg_f64x2, 1, arg_f64x1, 0);
+	vcopyq_lane_f64(arg_f64x2, -1, arg_f64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_f64(arg_f64x2, 2, arg_f64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_f64(arg_f64x2, 0, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_f64(arg_f64x2, 0, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_f64(arg_f64x1, 0, arg_f64x2, 0);
+	vcopy_laneq_f64(arg_f64x1, -1, arg_f64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_f64(arg_f64x1, 1, arg_f64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_f64(arg_f64x1, 0, arg_f64x2, 1);
+	vcopy_laneq_f64(arg_f64x1, 0, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_f64(arg_f64x1, 0, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_f64(arg_f64x2, 0, arg_f64x2, 0);
+	vcopyq_laneq_f64(arg_f64x2, 1, arg_f64x2, 0);
+	vcopyq_laneq_f64(arg_f64x2, -1, arg_f64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_f64(arg_f64x2, 2, arg_f64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_f64(arg_f64x2, 0, arg_f64x2, 1);
+	vcopyq_laneq_f64(arg_f64x2, 0, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_f64(arg_f64x2, 0, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_copy_vector_lane_p8(poly8x16_t arg_p8x16, poly8x8_t arg_p8x8) {
+	vcopy_lane_p8(arg_p8x8, 0, arg_p8x8, 0);
+	vcopy_lane_p8(arg_p8x8, 7, arg_p8x8, 0);
+	vcopy_lane_p8(arg_p8x8, -1, arg_p8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_p8(arg_p8x8, 8, arg_p8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_lane_p8(arg_p8x8, 0, arg_p8x8, 7);
+	vcopy_lane_p8(arg_p8x8, 0, arg_p8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_p8(arg_p8x8, 0, arg_p8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_p8(arg_p8x16, 0, arg_p8x8, 0);
+	vcopyq_lane_p8(arg_p8x16, 15, arg_p8x8, 0);
+	vcopyq_lane_p8(arg_p8x16, -1, arg_p8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_p8(arg_p8x16, 16, arg_p8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_p8(arg_p8x16, 0, arg_p8x8, 7);
+	vcopyq_lane_p8(arg_p8x16, 0, arg_p8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_p8(arg_p8x16, 0, arg_p8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_p8(arg_p8x8, 0, arg_p8x16, 0);
+	vcopy_laneq_p8(arg_p8x8, 7, arg_p8x16, 0);
+	vcopy_laneq_p8(arg_p8x8, -1, arg_p8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_p8(arg_p8x8, 8, arg_p8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_p8(arg_p8x8, 0, arg_p8x16, 15);
+	vcopy_laneq_p8(arg_p8x8, 0, arg_p8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_p8(arg_p8x8, 0, arg_p8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_p8(arg_p8x16, 0, arg_p8x16, 0);
+	vcopyq_laneq_p8(arg_p8x16, 15, arg_p8x16, 0);
+	vcopyq_laneq_p8(arg_p8x16, -1, arg_p8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_p8(arg_p8x16, 16, arg_p8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_p8(arg_p8x16, 0, arg_p8x16, 15);
+	vcopyq_laneq_p8(arg_p8x16, 0, arg_p8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_p8(arg_p8x16, 0, arg_p8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_copy_vector_lane_p16(poly16x8_t arg_p16x8, poly16x4_t arg_p16x4) {
+	vcopy_lane_p16(arg_p16x4, 0, arg_p16x4, 0);
+	vcopy_lane_p16(arg_p16x4, 3, arg_p16x4, 0);
+	vcopy_lane_p16(arg_p16x4, -1, arg_p16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_p16(arg_p16x4, 4, arg_p16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_lane_p16(arg_p16x4, 0, arg_p16x4, 3);
+	vcopy_lane_p16(arg_p16x4, 0, arg_p16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_p16(arg_p16x4, 0, arg_p16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_p16(arg_p16x8, 0, arg_p16x4, 0);
+	vcopyq_lane_p16(arg_p16x8, 7, arg_p16x4, 0);
+	vcopyq_lane_p16(arg_p16x8, -1, arg_p16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_p16(arg_p16x8, 8, arg_p16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_p16(arg_p16x8, 0, arg_p16x4, 3);
+	vcopyq_lane_p16(arg_p16x8, 0, arg_p16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_p16(arg_p16x8, 0, arg_p16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_p16(arg_p16x4, 0, arg_p16x8, 0);
+	vcopy_laneq_p16(arg_p16x4, 3, arg_p16x8, 0);
+	vcopy_laneq_p16(arg_p16x4, -1, arg_p16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_p16(arg_p16x4, 4, arg_p16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_p16(arg_p16x4, 0, arg_p16x8, 7);
+	vcopy_laneq_p16(arg_p16x4, 0, arg_p16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_p16(arg_p16x4, 0, arg_p16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_p16(arg_p16x8, 0, arg_p16x8, 0);
+	vcopyq_laneq_p16(arg_p16x8, 7, arg_p16x8, 0);
+	vcopyq_laneq_p16(arg_p16x8, -1, arg_p16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_p16(arg_p16x8, 8, arg_p16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_p16(arg_p16x8, 0, arg_p16x8, 7);
+	vcopyq_laneq_p16(arg_p16x8, 0, arg_p16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_p16(arg_p16x8, 0, arg_p16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git clang/test/Sema/aarch64-neon-immediate-ranges/dotprod.c clang/test/Sema/aarch64-neon-immediate-ranges/dotprod.c
new file mode 100644
index 000000000000..11f2c660a8ff
--- /dev/null
+++ clang/test/Sema/aarch64-neon-immediate-ranges/dotprod.c
@@ -0,0 +1,50 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -target-feature +v8.2a -target-feature +dotprod -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+void test_dot_product_u32(uint8x8_t arg_u8x8, uint32x2_t arg_u32x2, uint8x16_t arg_u8x16, uint32x4_t arg_u32x4) {
+	vdot_lane_u32(arg_u32x2, arg_u8x8, arg_u8x8, 0);
+	vdot_lane_u32(arg_u32x2, arg_u8x8, arg_u8x8, 1);
+	vdot_lane_u32(arg_u32x2, arg_u8x8, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdot_lane_u32(arg_u32x2, arg_u8x8, arg_u8x8, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdotq_laneq_u32(arg_u32x4, arg_u8x16, arg_u8x16, 0);
+	vdotq_laneq_u32(arg_u32x4, arg_u8x16, arg_u8x16, 3);
+	vdotq_laneq_u32(arg_u32x4, arg_u8x16, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdotq_laneq_u32(arg_u32x4, arg_u8x16, arg_u8x16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdot_laneq_u32(arg_u32x2, arg_u8x8, arg_u8x16, 0);
+	vdot_laneq_u32(arg_u32x2, arg_u8x8, arg_u8x16, 3);
+	vdot_laneq_u32(arg_u32x2, arg_u8x8, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdot_laneq_u32(arg_u32x2, arg_u8x8, arg_u8x16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdotq_lane_u32(arg_u32x4, arg_u8x16, arg_u8x8, 0);
+	vdotq_lane_u32(arg_u32x4, arg_u8x16, arg_u8x8, 1);
+	vdotq_lane_u32(arg_u32x4, arg_u8x16, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdotq_lane_u32(arg_u32x4, arg_u8x16, arg_u8x8, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_dot_product_s32(int32x2_t arg_i32x2, int8x16_t arg_i8x16, int8x8_t arg_i8x8, int32x4_t arg_i32x4) {
+	vdot_lane_s32(arg_i32x2, arg_i8x8, arg_i8x8, 0);
+	vdot_lane_s32(arg_i32x2, arg_i8x8, arg_i8x8, 1);
+	vdot_lane_s32(arg_i32x2, arg_i8x8, arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdot_lane_s32(arg_i32x2, arg_i8x8, arg_i8x8, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdotq_laneq_s32(arg_i32x4, arg_i8x16, arg_i8x16, 0);
+	vdotq_laneq_s32(arg_i32x4, arg_i8x16, arg_i8x16, 3);
+	vdotq_laneq_s32(arg_i32x4, arg_i8x16, arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdotq_laneq_s32(arg_i32x4, arg_i8x16, arg_i8x16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdot_laneq_s32(arg_i32x2, arg_i8x8, arg_i8x16, 0);
+	vdot_laneq_s32(arg_i32x2, arg_i8x8, arg_i8x16, 3);
+	vdot_laneq_s32(arg_i32x2, arg_i8x8, arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdot_laneq_s32(arg_i32x2, arg_i8x8, arg_i8x16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdotq_lane_s32(arg_i32x4, arg_i8x16, arg_i8x8, 0);
+	vdotq_lane_s32(arg_i32x4, arg_i8x16, arg_i8x8, 1);
+	vdotq_lane_s32(arg_i32x4, arg_i8x16, arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdotq_lane_s32(arg_i32x4, arg_i8x16, arg_i8x8, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
diff --git clang/test/Sema/aarch64-neon-immediate-ranges/extract-elt-from-vector.c clang/test/Sema/aarch64-neon-immediate-ranges/extract-elt-from-vector.c
new file mode 100644
index 000000000000..5738f5ad27f3
--- /dev/null
+++ clang/test/Sema/aarch64-neon-immediate-ranges/extract-elt-from-vector.c
@@ -0,0 +1,301 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+
+void test_extract_one_element_from_vector_s8(int8x16_t arg_i8x16, int8x8_t arg_i8x8) {
+	vdupb_lane_s8(arg_i8x8, 0);
+	vdupb_lane_s8(arg_i8x8, 7);
+	vdupb_lane_s8(arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupb_lane_s8(arg_i8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupb_laneq_s8(arg_i8x16, 0);
+	vdupb_laneq_s8(arg_i8x16, 15);
+	vdupb_laneq_s8(arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupb_laneq_s8(arg_i8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vget_lane_s8(arg_i8x8, 0);
+	vget_lane_s8(arg_i8x8, 7);
+	vget_lane_s8(arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vget_lane_s8(arg_i8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vgetq_lane_s8(arg_i8x16, 0);
+	vgetq_lane_s8(arg_i8x16, 15);
+	vgetq_lane_s8(arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vgetq_lane_s8(arg_i8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_one_element_from_vector_s16(int16x4_t arg_i16x4, int16x8_t arg_i16x8) {
+	vduph_lane_s16(arg_i16x4, 0);
+	vduph_lane_s16(arg_i16x4, 3);
+	vduph_lane_s16(arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vduph_lane_s16(arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vduph_laneq_s16(arg_i16x8, 0);
+	vduph_laneq_s16(arg_i16x8, 7);
+	vduph_laneq_s16(arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vduph_laneq_s16(arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vget_lane_s16(arg_i16x4, 0);
+	vget_lane_s16(arg_i16x4, 3);
+	vget_lane_s16(arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vget_lane_s16(arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vgetq_lane_s16(arg_i16x8, 0);
+	vgetq_lane_s16(arg_i16x8, 7);
+	vgetq_lane_s16(arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vgetq_lane_s16(arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_one_element_from_vector_s32(int32x4_t arg_i32x4, int32x2_t arg_i32x2) {
+	vdups_lane_s32(arg_i32x2, 0);
+	vdups_lane_s32(arg_i32x2, 1);
+	vdups_lane_s32(arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdups_lane_s32(arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdups_laneq_s32(arg_i32x4, 0);
+	vdups_laneq_s32(arg_i32x4, 3);
+	vdups_laneq_s32(arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdups_laneq_s32(arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vget_lane_s32(arg_i32x2, 0);
+	vget_lane_s32(arg_i32x2, 1);
+	vget_lane_s32(arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vget_lane_s32(arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vgetq_lane_s32(arg_i32x4, 0);
+	vgetq_lane_s32(arg_i32x4, 3);
+	vgetq_lane_s32(arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vgetq_lane_s32(arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_one_element_from_vector_s64(int64x2_t arg_i64x2, int64x1_t arg_i64x1) {
+	vdupd_lane_s64(arg_i64x1, 0);
+	vdupd_lane_s64(arg_i64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupd_lane_s64(arg_i64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupd_laneq_s64(arg_i64x2, 0);
+	vdupd_laneq_s64(arg_i64x2, 1);
+	vdupd_laneq_s64(arg_i64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupd_laneq_s64(arg_i64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vget_lane_s64(arg_i64x1, 0);
+	vget_lane_s64(arg_i64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vget_lane_s64(arg_i64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vgetq_lane_s64(arg_i64x2, 0);
+	vgetq_lane_s64(arg_i64x2, 1);
+	vgetq_lane_s64(arg_i64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vgetq_lane_s64(arg_i64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_one_element_from_vector_u8(uint8x8_t arg_u8x8, uint8x16_t arg_u8x16) {
+	vdupb_lane_u8(arg_u8x8, 0);
+	vdupb_lane_u8(arg_u8x8, 7);
+	vdupb_lane_u8(arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupb_lane_u8(arg_u8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupb_laneq_u8(arg_u8x16, 0);
+	vdupb_laneq_u8(arg_u8x16, 15);
+	vdupb_laneq_u8(arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupb_laneq_u8(arg_u8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vget_lane_u8(arg_u8x8, 0);
+	vget_lane_u8(arg_u8x8, 7);
+	vget_lane_u8(arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vget_lane_u8(arg_u8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vgetq_lane_u8(arg_u8x16, 0);
+	vgetq_lane_u8(arg_u8x16, 15);
+	vgetq_lane_u8(arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vgetq_lane_u8(arg_u8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_one_element_from_vector_u16(uint16x8_t arg_u16x8, uint16x4_t arg_u16x4) {
+	vduph_lane_u16(arg_u16x4, 0);
+	vduph_lane_u16(arg_u16x4, 3);
+	vduph_lane_u16(arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vduph_lane_u16(arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vduph_laneq_u16(arg_u16x8, 0);
+	vduph_laneq_u16(arg_u16x8, 7);
+	vduph_laneq_u16(arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vduph_laneq_u16(arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vget_lane_u16(arg_u16x4, 0);
+	vget_lane_u16(arg_u16x4, 3);
+	vget_lane_u16(arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vget_lane_u16(arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vgetq_lane_u16(arg_u16x8, 0);
+	vgetq_lane_u16(arg_u16x8, 7);
+	vgetq_lane_u16(arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vgetq_lane_u16(arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_one_element_from_vector_u32(uint32x2_t arg_u32x2, uint32x4_t arg_u32x4) {
+	vdups_lane_u32(arg_u32x2, 0);
+	vdups_lane_u32(arg_u32x2, 1);
+	vdups_lane_u32(arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdups_lane_u32(arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdups_laneq_u32(arg_u32x4, 0);
+	vdups_laneq_u32(arg_u32x4, 3);
+	vdups_laneq_u32(arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdups_laneq_u32(arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vget_lane_u32(arg_u32x2, 0);
+	vget_lane_u32(arg_u32x2, 1);
+	vget_lane_u32(arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vget_lane_u32(arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vgetq_lane_u32(arg_u32x4, 0);
+	vgetq_lane_u32(arg_u32x4, 3);
+	vgetq_lane_u32(arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vgetq_lane_u32(arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_one_element_from_vector_u64(uint64x1_t arg_u64x1, uint64x2_t arg_u64x2) {
+	vdupd_lane_u64(arg_u64x1, 0);
+	vdupd_lane_u64(arg_u64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupd_lane_u64(arg_u64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupd_laneq_u64(arg_u64x2, 0);
+	vdupd_laneq_u64(arg_u64x2, 1);
+	vdupd_laneq_u64(arg_u64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupd_laneq_u64(arg_u64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vget_lane_u64(arg_u64x1, 0);
+	vget_lane_u64(arg_u64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vget_lane_u64(arg_u64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vgetq_lane_u64(arg_u64x2, 0);
+	vgetq_lane_u64(arg_u64x2, 1);
+	vgetq_lane_u64(arg_u64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vgetq_lane_u64(arg_u64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_one_element_from_vector_f32(float32x4_t arg_f32x4, float32x2_t arg_f32x2) {
+	vdups_lane_f32(arg_f32x2, 0);
+	vdups_lane_f32(arg_f32x2, 1);
+	vdups_lane_f32(arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdups_lane_f32(arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdups_laneq_f32(arg_f32x4, 0);
+	vdups_laneq_f32(arg_f32x4, 3);
+	vdups_laneq_f32(arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdups_laneq_f32(arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vget_lane_f32(arg_f32x2, 0);
+	vget_lane_f32(arg_f32x2, 1);
+	vget_lane_f32(arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vget_lane_f32(arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vgetq_lane_f32(arg_f32x4, 0);
+	vgetq_lane_f32(arg_f32x4, 3);
+	vgetq_lane_f32(arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vgetq_lane_f32(arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_one_element_from_vector_f64(float64x2_t arg_f64x2, float64x1_t arg_f64x1) {
+	vdupd_lane_f64(arg_f64x1, 0);
+	vdupd_lane_f64(arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupd_lane_f64(arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupd_laneq_f64(arg_f64x2, 0);
+	vdupd_laneq_f64(arg_f64x2, 1);
+	vdupd_laneq_f64(arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupd_laneq_f64(arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vget_lane_f64(arg_f64x1, 0);
+	vget_lane_f64(arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vget_lane_f64(arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vgetq_lane_f64(arg_f64x2, 0);
+	vgetq_lane_f64(arg_f64x2, 1);
+	vgetq_lane_f64(arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vgetq_lane_f64(arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_one_element_from_vector_p8(poly8x16_t arg_p8x16, poly8x8_t arg_p8x8) {
+	vdupb_lane_p8(arg_p8x8, 0);
+	vdupb_lane_p8(arg_p8x8, 7);
+	vdupb_lane_p8(arg_p8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupb_lane_p8(arg_p8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupb_laneq_p8(arg_p8x16, 0);
+	vdupb_laneq_p8(arg_p8x16, 15);
+	vdupb_laneq_p8(arg_p8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupb_laneq_p8(arg_p8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vget_lane_p8(arg_p8x8, 0);
+	vget_lane_p8(arg_p8x8, 7);
+	vget_lane_p8(arg_p8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vget_lane_p8(arg_p8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vgetq_lane_p8(arg_p8x16, 0);
+	vgetq_lane_p8(arg_p8x16, 15);
+	vgetq_lane_p8(arg_p8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vgetq_lane_p8(arg_p8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_one_element_from_vector_p16(poly16x4_t arg_p16x4, poly16x8_t arg_p16x8) {
+	vduph_lane_p16(arg_p16x4, 0);
+	vduph_lane_p16(arg_p16x4, 3);
+	vduph_lane_p16(arg_p16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vduph_lane_p16(arg_p16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vduph_laneq_p16(arg_p16x8, 0);
+	vduph_laneq_p16(arg_p16x8, 7);
+	vduph_laneq_p16(arg_p16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vduph_laneq_p16(arg_p16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vget_lane_p16(arg_p16x4, 0);
+	vget_lane_p16(arg_p16x4, 3);
+	vget_lane_p16(arg_p16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vget_lane_p16(arg_p16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vgetq_lane_p16(arg_p16x8, 0);
+	vgetq_lane_p16(arg_p16x8, 7);
+	vgetq_lane_p16(arg_p16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vgetq_lane_p16(arg_p16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_one_element_from_vector_p64(poly64x2_t arg_p64x2, poly64x1_t arg_p64x1) {
+	vget_lane_p64(arg_p64x1, 0);
+	vget_lane_p64(arg_p64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vget_lane_p64(arg_p64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vgetq_lane_p64(arg_p64x2, 0);
+	vgetq_lane_p64(arg_p64x2, 1);
+	vgetq_lane_p64(arg_p64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vgetq_lane_p64(arg_p64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_one_element_from_vector_f16(float16x8_t arg_f16x8, float16x4_t arg_f16x4) {
+	vget_lane_f16(arg_f16x4, 0);
+	vget_lane_f16(arg_f16x4, 3);
+	vget_lane_f16(arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vget_lane_f16(arg_f16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vgetq_lane_f16(arg_f16x8, 0);
+	vgetq_lane_f16(arg_f16x8, 7);
+	vgetq_lane_f16(arg_f16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vgetq_lane_f16(arg_f16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git clang/test/Sema/aarch64-neon-immediate-ranges/extract-vector-from-vectors.c clang/test/Sema/aarch64-neon-immediate-ranges/extract-vector-from-vectors.c
new file mode 100644
index 000000000000..0453e56401a6
--- /dev/null
+++ clang/test/Sema/aarch64-neon-immediate-ranges/extract-vector-from-vectors.c
@@ -0,0 +1,170 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+void test_extract_vector_from_a_pair_of_vectors_s8(int8x8_t arg_i8x8, int8x16_t arg_i8x16) {
+	vext_s8(arg_i8x8, arg_i8x8, 0);
+	vext_s8(arg_i8x8, arg_i8x8, 7);
+	vext_s8(arg_i8x8, arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vext_s8(arg_i8x8, arg_i8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vextq_s8(arg_i8x16, arg_i8x16, 0);
+	vextq_s8(arg_i8x16, arg_i8x16, 15);
+	vextq_s8(arg_i8x16, arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vextq_s8(arg_i8x16, arg_i8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_vector_from_a_pair_of_vectors_s16(int16x8_t arg_i16x8, int16x4_t arg_i16x4) {
+	vext_s16(arg_i16x4, arg_i16x4, 0);
+	vext_s16(arg_i16x4, arg_i16x4, 3);
+	vext_s16(arg_i16x4, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vext_s16(arg_i16x4, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vextq_s16(arg_i16x8, arg_i16x8, 0);
+	vextq_s16(arg_i16x8, arg_i16x8, 7);
+	vextq_s16(arg_i16x8, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vextq_s16(arg_i16x8, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_vector_from_a_pair_of_vectors_s32(int32x2_t arg_i32x2, int32x4_t arg_i32x4) {
+	vext_s32(arg_i32x2, arg_i32x2, 0);
+	vext_s32(arg_i32x2, arg_i32x2, 1);
+	vext_s32(arg_i32x2, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vext_s32(arg_i32x2, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vextq_s32(arg_i32x4, arg_i32x4, 0);
+	vextq_s32(arg_i32x4, arg_i32x4, 3);
+	vextq_s32(arg_i32x4, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vextq_s32(arg_i32x4, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_vector_from_a_pair_of_vectors_s64(int64x2_t arg_i64x2, int64x1_t arg_i64x1) {
+	vext_s64(arg_i64x1, arg_i64x1, 0);
+	vext_s64(arg_i64x1, arg_i64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vext_s64(arg_i64x1, arg_i64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vextq_s64(arg_i64x2, arg_i64x2, 0);
+	vextq_s64(arg_i64x2, arg_i64x2, 1);
+	vextq_s64(arg_i64x2, arg_i64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vextq_s64(arg_i64x2, arg_i64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_vector_from_a_pair_of_vectors_u8(uint8x8_t arg_u8x8, uint8x16_t arg_u8x16) {
+	vext_u8(arg_u8x8, arg_u8x8, 0);
+	vext_u8(arg_u8x8, arg_u8x8, 7);
+	vext_u8(arg_u8x8, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vext_u8(arg_u8x8, arg_u8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vextq_u8(arg_u8x16, arg_u8x16, 0);
+	vextq_u8(arg_u8x16, arg_u8x16, 15);
+	vextq_u8(arg_u8x16, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vextq_u8(arg_u8x16, arg_u8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_vector_from_a_pair_of_vectors_u16(uint16x4_t arg_u16x4, uint16x8_t arg_u16x8) {
+	vext_u16(arg_u16x4, arg_u16x4, 0);
+	vext_u16(arg_u16x4, arg_u16x4, 3);
+	vext_u16(arg_u16x4, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vext_u16(arg_u16x4, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vextq_u16(arg_u16x8, arg_u16x8, 0);
+	vextq_u16(arg_u16x8, arg_u16x8, 7);
+	vextq_u16(arg_u16x8, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vextq_u16(arg_u16x8, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_vector_from_a_pair_of_vectors_u32(uint32x2_t arg_u32x2, uint32x4_t arg_u32x4) {
+	vext_u32(arg_u32x2, arg_u32x2, 0);
+	vext_u32(arg_u32x2, arg_u32x2, 1);
+	vext_u32(arg_u32x2, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vext_u32(arg_u32x2, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vextq_u32(arg_u32x4, arg_u32x4, 0);
+	vextq_u32(arg_u32x4, arg_u32x4, 3);
+	vextq_u32(arg_u32x4, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vextq_u32(arg_u32x4, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_vector_from_a_pair_of_vectors_u64(uint64x1_t arg_u64x1, uint64x2_t arg_u64x2) {
+	vext_u64(arg_u64x1, arg_u64x1, 0);
+	vext_u64(arg_u64x1, arg_u64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vext_u64(arg_u64x1, arg_u64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vextq_u64(arg_u64x2, arg_u64x2, 0);
+	vextq_u64(arg_u64x2, arg_u64x2, 1);
+	vextq_u64(arg_u64x2, arg_u64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vextq_u64(arg_u64x2, arg_u64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_vector_from_a_pair_of_vectors_p64(poly64x2_t arg_p64x2, poly64x1_t arg_p64x1) {
+	vext_p64(arg_p64x1, arg_p64x1, 0);
+	vext_p64(arg_p64x1, arg_p64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vext_p64(arg_p64x1, arg_p64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vextq_p64(arg_p64x2, arg_p64x2, 0);
+	vextq_p64(arg_p64x2, arg_p64x2, 1);
+	vextq_p64(arg_p64x2, arg_p64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vextq_p64(arg_p64x2, arg_p64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_vector_from_a_pair_of_vectors_f32(float32x2_t arg_f32x2, float32x4_t arg_f32x4) {
+	vext_f32(arg_f32x2, arg_f32x2, 0);
+	vext_f32(arg_f32x2, arg_f32x2, 1);
+	vext_f32(arg_f32x2, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vext_f32(arg_f32x2, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vextq_f32(arg_f32x4, arg_f32x4, 0);
+	vextq_f32(arg_f32x4, arg_f32x4, 3);
+	vextq_f32(arg_f32x4, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vextq_f32(arg_f32x4, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_vector_from_a_pair_of_vectors_f64(float64x2_t arg_f64x2, float64x1_t arg_f64x1) {
+	vext_f64(arg_f64x1, arg_f64x1, 0);
+	vext_f64(arg_f64x1, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vext_f64(arg_f64x1, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vextq_f64(arg_f64x2, arg_f64x2, 0);
+	vextq_f64(arg_f64x2, arg_f64x2, 1);
+	vextq_f64(arg_f64x2, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vextq_f64(arg_f64x2, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_vector_from_a_pair_of_vectors_p8(poly8x8_t arg_p8x8, poly8x16_t arg_p8x16) {
+	vext_p8(arg_p8x8, arg_p8x8, 0);
+	vext_p8(arg_p8x8, arg_p8x8, 7);
+	vext_p8(arg_p8x8, arg_p8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vext_p8(arg_p8x8, arg_p8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vextq_p8(arg_p8x16, arg_p8x16, 0);
+	vextq_p8(arg_p8x16, arg_p8x16, 15);
+	vextq_p8(arg_p8x16, arg_p8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vextq_p8(arg_p8x16, arg_p8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_vector_from_a_pair_of_vectors_p16(poly16x8_t arg_p16x8, poly16x4_t arg_p16x4) {
+	vext_p16(arg_p16x4, arg_p16x4, 0);
+	vext_p16(arg_p16x4, arg_p16x4, 3);
+	vext_p16(arg_p16x4, arg_p16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vext_p16(arg_p16x4, arg_p16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vextq_p16(arg_p16x8, arg_p16x8, 0);
+	vextq_p16(arg_p16x8, arg_p16x8, 7);
+	vextq_p16(arg_p16x8, arg_p16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vextq_p16(arg_p16x8, arg_p16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git clang/test/Sema/aarch64-neon-immediate-ranges/fp16-scalar.c clang/test/Sema/aarch64-neon-immediate-ranges/fp16-scalar.c
new file mode 100644
index 000000000000..3a90b445f358
--- /dev/null
+++ clang/test/Sema/aarch64-neon-immediate-ranges/fp16-scalar.c
@@ -0,0 +1,79 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -target-feature +v8.2a -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+#include <arm_fp16.h>
+// REQUIRES: aarch64-registered-target
+
+void test_conversions_s16(int16_t arg_i16) {
+	vcvth_n_f16_s16(arg_i16, 1);
+	vcvth_n_f16_s16(arg_i16, 16);
+	vcvth_n_f16_s16(arg_i16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvth_n_f16_s16(arg_i16, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_conversions_s32(int32_t arg_i32) {
+	vcvth_n_f16_s32(arg_i32, 1);
+	vcvth_n_f16_s32(arg_i32, 16);
+	vcvth_n_f16_s32(arg_i32, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvth_n_f16_s32(arg_i32, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_conversions_s64(int64_t arg_i64) {
+	vcvth_n_f16_s64(arg_i64, 1);
+	vcvth_n_f16_s64(arg_i64, 16);
+	vcvth_n_f16_s64(arg_i64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvth_n_f16_s64(arg_i64, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_conversions_u16(uint16_t arg_u16) {
+	vcvth_n_f16_u16(arg_u16, 1);
+	vcvth_n_f16_u16(arg_u16, 16);
+	vcvth_n_f16_u16(arg_u16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvth_n_f16_u16(arg_u16, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_conversions_u32(uint32_t arg_u32) {
+	vcvth_n_f16_u32(arg_u32, 1);
+	vcvth_n_f16_u32(arg_u32, 16);
+	vcvth_n_f16_u32(arg_u32, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvth_n_f16_u32(arg_u32, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_conversions_u64(uint64_t arg_u64) {
+	vcvth_n_f16_u64(arg_u64, 1);
+	vcvth_n_f16_u64(arg_u64, 16);
+	vcvth_n_f16_u64(arg_u64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvth_n_f16_u64(arg_u64, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_conversions_f16(float16_t arg_f16) {
+	vcvth_n_s16_f16(arg_f16, 1);
+	vcvth_n_s16_f16(arg_f16, 16);
+	vcvth_n_s16_f16(arg_f16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvth_n_s16_f16(arg_f16, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvth_n_s32_f16(arg_f16, 1);
+	vcvth_n_s32_f16(arg_f16, 16);
+	vcvth_n_s32_f16(arg_f16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvth_n_s32_f16(arg_f16, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvth_n_s64_f16(arg_f16, 1);
+	vcvth_n_s64_f16(arg_f16, 16);
+	vcvth_n_s64_f16(arg_f16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvth_n_s64_f16(arg_f16, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvth_n_u16_f16(arg_f16, 1);
+	vcvth_n_u16_f16(arg_f16, 16);
+	vcvth_n_u16_f16(arg_f16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvth_n_u16_f16(arg_f16, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvth_n_u32_f16(arg_f16, 1);
+	vcvth_n_u32_f16(arg_f16, 16);
+	vcvth_n_u32_f16(arg_f16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvth_n_u32_f16(arg_f16, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvth_n_u64_f16(arg_f16, 1);
+	vcvth_n_u64_f16(arg_f16, 16);
+	vcvth_n_u64_f16(arg_f16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvth_n_u64_f16(arg_f16, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
diff --git clang/test/Sema/aarch64-neon-immediate-ranges/fp16-v84.c clang/test/Sema/aarch64-neon-immediate-ranges/fp16-v84.c
new file mode 100644
index 000000000000..d31cf321d761
--- /dev/null
+++ clang/test/Sema/aarch64-neon-immediate-ranges/fp16-v84.c
@@ -0,0 +1,89 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -target-feature +v8.4a -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+
+void test_fused_multiply_accumulate_f16(float32x2_t arg_f32x2, float32x4_t arg_f32x4, float16x4_t arg_f16x4, float16x8_t arg_f16x8) {
+	vfmlal_lane_low_f16(arg_f32x2, arg_f16x4, arg_f16x4, 0);
+	vfmlal_lane_low_f16(arg_f32x2, arg_f16x4, arg_f16x4, 3);
+	vfmlal_lane_low_f16(arg_f32x2, arg_f16x4, arg_f16x4, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vfmlal_lane_low_f16(arg_f32x2, arg_f16x4, arg_f16x4, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vfmlal_laneq_low_f16(arg_f32x2, arg_f16x4, arg_f16x8, 0);
+	vfmlal_laneq_low_f16(arg_f32x2, arg_f16x4, arg_f16x8, 7);
+	vfmlal_laneq_low_f16(arg_f32x2, arg_f16x4, arg_f16x8, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vfmlal_laneq_low_f16(arg_f32x2, arg_f16x4, arg_f16x8, 8); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vfmlalq_lane_low_f16(arg_f32x4, arg_f16x8, arg_f16x4, 0);
+	vfmlalq_lane_low_f16(arg_f32x4, arg_f16x8, arg_f16x4, 3);
+	vfmlalq_lane_low_f16(arg_f32x4, arg_f16x8, arg_f16x4, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vfmlalq_lane_low_f16(arg_f32x4, arg_f16x8, arg_f16x4, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vfmlalq_laneq_low_f16(arg_f32x4, arg_f16x8, arg_f16x8, 0);
+	vfmlalq_laneq_low_f16(arg_f32x4, arg_f16x8, arg_f16x8, 7);
+	vfmlalq_laneq_low_f16(arg_f32x4, arg_f16x8, arg_f16x8, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vfmlalq_laneq_low_f16(arg_f32x4, arg_f16x8, arg_f16x8, 8); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vfmlsl_lane_low_f16(arg_f32x2, arg_f16x4, arg_f16x4, 0);
+	vfmlsl_lane_low_f16(arg_f32x2, arg_f16x4, arg_f16x4, 3);
+	vfmlsl_lane_low_f16(arg_f32x2, arg_f16x4, arg_f16x4, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vfmlsl_lane_low_f16(arg_f32x2, arg_f16x4, arg_f16x4, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vfmlsl_laneq_low_f16(arg_f32x2, arg_f16x4, arg_f16x8, 0);
+	vfmlsl_laneq_low_f16(arg_f32x2, arg_f16x4, arg_f16x8, 7);
+	vfmlsl_laneq_low_f16(arg_f32x2, arg_f16x4, arg_f16x8, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vfmlsl_laneq_low_f16(arg_f32x2, arg_f16x4, arg_f16x8, 8); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vfmlslq_lane_low_f16(arg_f32x4, arg_f16x8, arg_f16x4, 0);
+	vfmlslq_lane_low_f16(arg_f32x4, arg_f16x8, arg_f16x4, 3);
+	vfmlslq_lane_low_f16(arg_f32x4, arg_f16x8, arg_f16x4, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vfmlslq_lane_low_f16(arg_f32x4, arg_f16x8, arg_f16x4, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vfmlslq_laneq_low_f16(arg_f32x4, arg_f16x8, arg_f16x8, 0);
+	vfmlslq_laneq_low_f16(arg_f32x4, arg_f16x8, arg_f16x8, 7);
+	vfmlslq_laneq_low_f16(arg_f32x4, arg_f16x8, arg_f16x8, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vfmlslq_laneq_low_f16(arg_f32x4, arg_f16x8, arg_f16x8, 8); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vfmlal_lane_high_f16(arg_f32x2, arg_f16x4, arg_f16x4, 0);
+	vfmlal_lane_high_f16(arg_f32x2, arg_f16x4, arg_f16x4, 3);
+	vfmlal_lane_high_f16(arg_f32x2, arg_f16x4, arg_f16x4, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vfmlal_lane_high_f16(arg_f32x2, arg_f16x4, arg_f16x4, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vfmlsl_lane_high_f16(arg_f32x2, arg_f16x4, arg_f16x4, 0);
+	vfmlsl_lane_high_f16(arg_f32x2, arg_f16x4, arg_f16x4, 3);
+	vfmlsl_lane_high_f16(arg_f32x2, arg_f16x4, arg_f16x4, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vfmlsl_lane_high_f16(arg_f32x2, arg_f16x4, arg_f16x4, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vfmlalq_lane_high_f16(arg_f32x4, arg_f16x8, arg_f16x4, 0);
+	vfmlalq_lane_high_f16(arg_f32x4, arg_f16x8, arg_f16x4, 3);
+	vfmlalq_lane_high_f16(arg_f32x4, arg_f16x8, arg_f16x4, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vfmlalq_lane_high_f16(arg_f32x4, arg_f16x8, arg_f16x4, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vfmlslq_lane_high_f16(arg_f32x4, arg_f16x8, arg_f16x4, 0);
+	vfmlslq_lane_high_f16(arg_f32x4, arg_f16x8, arg_f16x4, 3);
+	vfmlslq_lane_high_f16(arg_f32x4, arg_f16x8, arg_f16x4, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vfmlslq_lane_high_f16(arg_f32x4, arg_f16x8, arg_f16x4, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vfmlal_laneq_high_f16(arg_f32x2, arg_f16x4, arg_f16x8, 0);
+	vfmlal_laneq_high_f16(arg_f32x2, arg_f16x4, arg_f16x8, 7);
+	vfmlal_laneq_high_f16(arg_f32x2, arg_f16x4, arg_f16x8, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vfmlal_laneq_high_f16(arg_f32x2, arg_f16x4, arg_f16x8, 8); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vfmlsl_laneq_high_f16(arg_f32x2, arg_f16x4, arg_f16x8, 0);
+	vfmlsl_laneq_high_f16(arg_f32x2, arg_f16x4, arg_f16x8, 7);
+	vfmlsl_laneq_high_f16(arg_f32x2, arg_f16x4, arg_f16x8, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vfmlsl_laneq_high_f16(arg_f32x2, arg_f16x4, arg_f16x8, 8); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vfmlalq_laneq_high_f16(arg_f32x4, arg_f16x8, arg_f16x8, 0);
+	vfmlalq_laneq_high_f16(arg_f32x4, arg_f16x8, arg_f16x8, 7);
+	vfmlalq_laneq_high_f16(arg_f32x4, arg_f16x8, arg_f16x8, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vfmlalq_laneq_high_f16(arg_f32x4, arg_f16x8, arg_f16x8, 8); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vfmlslq_laneq_high_f16(arg_f32x4, arg_f16x8, arg_f16x8, 0);
+	vfmlslq_laneq_high_f16(arg_f32x4, arg_f16x8, arg_f16x8, 7);
+	vfmlslq_laneq_high_f16(arg_f32x4, arg_f16x8, arg_f16x8, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vfmlslq_laneq_high_f16(arg_f32x4, arg_f16x8, arg_f16x8, 8); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git clang/test/Sema/aarch64-neon-immediate-ranges/fp16-vector.c clang/test/Sema/aarch64-neon-immediate-ranges/fp16-vector.c
new file mode 100644
index 000000000000..6460018b7440
--- /dev/null
+++ clang/test/Sema/aarch64-neon-immediate-ranges/fp16-vector.c
@@ -0,0 +1,181 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -target-feature +v8.2a -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+#include <arm_fp16.h>
+// REQUIRES: aarch64-registered-target
+
+// vcvtq_n_f16_u16 is tested under clang/test/Sema/arm-mve-immediates.c
+
+void test_multiplication_f16(float16_t arg_f16, float16x8_t arg_f16x8, float16x4_t arg_f16x4) {
+	vmul_lane_f16(arg_f16x4, arg_f16x4, 0);
+	vmul_lane_f16(arg_f16x4, arg_f16x4, 3);
+	vmul_lane_f16(arg_f16x4, arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmul_lane_f16(arg_f16x4, arg_f16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulq_lane_f16(arg_f16x8, arg_f16x4, 0);
+	vmulq_lane_f16(arg_f16x8, arg_f16x4, 3);
+	vmulq_lane_f16(arg_f16x8, arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulq_lane_f16(arg_f16x8, arg_f16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmul_laneq_f16(arg_f16x4, arg_f16x8, 0);
+	vmul_laneq_f16(arg_f16x4, arg_f16x8, 7);
+	vmul_laneq_f16(arg_f16x4, arg_f16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmul_laneq_f16(arg_f16x4, arg_f16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulq_laneq_f16(arg_f16x8, arg_f16x8, 0);
+	vmulq_laneq_f16(arg_f16x8, arg_f16x8, 7);
+	vmulq_laneq_f16(arg_f16x8, arg_f16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulq_laneq_f16(arg_f16x8, arg_f16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulh_lane_f16(arg_f16, arg_f16x4, 0);
+	vmulh_lane_f16(arg_f16, arg_f16x4, 3);
+	vmulh_lane_f16(arg_f16, arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulh_lane_f16(arg_f16, arg_f16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulh_laneq_f16(arg_f16, arg_f16x8, 0);
+	vmulh_laneq_f16(arg_f16, arg_f16x8, 7);
+	vmulh_laneq_f16(arg_f16, arg_f16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulh_laneq_f16(arg_f16, arg_f16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_multiply_extended_f16(float16_t arg_f16, float16x8_t arg_f16x8, float16x4_t arg_f16x4) {
+	vmulx_lane_f16(arg_f16x4, arg_f16x4, 0);
+	vmulx_lane_f16(arg_f16x4, arg_f16x4, 3);
+	vmulx_lane_f16(arg_f16x4, arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulx_lane_f16(arg_f16x4, arg_f16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulxq_lane_f16(arg_f16x8, arg_f16x4, 0);
+	vmulxq_lane_f16(arg_f16x8, arg_f16x4, 3);
+	vmulxq_lane_f16(arg_f16x8, arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulxq_lane_f16(arg_f16x8, arg_f16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulx_laneq_f16(arg_f16x4, arg_f16x8, 0);
+	vmulx_laneq_f16(arg_f16x4, arg_f16x8, 7);
+	vmulx_laneq_f16(arg_f16x4, arg_f16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulx_laneq_f16(arg_f16x4, arg_f16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulxq_laneq_f16(arg_f16x8, arg_f16x8, 0);
+	vmulxq_laneq_f16(arg_f16x8, arg_f16x8, 7);
+	vmulxq_laneq_f16(arg_f16x8, arg_f16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulxq_laneq_f16(arg_f16x8, arg_f16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulxh_lane_f16(arg_f16, arg_f16x4, 0);
+	vmulxh_lane_f16(arg_f16, arg_f16x4, 3);
+	vmulxh_lane_f16(arg_f16, arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulxh_lane_f16(arg_f16, arg_f16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulxh_laneq_f16(arg_f16, arg_f16x8, 0);
+	vmulxh_laneq_f16(arg_f16, arg_f16x8, 7);
+	vmulxh_laneq_f16(arg_f16, arg_f16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulxh_laneq_f16(arg_f16, arg_f16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_fused_multiply_accumulate_f16(float16_t arg_f16, float16x8_t arg_f16x8, float16x4_t arg_f16x4) {
+	vfma_lane_f16(arg_f16x4, arg_f16x4, arg_f16x4, 0);
+	vfma_lane_f16(arg_f16x4, arg_f16x4, arg_f16x4, 3);
+	vfma_lane_f16(arg_f16x4, arg_f16x4, arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfma_lane_f16(arg_f16x4, arg_f16x4, arg_f16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmaq_lane_f16(arg_f16x8, arg_f16x8, arg_f16x4, 0);
+	vfmaq_lane_f16(arg_f16x8, arg_f16x8, arg_f16x4, 3);
+	vfmaq_lane_f16(arg_f16x8, arg_f16x8, arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmaq_lane_f16(arg_f16x8, arg_f16x8, arg_f16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfma_laneq_f16(arg_f16x4, arg_f16x4, arg_f16x8, 0);
+	vfma_laneq_f16(arg_f16x4, arg_f16x4, arg_f16x8, 7);
+	vfma_laneq_f16(arg_f16x4, arg_f16x4, arg_f16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfma_laneq_f16(arg_f16x4, arg_f16x4, arg_f16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmaq_laneq_f16(arg_f16x8, arg_f16x8, arg_f16x8, 0);
+	vfmaq_laneq_f16(arg_f16x8, arg_f16x8, arg_f16x8, 7);
+	vfmaq_laneq_f16(arg_f16x8, arg_f16x8, arg_f16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmaq_laneq_f16(arg_f16x8, arg_f16x8, arg_f16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmah_lane_f16(arg_f16, arg_f16, arg_f16x4, 0);
+	vfmah_lane_f16(arg_f16, arg_f16, arg_f16x4, 3);
+	vfmah_lane_f16(arg_f16, arg_f16, arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmah_lane_f16(arg_f16, arg_f16, arg_f16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmah_laneq_f16(arg_f16, arg_f16, arg_f16x8, 0);
+	vfmah_laneq_f16(arg_f16, arg_f16, arg_f16x8, 7);
+	vfmah_laneq_f16(arg_f16, arg_f16, arg_f16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmah_laneq_f16(arg_f16, arg_f16, arg_f16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfms_lane_f16(arg_f16x4, arg_f16x4, arg_f16x4, 0);
+	vfms_lane_f16(arg_f16x4, arg_f16x4, arg_f16x4, 3);
+	vfms_lane_f16(arg_f16x4, arg_f16x4, arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfms_lane_f16(arg_f16x4, arg_f16x4, arg_f16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmsq_lane_f16(arg_f16x8, arg_f16x8, arg_f16x4, 0);
+	vfmsq_lane_f16(arg_f16x8, arg_f16x8, arg_f16x4, 3);
+	vfmsq_lane_f16(arg_f16x8, arg_f16x8, arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmsq_lane_f16(arg_f16x8, arg_f16x8, arg_f16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfms_laneq_f16(arg_f16x4, arg_f16x4, arg_f16x8, 0);
+	vfms_laneq_f16(arg_f16x4, arg_f16x4, arg_f16x8, 7);
+	vfms_laneq_f16(arg_f16x4, arg_f16x4, arg_f16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfms_laneq_f16(arg_f16x4, arg_f16x4, arg_f16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmsq_laneq_f16(arg_f16x8, arg_f16x8, arg_f16x8, 0);
+	vfmsq_laneq_f16(arg_f16x8, arg_f16x8, arg_f16x8, 7);
+	vfmsq_laneq_f16(arg_f16x8, arg_f16x8, arg_f16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmsq_laneq_f16(arg_f16x8, arg_f16x8, arg_f16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmsh_lane_f16(arg_f16, arg_f16, arg_f16x4, 0);
+	vfmsh_lane_f16(arg_f16, arg_f16, arg_f16x4, 3);
+	vfmsh_lane_f16(arg_f16, arg_f16, arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmsh_lane_f16(arg_f16, arg_f16, arg_f16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmsh_laneq_f16(arg_f16, arg_f16, arg_f16x8, 0);
+	vfmsh_laneq_f16(arg_f16, arg_f16, arg_f16x8, 7);
+	vfmsh_laneq_f16(arg_f16, arg_f16, arg_f16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmsh_laneq_f16(arg_f16, arg_f16, arg_f16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_conversions_s16(int16x8_t arg_i16x8, int16x4_t arg_i16x4) {
+	vcvt_n_f16_s16(arg_i16x4, 1);
+	vcvt_n_f16_s16(arg_i16x4, 16);
+	vcvt_n_f16_s16(arg_i16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvt_n_f16_s16(arg_i16x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvtq_n_f16_s16(arg_i16x8, 1);
+	vcvtq_n_f16_s16(arg_i16x8, 16);
+	vcvtq_n_f16_s16(arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvtq_n_f16_s16(arg_i16x8, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_conversions_u16(uint16x8_t arg_u16x8, uint16x4_t arg_u16x4) {
+	vcvt_n_f16_u16(arg_u16x4, 1);
+	vcvt_n_f16_u16(arg_u16x4, 16);
+	vcvt_n_f16_u16(arg_u16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvt_n_f16_u16(arg_u16x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_conversions_f16(float16x8_t arg_f16x8, float16x4_t arg_f16x4) {
+	vcvt_n_s16_f16(arg_f16x4, 1);
+	vcvt_n_s16_f16(arg_f16x4, 16);
+	vcvt_n_s16_f16(arg_f16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvt_n_s16_f16(arg_f16x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvtq_n_s16_f16(arg_f16x8, 1);
+	vcvtq_n_s16_f16(arg_f16x8, 16);
+	vcvtq_n_s16_f16(arg_f16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvtq_n_s16_f16(arg_f16x8, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvt_n_u16_f16(arg_f16x4, 1);
+	vcvt_n_u16_f16(arg_f16x4, 16);
+	vcvt_n_u16_f16(arg_f16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvt_n_u16_f16(arg_f16x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvtq_n_u16_f16(arg_f16x8, 1);
+	vcvtq_n_u16_f16(arg_f16x8, 16);
+	vcvtq_n_u16_f16(arg_f16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvtq_n_u16_f16(arg_f16x8, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git clang/test/Sema/aarch64-neon-immediate-ranges/fused-multiply-accumulate.c clang/test/Sema/aarch64-neon-immediate-ranges/fused-multiply-accumulate.c
new file mode 100644
index 000000000000..c65a2e6e6533
--- /dev/null
+++ clang/test/Sema/aarch64-neon-immediate-ranges/fused-multiply-accumulate.c
@@ -0,0 +1,126 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+
+void test_fused_multiply_accumulate_f32(float32x2_t arg_f32x2, float32_t arg_f32, float32x4_t arg_f32x4) {
+	vfma_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, 0);
+	vfma_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, 1);
+	vfma_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfma_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmaq_lane_f32(arg_f32x4, arg_f32x4, arg_f32x2, 0);
+	vfmaq_lane_f32(arg_f32x4, arg_f32x4, arg_f32x2, 1);
+	vfmaq_lane_f32(arg_f32x4, arg_f32x4, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmaq_lane_f32(arg_f32x4, arg_f32x4, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmas_lane_f32(arg_f32, arg_f32, arg_f32x2, 0);
+	vfmas_lane_f32(arg_f32, arg_f32, arg_f32x2, 1);
+	vfmas_lane_f32(arg_f32, arg_f32, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmas_lane_f32(arg_f32, arg_f32, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfma_laneq_f32(arg_f32x2, arg_f32x2, arg_f32x4, 0);
+	vfma_laneq_f32(arg_f32x2, arg_f32x2, arg_f32x4, 3);
+	vfma_laneq_f32(arg_f32x2, arg_f32x2, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfma_laneq_f32(arg_f32x2, arg_f32x2, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmaq_laneq_f32(arg_f32x4, arg_f32x4, arg_f32x4, 0);
+	vfmaq_laneq_f32(arg_f32x4, arg_f32x4, arg_f32x4, 3);
+	vfmaq_laneq_f32(arg_f32x4, arg_f32x4, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmaq_laneq_f32(arg_f32x4, arg_f32x4, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmas_laneq_f32(arg_f32, arg_f32, arg_f32x4, 0);
+	vfmas_laneq_f32(arg_f32, arg_f32, arg_f32x4, 3);
+	vfmas_laneq_f32(arg_f32, arg_f32, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmas_laneq_f32(arg_f32, arg_f32, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfms_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, 0);
+	vfms_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, 1);
+	vfms_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfms_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmsq_lane_f32(arg_f32x4, arg_f32x4, arg_f32x2, 0);
+	vfmsq_lane_f32(arg_f32x4, arg_f32x4, arg_f32x2, 1);
+	vfmsq_lane_f32(arg_f32x4, arg_f32x4, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmsq_lane_f32(arg_f32x4, arg_f32x4, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmss_lane_f32(arg_f32, arg_f32, arg_f32x2, 0);
+	vfmss_lane_f32(arg_f32, arg_f32, arg_f32x2, 1);
+	vfmss_lane_f32(arg_f32, arg_f32, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmss_lane_f32(arg_f32, arg_f32, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfms_laneq_f32(arg_f32x2, arg_f32x2, arg_f32x4, 0);
+	vfms_laneq_f32(arg_f32x2, arg_f32x2, arg_f32x4, 3);
+	vfms_laneq_f32(arg_f32x2, arg_f32x2, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfms_laneq_f32(arg_f32x2, arg_f32x2, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmsq_laneq_f32(arg_f32x4, arg_f32x4, arg_f32x4, 0);
+	vfmsq_laneq_f32(arg_f32x4, arg_f32x4, arg_f32x4, 3);
+	vfmsq_laneq_f32(arg_f32x4, arg_f32x4, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmsq_laneq_f32(arg_f32x4, arg_f32x4, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmss_laneq_f32(arg_f32, arg_f32, arg_f32x4, 0);
+	vfmss_laneq_f32(arg_f32, arg_f32, arg_f32x4, 3);
+	vfmss_laneq_f32(arg_f32, arg_f32, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmss_laneq_f32(arg_f32, arg_f32, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_fused_multiply_accumulate_f64(float64x2_t arg_f64x2, float64_t arg_f64, float64x1_t arg_f64x1) {
+	vfma_lane_f64(arg_f64x1, arg_f64x1, arg_f64x1, 0);
+	vfma_lane_f64(arg_f64x1, arg_f64x1, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfma_lane_f64(arg_f64x1, arg_f64x1, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmaq_lane_f64(arg_f64x2, arg_f64x2, arg_f64x1, 0);
+	vfmaq_lane_f64(arg_f64x2, arg_f64x2, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmaq_lane_f64(arg_f64x2, arg_f64x2, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmad_lane_f64(arg_f64, arg_f64, arg_f64x1, 0);
+	vfmad_lane_f64(arg_f64, arg_f64, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmad_lane_f64(arg_f64, arg_f64, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfma_laneq_f64(arg_f64x1, arg_f64x1, arg_f64x2, 0);
+	vfma_laneq_f64(arg_f64x1, arg_f64x1, arg_f64x2, 1);
+	vfma_laneq_f64(arg_f64x1, arg_f64x1, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfma_laneq_f64(arg_f64x1, arg_f64x1, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmaq_laneq_f64(arg_f64x2, arg_f64x2, arg_f64x2, 0);
+	vfmaq_laneq_f64(arg_f64x2, arg_f64x2, arg_f64x2, 1);
+	vfmaq_laneq_f64(arg_f64x2, arg_f64x2, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmaq_laneq_f64(arg_f64x2, arg_f64x2, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmad_laneq_f64(arg_f64, arg_f64, arg_f64x2, 0);
+	vfmad_laneq_f64(arg_f64, arg_f64, arg_f64x2, 1);
+	vfmad_laneq_f64(arg_f64, arg_f64, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmad_laneq_f64(arg_f64, arg_f64, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfms_lane_f64(arg_f64x1, arg_f64x1, arg_f64x1, 0);
+	vfms_lane_f64(arg_f64x1, arg_f64x1, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfms_lane_f64(arg_f64x1, arg_f64x1, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmsq_lane_f64(arg_f64x2, arg_f64x2, arg_f64x1, 0);
+	vfmsq_lane_f64(arg_f64x2, arg_f64x2, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmsq_lane_f64(arg_f64x2, arg_f64x2, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmsd_lane_f64(arg_f64, arg_f64, arg_f64x1, 0);
+	vfmsd_lane_f64(arg_f64, arg_f64, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmsd_lane_f64(arg_f64, arg_f64, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfms_laneq_f64(arg_f64x1, arg_f64x1, arg_f64x2, 0);
+	vfms_laneq_f64(arg_f64x1, arg_f64x1, arg_f64x2, 1);
+	vfms_laneq_f64(arg_f64x1, arg_f64x1, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfms_laneq_f64(arg_f64x1, arg_f64x1, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmsq_laneq_f64(arg_f64x2, arg_f64x2, arg_f64x2, 0);
+	vfmsq_laneq_f64(arg_f64x2, arg_f64x2, arg_f64x2, 1);
+	vfmsq_laneq_f64(arg_f64x2, arg_f64x2, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmsq_laneq_f64(arg_f64x2, arg_f64x2, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmsd_laneq_f64(arg_f64, arg_f64, arg_f64x2, 0);
+	vfmsd_laneq_f64(arg_f64, arg_f64, arg_f64x2, 1);
+	vfmsd_laneq_f64(arg_f64, arg_f64, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmsd_laneq_f64(arg_f64, arg_f64, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git clang/test/Sema/aarch64-neon-immediate-ranges/luti.c clang/test/Sema/aarch64-neon-immediate-ranges/luti.c
new file mode 100644
index 000000000000..bed8cbc1481d
--- /dev/null
+++ clang/test/Sema/aarch64-neon-immediate-ranges/luti.c
@@ -0,0 +1,283 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -target-feature +lut -target-feature +bf16 -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+// 2-bit indices
+
+void test_lookup_read_2bit_u8(uint8x16_t arg_u8x16, uint8x8_t arg_u8x8) {
+	vluti2_lane_u8(arg_u8x8, arg_u8x8, 0);
+	vluti2_lane_u8(arg_u8x8, arg_u8x8, 1);
+	vluti2_lane_u8(arg_u8x8, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2_lane_u8(arg_u8x8, arg_u8x8, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti2_laneq_u8(arg_u8x8, arg_u8x16, 0);
+	vluti2_laneq_u8(arg_u8x8, arg_u8x16, 3);
+	vluti2_laneq_u8(arg_u8x8, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2_laneq_u8(arg_u8x8, arg_u8x16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti2q_lane_u8(arg_u8x16, arg_u8x8, 0);
+	vluti2q_lane_u8(arg_u8x16, arg_u8x8, 1);
+	vluti2q_lane_u8(arg_u8x16, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2q_lane_u8(arg_u8x16, arg_u8x8, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti2q_laneq_u8(arg_u8x16, arg_u8x16, 0);
+	vluti2q_laneq_u8(arg_u8x16, arg_u8x16, 3);
+	vluti2q_laneq_u8(arg_u8x16, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2q_laneq_u8(arg_u8x16, arg_u8x16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_lookup_read_2bit_s8(int8x8_t arg_i8x8, uint8x16_t arg_u8x16, uint8x8_t arg_u8x8, int8x16_t arg_i8x16) {
+	vluti2_lane_s8(arg_i8x8, arg_u8x8, 0);
+	vluti2_lane_s8(arg_i8x8, arg_u8x8, 1);
+	vluti2_lane_s8(arg_i8x8, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2_lane_s8(arg_i8x8, arg_u8x8, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti2_laneq_s8(arg_i8x8, arg_u8x16, 0);
+	vluti2_laneq_s8(arg_i8x8, arg_u8x16, 3);
+	vluti2_laneq_s8(arg_i8x8, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2_laneq_s8(arg_i8x8, arg_u8x16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti2q_lane_s8(arg_i8x16, arg_u8x8, 0);
+	vluti2q_lane_s8(arg_i8x16, arg_u8x8, 1);
+	vluti2q_lane_s8(arg_i8x16, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2q_lane_s8(arg_i8x16, arg_u8x8, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti2q_laneq_s8(arg_i8x16, arg_u8x16, 0);
+	vluti2q_laneq_s8(arg_i8x16, arg_u8x16, 3);
+	vluti2q_laneq_s8(arg_i8x16, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2q_laneq_s8(arg_i8x16, arg_u8x16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_lookup_read_2bit_p8(poly8x16_t arg_p8x16, poly8x8_t arg_p8x8, uint8x16_t arg_u8x16, uint8x8_t arg_u8x8) {
+	vluti2_lane_p8(arg_p8x8, arg_u8x8, 0);
+	vluti2_lane_p8(arg_p8x8, arg_u8x8, 1);
+	vluti2_lane_p8(arg_p8x8, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2_lane_p8(arg_p8x8, arg_u8x8, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti2_laneq_p8(arg_p8x8, arg_u8x16, 0);
+	vluti2_laneq_p8(arg_p8x8, arg_u8x16, 3);
+	vluti2_laneq_p8(arg_p8x8, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2_laneq_p8(arg_p8x8, arg_u8x16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti2q_lane_p8(arg_p8x16, arg_u8x8, 0);
+	vluti2q_lane_p8(arg_p8x16, arg_u8x8, 1);
+	vluti2q_lane_p8(arg_p8x16, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2q_lane_p8(arg_p8x16, arg_u8x8, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti2q_laneq_p8(arg_p8x16, arg_u8x16, 0);
+	vluti2q_laneq_p8(arg_p8x16, arg_u8x16, 3);
+	vluti2q_laneq_p8(arg_p8x16, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2q_laneq_p8(arg_p8x16, arg_u8x16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_lookup_read_2bit_u16(uint16x4_t arg_u16x4, uint8x16_t arg_u8x16, uint8x8_t arg_u8x8, uint16x8_t arg_u16x8) {
+	vluti2_lane_u16(arg_u16x4, arg_u8x8, 0);
+	vluti2_lane_u16(arg_u16x4, arg_u8x8, 3);
+	vluti2_lane_u16(arg_u16x4, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2_lane_u16(arg_u16x4, arg_u8x8, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti2_laneq_u16(arg_u16x4, arg_u8x16, 0);
+	vluti2_laneq_u16(arg_u16x4, arg_u8x16, 7);
+	vluti2_laneq_u16(arg_u16x4, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2_laneq_u16(arg_u16x4, arg_u8x16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti2q_lane_u16(arg_u16x8, arg_u8x8, 0);
+	vluti2q_lane_u16(arg_u16x8, arg_u8x8, 3);
+	vluti2q_lane_u16(arg_u16x8, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2q_lane_u16(arg_u16x8, arg_u8x8, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti2q_laneq_u16(arg_u16x8, arg_u8x16, 0);
+	vluti2q_laneq_u16(arg_u16x8, arg_u8x16, 7);
+	vluti2q_laneq_u16(arg_u16x8, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2q_laneq_u16(arg_u16x8, arg_u8x16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_lookup_read_2bit_s16(int16x4_t arg_i16x4, int16x8_t arg_i16x8, uint8x16_t arg_u8x16, uint8x8_t arg_u8x8) {
+	vluti2_lane_s16(arg_i16x4, arg_u8x8, 0);
+	vluti2_lane_s16(arg_i16x4, arg_u8x8, 3);
+	vluti2_lane_s16(arg_i16x4, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2_lane_s16(arg_i16x4, arg_u8x8, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti2_laneq_s16(arg_i16x4, arg_u8x16, 0);
+	vluti2_laneq_s16(arg_i16x4, arg_u8x16, 7);
+	vluti2_laneq_s16(arg_i16x4, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2_laneq_s16(arg_i16x4, arg_u8x16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti2q_lane_s16(arg_i16x8, arg_u8x8, 0);
+	vluti2q_lane_s16(arg_i16x8, arg_u8x8, 3);
+	vluti2q_lane_s16(arg_i16x8, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2q_lane_s16(arg_i16x8, arg_u8x8, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti2q_laneq_s16(arg_i16x8, arg_u8x16, 0);
+	vluti2q_laneq_s16(arg_i16x8, arg_u8x16, 7);
+	vluti2q_laneq_s16(arg_i16x8, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2q_laneq_s16(arg_i16x8, arg_u8x16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_lookup_read_2bit_f16(float16x8_t arg_f16x8, uint8x16_t arg_u8x16, float16x4_t arg_f16x4, uint8x8_t arg_u8x8) {
+	vluti2_lane_f16(arg_f16x4, arg_u8x8, 0);
+	vluti2_lane_f16(arg_f16x4, arg_u8x8, 3);
+	vluti2_lane_f16(arg_f16x4, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2_lane_f16(arg_f16x4, arg_u8x8, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti2_laneq_f16(arg_f16x4, arg_u8x16, 0);
+	vluti2_laneq_f16(arg_f16x4, arg_u8x16, 7);
+	vluti2_laneq_f16(arg_f16x4, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2_laneq_f16(arg_f16x4, arg_u8x16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti2q_lane_f16(arg_f16x8, arg_u8x8, 0);
+	vluti2q_lane_f16(arg_f16x8, arg_u8x8, 3);
+	vluti2q_lane_f16(arg_f16x8, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2q_lane_f16(arg_f16x8, arg_u8x8, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti2q_laneq_f16(arg_f16x8, arg_u8x16, 0);
+	vluti2q_laneq_f16(arg_f16x8, arg_u8x16, 7);
+	vluti2q_laneq_f16(arg_f16x8, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2q_laneq_f16(arg_f16x8, arg_u8x16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_lookup_read_2bit_bf16(bfloat16x4_t arg_b16x4, bfloat16x8_t arg_b16x8, uint8x16_t arg_u8x16, uint8x8_t arg_u8x8) {
+	vluti2_lane_bf16(arg_b16x4, arg_u8x8, 0);
+	vluti2_lane_bf16(arg_b16x4, arg_u8x8, 3);
+	vluti2_lane_bf16(arg_b16x4, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2_lane_bf16(arg_b16x4, arg_u8x8, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti2_laneq_bf16(arg_b16x4, arg_u8x16, 0);
+	vluti2_laneq_bf16(arg_b16x4, arg_u8x16, 7);
+	vluti2_laneq_bf16(arg_b16x4, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2_laneq_bf16(arg_b16x4, arg_u8x16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti2q_lane_bf16(arg_b16x8, arg_u8x8, 0);
+	vluti2q_lane_bf16(arg_b16x8, arg_u8x8, 3);
+	vluti2q_lane_bf16(arg_b16x8, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2q_lane_bf16(arg_b16x8, arg_u8x8, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti2q_laneq_bf16(arg_b16x8, arg_u8x16, 0);
+	vluti2q_laneq_bf16(arg_b16x8, arg_u8x16, 7);
+	vluti2q_laneq_bf16(arg_b16x8, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2q_laneq_bf16(arg_b16x8, arg_u8x16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_lookup_read_2bit_p16(poly16x4_t arg_p16x4, poly16x8_t arg_p16x8, uint8x16_t arg_u8x16, uint8x8_t arg_u8x8) {
+	vluti2_lane_p16(arg_p16x4, arg_u8x8, 0);
+	vluti2_lane_p16(arg_p16x4, arg_u8x8, 3);
+	vluti2_lane_p16(arg_p16x4, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2_lane_p16(arg_p16x4, arg_u8x8, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti2_laneq_p16(arg_p16x4, arg_u8x16, 0);
+	vluti2_laneq_p16(arg_p16x4, arg_u8x16, 7);
+	vluti2_laneq_p16(arg_p16x4, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2_laneq_p16(arg_p16x4, arg_u8x16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti2q_lane_p16(arg_p16x8, arg_u8x8, 0);
+	vluti2q_lane_p16(arg_p16x8, arg_u8x8, 3);
+	vluti2q_lane_p16(arg_p16x8, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2q_lane_p16(arg_p16x8, arg_u8x8, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti2q_laneq_p16(arg_p16x8, arg_u8x16, 0);
+	vluti2q_laneq_p16(arg_p16x8, arg_u8x16, 7);
+	vluti2q_laneq_p16(arg_p16x8, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2q_laneq_p16(arg_p16x8, arg_u8x16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+// 4-bit indices
+
+void test_lookup_read_4bit_u8(uint8x8_t arg_u8x8, uint8x16_t arg_u8x16) {
+	vluti4q_lane_u8(arg_u8x16, arg_u8x8, 0);
+	vluti4q_lane_u8(arg_u8x16, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti4q_lane_u8(arg_u8x16, arg_u8x8, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti4q_laneq_u8(arg_u8x16, arg_u8x16, 0);
+	vluti4q_laneq_u8(arg_u8x16, arg_u8x16, 1);
+	vluti4q_laneq_u8(arg_u8x16, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti4q_laneq_u8(arg_u8x16, arg_u8x16, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_lookup_read_4bit_s8(int8x16_t arg_i8x16, uint8x8_t arg_u8x8, uint8x16_t arg_u8x16) {
+	vluti4q_lane_s8(arg_i8x16, arg_u8x8, 0);
+	vluti4q_lane_s8(arg_i8x16, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti4q_lane_s8(arg_i8x16, arg_u8x8, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti4q_laneq_s8(arg_i8x16, arg_u8x16, 0);
+	vluti4q_laneq_s8(arg_i8x16, arg_u8x16, 1);
+	vluti4q_laneq_s8(arg_i8x16, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti4q_laneq_s8(arg_i8x16, arg_u8x16, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_lookup_read_4bit_p8(uint8x8_t arg_u8x8, uint8x16_t arg_u8x16, poly8x16_t arg_p8x16) {
+	vluti4q_lane_p8(arg_p8x16, arg_u8x8, 0);
+	vluti4q_lane_p8(arg_p8x16, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti4q_lane_p8(arg_p8x16, arg_u8x8, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti4q_laneq_p8(arg_p8x16, arg_u8x16, 0);
+	vluti4q_laneq_p8(arg_p8x16, arg_u8x16, 1);
+	vluti4q_laneq_p8(arg_p8x16, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti4q_laneq_p8(arg_p8x16, arg_u8x16, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_lookup_read_4bit_x2(int16x8x2_t arg_i16x8x2, uint8x8_t arg_u8x8, float16x8x2_t arg_f16x8x2, uint8x16_t arg_u8x16, poly16x8x2_t arg_p16x8x2, uint16x8x2_t arg_u16x8x2, bfloat16x8x2_t arg_b16x8x2) {
+	vluti4q_lane_u16_x2(arg_u16x8x2, arg_u8x8, 0);
+	vluti4q_lane_u16_x2(arg_u16x8x2, arg_u8x8, 1);
+	vluti4q_lane_u16_x2(arg_u16x8x2, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti4q_lane_u16_x2(arg_u16x8x2, arg_u8x8, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti4q_laneq_u16_x2(arg_u16x8x2, arg_u8x16, 0);
+	vluti4q_laneq_u16_x2(arg_u16x8x2, arg_u8x16, 3);
+	vluti4q_laneq_u16_x2(arg_u16x8x2, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti4q_laneq_u16_x2(arg_u16x8x2, arg_u8x16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti4q_lane_s16_x2(arg_i16x8x2, arg_u8x8, 0);
+	vluti4q_lane_s16_x2(arg_i16x8x2, arg_u8x8, 1);
+	vluti4q_lane_s16_x2(arg_i16x8x2, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti4q_lane_s16_x2(arg_i16x8x2, arg_u8x8, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti4q_laneq_s16_x2(arg_i16x8x2, arg_u8x16, 0);
+	vluti4q_laneq_s16_x2(arg_i16x8x2, arg_u8x16, 3);
+	vluti4q_laneq_s16_x2(arg_i16x8x2, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti4q_laneq_s16_x2(arg_i16x8x2, arg_u8x16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti4q_lane_f16_x2(arg_f16x8x2, arg_u8x8, 0);
+	vluti4q_lane_f16_x2(arg_f16x8x2, arg_u8x8, 1);
+	vluti4q_lane_f16_x2(arg_f16x8x2, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti4q_lane_f16_x2(arg_f16x8x2, arg_u8x8, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti4q_laneq_f16_x2(arg_f16x8x2, arg_u8x16, 0);
+	vluti4q_laneq_f16_x2(arg_f16x8x2, arg_u8x16, 3);
+	vluti4q_laneq_f16_x2(arg_f16x8x2, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti4q_laneq_f16_x2(arg_f16x8x2, arg_u8x16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti4q_lane_bf16_x2(arg_b16x8x2, arg_u8x8, 0);
+	vluti4q_lane_bf16_x2(arg_b16x8x2, arg_u8x8, 1);
+	vluti4q_lane_bf16_x2(arg_b16x8x2, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti4q_lane_bf16_x2(arg_b16x8x2, arg_u8x8, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti4q_laneq_bf16_x2(arg_b16x8x2, arg_u8x16, 0);
+	vluti4q_laneq_bf16_x2(arg_b16x8x2, arg_u8x16, 3);
+	vluti4q_laneq_bf16_x2(arg_b16x8x2, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti4q_laneq_bf16_x2(arg_b16x8x2, arg_u8x16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti4q_lane_p16_x2(arg_p16x8x2, arg_u8x8, 0);
+	vluti4q_lane_p16_x2(arg_p16x8x2, arg_u8x8, 1);
+	vluti4q_lane_p16_x2(arg_p16x8x2, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti4q_lane_p16_x2(arg_p16x8x2, arg_u8x8, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti4q_laneq_p16_x2(arg_p16x8x2, arg_u8x16, 0);
+	vluti4q_laneq_p16_x2(arg_p16x8x2, arg_u8x16, 3);
+	vluti4q_laneq_p16_x2(arg_p16x8x2, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti4q_laneq_p16_x2(arg_p16x8x2, arg_u8x16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+
diff --git clang/test/Sema/aarch64-neon-immediate-ranges/matrix-multiplication.c clang/test/Sema/aarch64-neon-immediate-ranges/matrix-multiplication.c
new file mode 100644
index 000000000000..dd501b84bae4
--- /dev/null
+++ clang/test/Sema/aarch64-neon-immediate-ranges/matrix-multiplication.c
@@ -0,0 +1,50 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -target-feature +v8.6a -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+
+void test_dot_product_s32(int8x8_t arg_i8x8, int32x2_t arg_i32x2, uint8x16_t arg_u8x16, uint8x8_t arg_u8x8,
+						  int32x4_t arg_i32x4, int8x16_t arg_i8x16) {
+	vusdot_lane_s32(arg_i32x2, arg_u8x8, arg_i8x8, 0);
+	vusdot_lane_s32(arg_i32x2, arg_u8x8, arg_i8x8, 1);
+	vusdot_lane_s32(arg_i32x2, arg_u8x8, arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vusdot_lane_s32(arg_i32x2, arg_u8x8, arg_i8x8, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsudot_lane_s32(arg_i32x2, arg_i8x8, arg_u8x8, 0);
+	vsudot_lane_s32(arg_i32x2, arg_i8x8, arg_u8x8, 1);
+	vsudot_lane_s32(arg_i32x2, arg_i8x8, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsudot_lane_s32(arg_i32x2, arg_i8x8, arg_u8x8, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vusdot_laneq_s32(arg_i32x2, arg_u8x8, arg_i8x16, 0);
+	vusdot_laneq_s32(arg_i32x2, arg_u8x8, arg_i8x16, 3);
+	vusdot_laneq_s32(arg_i32x2, arg_u8x8, arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vusdot_laneq_s32(arg_i32x2, arg_u8x8, arg_i8x16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsudot_laneq_s32(arg_i32x2, arg_i8x8, arg_u8x16, 0);
+	vsudot_laneq_s32(arg_i32x2, arg_i8x8, arg_u8x16, 3);
+	vsudot_laneq_s32(arg_i32x2, arg_i8x8, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsudot_laneq_s32(arg_i32x2, arg_i8x8, arg_u8x16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vusdotq_lane_s32(arg_i32x4, arg_u8x16, arg_i8x8, 0);
+	vusdotq_lane_s32(arg_i32x4, arg_u8x16, arg_i8x8, 1);
+	vusdotq_lane_s32(arg_i32x4, arg_u8x16, arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vusdotq_lane_s32(arg_i32x4, arg_u8x16, arg_i8x8, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsudotq_lane_s32(arg_i32x4, arg_i8x16, arg_u8x8, 0);
+	vsudotq_lane_s32(arg_i32x4, arg_i8x16, arg_u8x8, 1);
+	vsudotq_lane_s32(arg_i32x4, arg_i8x16, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsudotq_lane_s32(arg_i32x4, arg_i8x16, arg_u8x8, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vusdotq_laneq_s32(arg_i32x4, arg_u8x16, arg_i8x16, 0);
+	vusdotq_laneq_s32(arg_i32x4, arg_u8x16, arg_i8x16, 3);
+	vusdotq_laneq_s32(arg_i32x4, arg_u8x16, arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vusdotq_laneq_s32(arg_i32x4, arg_u8x16, arg_i8x16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsudotq_laneq_s32(arg_i32x4, arg_i8x16, arg_u8x16, 0);
+	vsudotq_laneq_s32(arg_i32x4, arg_i8x16, arg_u8x16, 3);
+	vsudotq_laneq_s32(arg_i32x4, arg_i8x16, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsudotq_laneq_s32(arg_i32x4, arg_i8x16, arg_u8x16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git clang/test/Sema/aarch64-neon-immediate-ranges/multiply-extended.c clang/test/Sema/aarch64-neon-immediate-ranges/multiply-extended.c
new file mode 100644
index 000000000000..8c679e7e6a7d
--- /dev/null
+++ clang/test/Sema/aarch64-neon-immediate-ranges/multiply-extended.c
@@ -0,0 +1,69 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+
+void test_multiply_extended_f32(float32_t arg_f32, float32x2_t arg_f32x2, float32x4_t arg_f32x4) {
+	vmulx_lane_f32(arg_f32x2, arg_f32x2, 0);
+	vmulx_lane_f32(arg_f32x2, arg_f32x2, 1);
+	vmulx_lane_f32(arg_f32x2, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulx_lane_f32(arg_f32x2, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulxq_lane_f32(arg_f32x4, arg_f32x2, 0);
+	vmulxq_lane_f32(arg_f32x4, arg_f32x2, 1);
+	vmulxq_lane_f32(arg_f32x4, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulxq_lane_f32(arg_f32x4, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulxs_lane_f32(arg_f32, arg_f32x2, 0);
+	vmulxs_lane_f32(arg_f32, arg_f32x2, 1);
+	vmulxs_lane_f32(arg_f32, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulxs_lane_f32(arg_f32, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulx_laneq_f32(arg_f32x2, arg_f32x4, 0);
+	vmulx_laneq_f32(arg_f32x2, arg_f32x4, 3);
+	vmulx_laneq_f32(arg_f32x2, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulx_laneq_f32(arg_f32x2, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulxq_laneq_f32(arg_f32x4, arg_f32x4, 0);
+	vmulxq_laneq_f32(arg_f32x4, arg_f32x4, 3);
+	vmulxq_laneq_f32(arg_f32x4, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulxq_laneq_f32(arg_f32x4, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulxs_laneq_f32(arg_f32, arg_f32x4, 0);
+	vmulxs_laneq_f32(arg_f32, arg_f32x4, 3);
+	vmulxs_laneq_f32(arg_f32, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulxs_laneq_f32(arg_f32, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_multiply_extended_f64(float64_t arg_f64, float64x2_t arg_f64x2, float64x1_t arg_f64x1) {
+	vmulx_lane_f64(arg_f64x1, arg_f64x1, 0);
+	vmulx_lane_f64(arg_f64x1, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulx_lane_f64(arg_f64x1, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulxq_lane_f64(arg_f64x2, arg_f64x1, 0);
+	vmulxq_lane_f64(arg_f64x2, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulxq_lane_f64(arg_f64x2, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulxd_lane_f64(arg_f64, arg_f64x1, 0);
+	vmulxd_lane_f64(arg_f64, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulxd_lane_f64(arg_f64, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulx_laneq_f64(arg_f64x1, arg_f64x2, 0);
+	vmulx_laneq_f64(arg_f64x1, arg_f64x2, 1);
+	vmulx_laneq_f64(arg_f64x1, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulx_laneq_f64(arg_f64x1, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulxq_laneq_f64(arg_f64x2, arg_f64x2, 0);
+	vmulxq_laneq_f64(arg_f64x2, arg_f64x2, 1);
+	vmulxq_laneq_f64(arg_f64x2, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulxq_laneq_f64(arg_f64x2, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulxd_laneq_f64(arg_f64, arg_f64x2, 0);
+	vmulxd_laneq_f64(arg_f64, arg_f64x2, 1);
+	vmulxd_laneq_f64(arg_f64, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulxd_laneq_f64(arg_f64, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git clang/test/Sema/aarch64-neon-immediate-ranges/saturating-multiply-accumulate.c clang/test/Sema/aarch64-neon-immediate-ranges/saturating-multiply-accumulate.c
new file mode 100644
index 000000000000..854d6171a914
--- /dev/null
+++ clang/test/Sema/aarch64-neon-immediate-ranges/saturating-multiply-accumulate.c
@@ -0,0 +1,132 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+
+void test_saturating_multiply_accumulate_s16(int16x4_t arg_i16x4, int32_t arg_i32, int16_t arg_i16,
+											 int32x4_t arg_i32x4, int16x8_t arg_i16x8) {
+	vqdmlal_lane_s16(arg_i32x4, arg_i16x4, arg_i16x4, 0);
+	vqdmlal_lane_s16(arg_i32x4, arg_i16x4, arg_i16x4, 3);
+	vqdmlal_lane_s16(arg_i32x4, arg_i16x4, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlal_lane_s16(arg_i32x4, arg_i16x4, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlalh_lane_s16(arg_i32, arg_i16, arg_i16x4, 0);
+	vqdmlalh_lane_s16(arg_i32, arg_i16, arg_i16x4, 3);
+	vqdmlalh_lane_s16(arg_i32, arg_i16, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlalh_lane_s16(arg_i32, arg_i16, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlal_high_lane_s16(arg_i32x4, arg_i16x8, arg_i16x4, 0);
+	vqdmlal_high_lane_s16(arg_i32x4, arg_i16x8, arg_i16x4, 3);
+	vqdmlal_high_lane_s16(arg_i32x4, arg_i16x8, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlal_high_lane_s16(arg_i32x4, arg_i16x8, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlal_laneq_s16(arg_i32x4, arg_i16x4, arg_i16x8, 0);
+	vqdmlal_laneq_s16(arg_i32x4, arg_i16x4, arg_i16x8, 7);
+	vqdmlal_laneq_s16(arg_i32x4, arg_i16x4, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlal_laneq_s16(arg_i32x4, arg_i16x4, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlalh_laneq_s16(arg_i32, arg_i16, arg_i16x8, 0);
+	vqdmlalh_laneq_s16(arg_i32, arg_i16, arg_i16x8, 7);
+	vqdmlalh_laneq_s16(arg_i32, arg_i16, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlalh_laneq_s16(arg_i32, arg_i16, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlal_high_laneq_s16(arg_i32x4, arg_i16x8, arg_i16x8, 0);
+	vqdmlal_high_laneq_s16(arg_i32x4, arg_i16x8, arg_i16x8, 7);
+	vqdmlal_high_laneq_s16(arg_i32x4, arg_i16x8, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlal_high_laneq_s16(arg_i32x4, arg_i16x8, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlsl_lane_s16(arg_i32x4, arg_i16x4, arg_i16x4, 0);
+	vqdmlsl_lane_s16(arg_i32x4, arg_i16x4, arg_i16x4, 3);
+	vqdmlsl_lane_s16(arg_i32x4, arg_i16x4, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlsl_lane_s16(arg_i32x4, arg_i16x4, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlslh_lane_s16(arg_i32, arg_i16, arg_i16x4, 0);
+	vqdmlslh_lane_s16(arg_i32, arg_i16, arg_i16x4, 3);
+	vqdmlslh_lane_s16(arg_i32, arg_i16, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlslh_lane_s16(arg_i32, arg_i16, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlsl_high_lane_s16(arg_i32x4, arg_i16x8, arg_i16x4, 0);
+	vqdmlsl_high_lane_s16(arg_i32x4, arg_i16x8, arg_i16x4, 3);
+	vqdmlsl_high_lane_s16(arg_i32x4, arg_i16x8, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlsl_high_lane_s16(arg_i32x4, arg_i16x8, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlsl_laneq_s16(arg_i32x4, arg_i16x4, arg_i16x8, 0);
+	vqdmlsl_laneq_s16(arg_i32x4, arg_i16x4, arg_i16x8, 7);
+	vqdmlsl_laneq_s16(arg_i32x4, arg_i16x4, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlsl_laneq_s16(arg_i32x4, arg_i16x4, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlslh_laneq_s16(arg_i32, arg_i16, arg_i16x8, 0);
+	vqdmlslh_laneq_s16(arg_i32, arg_i16, arg_i16x8, 7);
+	vqdmlslh_laneq_s16(arg_i32, arg_i16, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlslh_laneq_s16(arg_i32, arg_i16, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlsl_high_laneq_s16(arg_i32x4, arg_i16x8, arg_i16x8, 0);
+	vqdmlsl_high_laneq_s16(arg_i32x4, arg_i16x8, arg_i16x8, 7);
+	vqdmlsl_high_laneq_s16(arg_i32x4, arg_i16x8, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlsl_high_laneq_s16(arg_i32x4, arg_i16x8, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_saturating_multiply_accumulate_s32(int32_t arg_i32, int32x4_t arg_i32x4, int64_t arg_i64, int64x2_t arg_i64x2, int32x2_t arg_i32x2) {
+	vqdmlal_lane_s32(arg_i64x2, arg_i32x2, arg_i32x2, 0);
+	vqdmlal_lane_s32(arg_i64x2, arg_i32x2, arg_i32x2, 1);
+	vqdmlal_lane_s32(arg_i64x2, arg_i32x2, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlal_lane_s32(arg_i64x2, arg_i32x2, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlals_lane_s32(arg_i64, arg_i32, arg_i32x2, 0);
+	vqdmlals_lane_s32(arg_i64, arg_i32, arg_i32x2, 1);
+	vqdmlals_lane_s32(arg_i64, arg_i32, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlals_lane_s32(arg_i64, arg_i32, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlal_high_lane_s32(arg_i64x2, arg_i32x4, arg_i32x2, 0);
+	vqdmlal_high_lane_s32(arg_i64x2, arg_i32x4, arg_i32x2, 1);
+	vqdmlal_high_lane_s32(arg_i64x2, arg_i32x4, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlal_high_lane_s32(arg_i64x2, arg_i32x4, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlal_laneq_s32(arg_i64x2, arg_i32x2, arg_i32x4, 0);
+	vqdmlal_laneq_s32(arg_i64x2, arg_i32x2, arg_i32x4, 3);
+	vqdmlal_laneq_s32(arg_i64x2, arg_i32x2, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlal_laneq_s32(arg_i64x2, arg_i32x2, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlals_laneq_s32(arg_i64, arg_i32, arg_i32x4, 0);
+	vqdmlals_laneq_s32(arg_i64, arg_i32, arg_i32x4, 3);
+	vqdmlals_laneq_s32(arg_i64, arg_i32, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlals_laneq_s32(arg_i64, arg_i32, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlal_high_laneq_s32(arg_i64x2, arg_i32x4, arg_i32x4, 0);
+	vqdmlal_high_laneq_s32(arg_i64x2, arg_i32x4, arg_i32x4, 3);
+	vqdmlal_high_laneq_s32(arg_i64x2, arg_i32x4, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlal_high_laneq_s32(arg_i64x2, arg_i32x4, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlsl_lane_s32(arg_i64x2, arg_i32x2, arg_i32x2, 0);
+	vqdmlsl_lane_s32(arg_i64x2, arg_i32x2, arg_i32x2, 1);
+	vqdmlsl_lane_s32(arg_i64x2, arg_i32x2, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlsl_lane_s32(arg_i64x2, arg_i32x2, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlsls_lane_s32(arg_i64, arg_i32, arg_i32x2, 0);
+	vqdmlsls_lane_s32(arg_i64, arg_i32, arg_i32x2, 1);
+	vqdmlsls_lane_s32(arg_i64, arg_i32, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlsls_lane_s32(arg_i64, arg_i32, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlsl_high_lane_s32(arg_i64x2, arg_i32x4, arg_i32x2, 0);
+	vqdmlsl_high_lane_s32(arg_i64x2, arg_i32x4, arg_i32x2, 1);
+	vqdmlsl_high_lane_s32(arg_i64x2, arg_i32x4, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlsl_high_lane_s32(arg_i64x2, arg_i32x4, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlsl_laneq_s32(arg_i64x2, arg_i32x2, arg_i32x4, 0);
+	vqdmlsl_laneq_s32(arg_i64x2, arg_i32x2, arg_i32x4, 3);
+	vqdmlsl_laneq_s32(arg_i64x2, arg_i32x2, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlsl_laneq_s32(arg_i64x2, arg_i32x2, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlsls_laneq_s32(arg_i64, arg_i32, arg_i32x4, 0);
+	vqdmlsls_laneq_s32(arg_i64, arg_i32, arg_i32x4, 3);
+	vqdmlsls_laneq_s32(arg_i64, arg_i32, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlsls_laneq_s32(arg_i64, arg_i32, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlsl_high_laneq_s32(arg_i64x2, arg_i32x4, arg_i32x4, 0);
+	vqdmlsl_high_laneq_s32(arg_i64x2, arg_i32x4, arg_i32x4, 3);
+	vqdmlsl_high_laneq_s32(arg_i64x2, arg_i32x4, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlsl_high_laneq_s32(arg_i64x2, arg_i32x4, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
diff --git clang/test/Sema/aarch64-neon-immediate-ranges/saturating-multiply-by-scalar-and-widen.c clang/test/Sema/aarch64-neon-immediate-ranges/saturating-multiply-by-scalar-and-widen.c
new file mode 100644
index 000000000000..662a3c2ed172
--- /dev/null
+++ clang/test/Sema/aarch64-neon-immediate-ranges/saturating-multiply-by-scalar-and-widen.c
@@ -0,0 +1,193 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+
+void test_saturating_multiply_by_scalar_and_widen_s16(int16x4_t arg_i16x4, int16x8_t arg_i16x8, int16_t arg_i16) {
+	vqdmull_lane_s16(arg_i16x4, arg_i16x4, 0);
+	vqdmull_lane_s16(arg_i16x4, arg_i16x4, 3);
+	vqdmull_lane_s16(arg_i16x4, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmull_lane_s16(arg_i16x4, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmullh_lane_s16(arg_i16, arg_i16x4, 0);
+	vqdmullh_lane_s16(arg_i16, arg_i16x4, 3);
+	vqdmullh_lane_s16(arg_i16, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmullh_lane_s16(arg_i16, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmull_high_lane_s16(arg_i16x8, arg_i16x4, 0);
+	vqdmull_high_lane_s16(arg_i16x8, arg_i16x4, 3);
+	vqdmull_high_lane_s16(arg_i16x8, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmull_high_lane_s16(arg_i16x8, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmull_laneq_s16(arg_i16x4, arg_i16x8, 0);
+	vqdmull_laneq_s16(arg_i16x4, arg_i16x8, 7);
+	vqdmull_laneq_s16(arg_i16x4, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmull_laneq_s16(arg_i16x4, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmullh_laneq_s16(arg_i16, arg_i16x8, 0);
+	vqdmullh_laneq_s16(arg_i16, arg_i16x8, 7);
+	vqdmullh_laneq_s16(arg_i16, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmullh_laneq_s16(arg_i16, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmull_high_laneq_s16(arg_i16x8, arg_i16x8, 0);
+	vqdmull_high_laneq_s16(arg_i16x8, arg_i16x8, 7);
+	vqdmull_high_laneq_s16(arg_i16x8, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmull_high_laneq_s16(arg_i16x8, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmulh_lane_s16(arg_i16x4, arg_i16x4, 0);
+	vqdmulh_lane_s16(arg_i16x4, arg_i16x4, 3);
+	vqdmulh_lane_s16(arg_i16x4, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmulh_lane_s16(arg_i16x4, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmulhq_lane_s16(arg_i16x8, arg_i16x4, 0);
+	vqdmulhq_lane_s16(arg_i16x8, arg_i16x4, 3);
+	vqdmulhq_lane_s16(arg_i16x8, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmulhq_lane_s16(arg_i16x8, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmulhh_lane_s16(arg_i16, arg_i16x4, 0);
+	vqdmulhh_lane_s16(arg_i16, arg_i16x4, 3);
+	vqdmulhh_lane_s16(arg_i16, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmulhh_lane_s16(arg_i16, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmulh_laneq_s16(arg_i16x4, arg_i16x8, 0);
+	vqdmulh_laneq_s16(arg_i16x4, arg_i16x8, 7);
+	vqdmulh_laneq_s16(arg_i16x4, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmulh_laneq_s16(arg_i16x4, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmulhq_laneq_s16(arg_i16x8, arg_i16x8, 0);
+	vqdmulhq_laneq_s16(arg_i16x8, arg_i16x8, 7);
+	vqdmulhq_laneq_s16(arg_i16x8, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmulhq_laneq_s16(arg_i16x8, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmulhh_laneq_s16(arg_i16, arg_i16x8, 0);
+	vqdmulhh_laneq_s16(arg_i16, arg_i16x8, 7);
+	vqdmulhh_laneq_s16(arg_i16, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmulhh_laneq_s16(arg_i16, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmulh_lane_s16(arg_i16x4, arg_i16x4, 0);
+	vqrdmulh_lane_s16(arg_i16x4, arg_i16x4, 3);
+	vqrdmulh_lane_s16(arg_i16x4, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmulh_lane_s16(arg_i16x4, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmulhq_lane_s16(arg_i16x8, arg_i16x4, 0);
+	vqrdmulhq_lane_s16(arg_i16x8, arg_i16x4, 3);
+	vqrdmulhq_lane_s16(arg_i16x8, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmulhq_lane_s16(arg_i16x8, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmulhh_lane_s16(arg_i16, arg_i16x4, 0);
+	vqrdmulhh_lane_s16(arg_i16, arg_i16x4, 3);
+	vqrdmulhh_lane_s16(arg_i16, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmulhh_lane_s16(arg_i16, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmulh_laneq_s16(arg_i16x4, arg_i16x8, 0);
+	vqrdmulh_laneq_s16(arg_i16x4, arg_i16x8, 7);
+	vqrdmulh_laneq_s16(arg_i16x4, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmulh_laneq_s16(arg_i16x4, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmulhq_laneq_s16(arg_i16x8, arg_i16x8, 0);
+	vqrdmulhq_laneq_s16(arg_i16x8, arg_i16x8, 7);
+	vqrdmulhq_laneq_s16(arg_i16x8, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmulhq_laneq_s16(arg_i16x8, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmulhh_laneq_s16(arg_i16, arg_i16x8, 0);
+	vqrdmulhh_laneq_s16(arg_i16, arg_i16x8, 7);
+	vqrdmulhh_laneq_s16(arg_i16, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmulhh_laneq_s16(arg_i16, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+
+void test_saturating_multiply_by_scalar_and_widen_s32(int32x2_t arg_i32x2, int32_t arg_i32, int32x4_t arg_i32x4) {
+	vqdmull_lane_s32(arg_i32x2, arg_i32x2, 0);
+	vqdmull_lane_s32(arg_i32x2, arg_i32x2, 1);
+	vqdmull_lane_s32(arg_i32x2, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmull_lane_s32(arg_i32x2, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmulls_lane_s32(arg_i32, arg_i32x2, 0);
+	vqdmulls_lane_s32(arg_i32, arg_i32x2, 1);
+	vqdmulls_lane_s32(arg_i32, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmulls_lane_s32(arg_i32, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmull_high_lane_s32(arg_i32x4, arg_i32x2, 0);
+	vqdmull_high_lane_s32(arg_i32x4, arg_i32x2, 1);
+	vqdmull_high_lane_s32(arg_i32x4, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmull_high_lane_s32(arg_i32x4, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmull_laneq_s32(arg_i32x2, arg_i32x4, 0);
+	vqdmull_laneq_s32(arg_i32x2, arg_i32x4, 3);
+	vqdmull_laneq_s32(arg_i32x2, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmull_laneq_s32(arg_i32x2, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmulls_laneq_s32(arg_i32, arg_i32x4, 0);
+	vqdmulls_laneq_s32(arg_i32, arg_i32x4, 3);
+	vqdmulls_laneq_s32(arg_i32, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmulls_laneq_s32(arg_i32, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmull_high_laneq_s32(arg_i32x4, arg_i32x4, 0);
+	vqdmull_high_laneq_s32(arg_i32x4, arg_i32x4, 3);
+	vqdmull_high_laneq_s32(arg_i32x4, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmull_high_laneq_s32(arg_i32x4, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmulh_lane_s32(arg_i32x2, arg_i32x2, 0);
+	vqdmulh_lane_s32(arg_i32x2, arg_i32x2, 1);
+	vqdmulh_lane_s32(arg_i32x2, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmulh_lane_s32(arg_i32x2, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmulhq_lane_s32(arg_i32x4, arg_i32x2, 0);
+	vqdmulhq_lane_s32(arg_i32x4, arg_i32x2, 1);
+	vqdmulhq_lane_s32(arg_i32x4, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmulhq_lane_s32(arg_i32x4, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmulhs_lane_s32(arg_i32, arg_i32x2, 0);
+	vqdmulhs_lane_s32(arg_i32, arg_i32x2, 1);
+	vqdmulhs_lane_s32(arg_i32, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmulhs_lane_s32(arg_i32, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmulh_laneq_s32(arg_i32x2, arg_i32x4, 0);
+	vqdmulh_laneq_s32(arg_i32x2, arg_i32x4, 3);
+	vqdmulh_laneq_s32(arg_i32x2, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmulh_laneq_s32(arg_i32x2, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmulhq_laneq_s32(arg_i32x4, arg_i32x4, 0);
+	vqdmulhq_laneq_s32(arg_i32x4, arg_i32x4, 3);
+	vqdmulhq_laneq_s32(arg_i32x4, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmulhq_laneq_s32(arg_i32x4, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmulhs_laneq_s32(arg_i32, arg_i32x4, 0);
+	vqdmulhs_laneq_s32(arg_i32, arg_i32x4, 3);
+	vqdmulhs_laneq_s32(arg_i32, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmulhs_laneq_s32(arg_i32, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmulh_lane_s32(arg_i32x2, arg_i32x2, 0);
+	vqrdmulh_lane_s32(arg_i32x2, arg_i32x2, 1);
+	vqrdmulh_lane_s32(arg_i32x2, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmulh_lane_s32(arg_i32x2, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmulhq_lane_s32(arg_i32x4, arg_i32x2, 0);
+	vqrdmulhq_lane_s32(arg_i32x4, arg_i32x2, 1);
+	vqrdmulhq_lane_s32(arg_i32x4, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmulhq_lane_s32(arg_i32x4, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmulhs_lane_s32(arg_i32, arg_i32x2, 0);
+	vqrdmulhs_lane_s32(arg_i32, arg_i32x2, 1);
+	vqrdmulhs_lane_s32(arg_i32, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmulhs_lane_s32(arg_i32, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmulh_laneq_s32(arg_i32x2, arg_i32x4, 0);
+	vqrdmulh_laneq_s32(arg_i32x2, arg_i32x4, 3);
+	vqrdmulh_laneq_s32(arg_i32x2, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmulh_laneq_s32(arg_i32x2, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmulhq_laneq_s32(arg_i32x4, arg_i32x4, 0);
+	vqrdmulhq_laneq_s32(arg_i32x4, arg_i32x4, 3);
+	vqrdmulhq_laneq_s32(arg_i32x4, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmulhq_laneq_s32(arg_i32x4, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmulhs_laneq_s32(arg_i32, arg_i32x4, 0);
+	vqrdmulhs_laneq_s32(arg_i32, arg_i32x4, 3);
+	vqrdmulhs_laneq_s32(arg_i32, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmulhs_laneq_s32(arg_i32, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git clang/test/Sema/aarch64-neon-immediate-ranges/set-lanes-to-value.c clang/test/Sema/aarch64-neon-immediate-ranges/set-lanes-to-value.c
new file mode 100644
index 000000000000..b5fa76b5be88
--- /dev/null
+++ clang/test/Sema/aarch64-neon-immediate-ranges/set-lanes-to-value.c
@@ -0,0 +1,297 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+
+void test_set_all_lanes_to_the_same_value_s8(int8x8_t arg_i8x8, int8x16_t arg_i8x16) {
+	vdup_lane_s8(arg_i8x8, 0);
+	vdup_lane_s8(arg_i8x8, 7);
+	vdup_lane_s8(arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_lane_s8(arg_i8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_lane_s8(arg_i8x8, 0);
+	vdupq_lane_s8(arg_i8x8, 7);
+	vdupq_lane_s8(arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_lane_s8(arg_i8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdup_laneq_s8(arg_i8x16, 0);
+	vdup_laneq_s8(arg_i8x16, 15);
+	vdup_laneq_s8(arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_laneq_s8(arg_i8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_laneq_s8(arg_i8x16, 0);
+	vdupq_laneq_s8(arg_i8x16, 15);
+	vdupq_laneq_s8(arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_laneq_s8(arg_i8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_all_lanes_to_the_same_value_s16(int16x4_t arg_i16x4, int16x8_t arg_i16x8) {
+	vdup_lane_s16(arg_i16x4, 0);
+	vdup_lane_s16(arg_i16x4, 3);
+	vdup_lane_s16(arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_lane_s16(arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_lane_s16(arg_i16x4, 0);
+	vdupq_lane_s16(arg_i16x4, 3);
+	vdupq_lane_s16(arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_lane_s16(arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdup_laneq_s16(arg_i16x8, 0);
+	vdup_laneq_s16(arg_i16x8, 7);
+	vdup_laneq_s16(arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_laneq_s16(arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_laneq_s16(arg_i16x8, 0);
+	vdupq_laneq_s16(arg_i16x8, 7);
+	vdupq_laneq_s16(arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_laneq_s16(arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_all_lanes_to_the_same_value_s32(int32x4_t arg_i32x4, int32x2_t arg_i32x2) {
+	vdup_lane_s32(arg_i32x2, 0);
+	vdup_lane_s32(arg_i32x2, 1);
+	vdup_lane_s32(arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_lane_s32(arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_lane_s32(arg_i32x2, 0);
+	vdupq_lane_s32(arg_i32x2, 1);
+	vdupq_lane_s32(arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_lane_s32(arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdup_laneq_s32(arg_i32x4, 0);
+	vdup_laneq_s32(arg_i32x4, 3);
+	vdup_laneq_s32(arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_laneq_s32(arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_laneq_s32(arg_i32x4, 0);
+	vdupq_laneq_s32(arg_i32x4, 3);
+	vdupq_laneq_s32(arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_laneq_s32(arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_all_lanes_to_the_same_value_s64(int64x1_t arg_i64x1, int64x2_t arg_i64x2) {
+	vdup_lane_s64(arg_i64x1, 0);
+	vdup_lane_s64(arg_i64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_lane_s64(arg_i64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_lane_s64(arg_i64x1, 0);
+	vdupq_lane_s64(arg_i64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_lane_s64(arg_i64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdup_laneq_s64(arg_i64x2, 0);
+	vdup_laneq_s64(arg_i64x2, 1);
+	vdup_laneq_s64(arg_i64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_laneq_s64(arg_i64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_laneq_s64(arg_i64x2, 0);
+	vdupq_laneq_s64(arg_i64x2, 1);
+	vdupq_laneq_s64(arg_i64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_laneq_s64(arg_i64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_all_lanes_to_the_same_value_u8(uint8x8_t arg_u8x8, uint8x16_t arg_u8x16) {
+	vdup_lane_u8(arg_u8x8, 0);
+	vdup_lane_u8(arg_u8x8, 7);
+	vdup_lane_u8(arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_lane_u8(arg_u8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_lane_u8(arg_u8x8, 0);
+	vdupq_lane_u8(arg_u8x8, 7);
+	vdupq_lane_u8(arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_lane_u8(arg_u8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdup_laneq_u8(arg_u8x16, 0);
+	vdup_laneq_u8(arg_u8x16, 15);
+	vdup_laneq_u8(arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_laneq_u8(arg_u8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_laneq_u8(arg_u8x16, 0);
+	vdupq_laneq_u8(arg_u8x16, 15);
+	vdupq_laneq_u8(arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_laneq_u8(arg_u8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_all_lanes_to_the_same_value_u16(uint16x4_t arg_u16x4, uint16x8_t arg_u16x8) {
+	vdup_lane_u16(arg_u16x4, 0);
+	vdup_lane_u16(arg_u16x4, 3);
+	vdup_lane_u16(arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_lane_u16(arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_lane_u16(arg_u16x4, 0);
+	vdupq_lane_u16(arg_u16x4, 3);
+	vdupq_lane_u16(arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_lane_u16(arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdup_laneq_u16(arg_u16x8, 0);
+	vdup_laneq_u16(arg_u16x8, 7);
+	vdup_laneq_u16(arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_laneq_u16(arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_laneq_u16(arg_u16x8, 0);
+	vdupq_laneq_u16(arg_u16x8, 7);
+	vdupq_laneq_u16(arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_laneq_u16(arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_all_lanes_to_the_same_value_u32(uint32x4_t arg_u32x4, uint32x2_t arg_u32x2) {
+	vdup_lane_u32(arg_u32x2, 0);
+	vdup_lane_u32(arg_u32x2, 1);
+	vdup_lane_u32(arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_lane_u32(arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_lane_u32(arg_u32x2, 0);
+	vdupq_lane_u32(arg_u32x2, 1);
+	vdupq_lane_u32(arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_lane_u32(arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdup_laneq_u32(arg_u32x4, 0);
+	vdup_laneq_u32(arg_u32x4, 3);
+	vdup_laneq_u32(arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_laneq_u32(arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_laneq_u32(arg_u32x4, 0);
+	vdupq_laneq_u32(arg_u32x4, 3);
+	vdupq_laneq_u32(arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_laneq_u32(arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_all_lanes_to_the_same_value_u64(uint64x1_t arg_u64x1, uint64x2_t arg_u64x2) {
+	vdup_lane_u64(arg_u64x1, 0);
+	vdup_lane_u64(arg_u64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_lane_u64(arg_u64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_lane_u64(arg_u64x1, 0);
+	vdupq_lane_u64(arg_u64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_lane_u64(arg_u64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdup_laneq_u64(arg_u64x2, 0);
+	vdup_laneq_u64(arg_u64x2, 1);
+	vdup_laneq_u64(arg_u64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_laneq_u64(arg_u64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_laneq_u64(arg_u64x2, 0);
+	vdupq_laneq_u64(arg_u64x2, 1);
+	vdupq_laneq_u64(arg_u64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_laneq_u64(arg_u64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_all_lanes_to_the_same_value_p64(poly64x1_t arg_p64x1, poly64x2_t arg_p64x2) {
+	vdup_lane_p64(arg_p64x1, 0);
+	vdup_lane_p64(arg_p64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_lane_p64(arg_p64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_lane_p64(arg_p64x1, 0);
+	vdupq_lane_p64(arg_p64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_lane_p64(arg_p64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdup_laneq_p64(arg_p64x2, 0);
+	vdup_laneq_p64(arg_p64x2, 1);
+	vdup_laneq_p64(arg_p64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_laneq_p64(arg_p64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_laneq_p64(arg_p64x2, 0);
+	vdupq_laneq_p64(arg_p64x2, 1);
+	vdupq_laneq_p64(arg_p64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_laneq_p64(arg_p64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_all_lanes_to_the_same_value_f32(float32x2_t arg_f32x2, float32x4_t arg_f32x4) {
+	vdup_lane_f32(arg_f32x2, 0);
+	vdup_lane_f32(arg_f32x2, 1);
+	vdup_lane_f32(arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_lane_f32(arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_lane_f32(arg_f32x2, 0);
+	vdupq_lane_f32(arg_f32x2, 1);
+	vdupq_lane_f32(arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_lane_f32(arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdup_laneq_f32(arg_f32x4, 0);
+	vdup_laneq_f32(arg_f32x4, 3);
+	vdup_laneq_f32(arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_laneq_f32(arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_laneq_f32(arg_f32x4, 0);
+	vdupq_laneq_f32(arg_f32x4, 3);
+	vdupq_laneq_f32(arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_laneq_f32(arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_all_lanes_to_the_same_value_p8(poly8x16_t arg_p8x16, poly8x8_t arg_p8x8) {
+	vdup_lane_p8(arg_p8x8, 0);
+	vdup_lane_p8(arg_p8x8, 7);
+	vdup_lane_p8(arg_p8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_lane_p8(arg_p8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_lane_p8(arg_p8x8, 0);
+	vdupq_lane_p8(arg_p8x8, 7);
+	vdupq_lane_p8(arg_p8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_lane_p8(arg_p8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdup_laneq_p8(arg_p8x16, 0);
+	vdup_laneq_p8(arg_p8x16, 15);
+	vdup_laneq_p8(arg_p8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_laneq_p8(arg_p8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_laneq_p8(arg_p8x16, 0);
+	vdupq_laneq_p8(arg_p8x16, 15);
+	vdupq_laneq_p8(arg_p8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_laneq_p8(arg_p8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_all_lanes_to_the_same_value_p16(poly16x4_t arg_p16x4, poly16x8_t arg_p16x8) {
+	vdup_lane_p16(arg_p16x4, 0);
+	vdup_lane_p16(arg_p16x4, 3);
+	vdup_lane_p16(arg_p16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_lane_p16(arg_p16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_lane_p16(arg_p16x4, 0);
+	vdupq_lane_p16(arg_p16x4, 3);
+	vdupq_lane_p16(arg_p16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_lane_p16(arg_p16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdup_laneq_p16(arg_p16x8, 0);
+	vdup_laneq_p16(arg_p16x8, 7);
+	vdup_laneq_p16(arg_p16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_laneq_p16(arg_p16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_laneq_p16(arg_p16x8, 0);
+	vdupq_laneq_p16(arg_p16x8, 7);
+	vdupq_laneq_p16(arg_p16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_laneq_p16(arg_p16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_all_lanes_to_the_same_value_f64(float64x2_t arg_f64x2, float64x1_t arg_f64x1) {
+	vdup_lane_f64(arg_f64x1, 0);
+	vdup_lane_f64(arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_lane_f64(arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_lane_f64(arg_f64x1, 0);
+	vdupq_lane_f64(arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_lane_f64(arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdup_laneq_f64(arg_f64x2, 0);
+	vdup_laneq_f64(arg_f64x2, 1);
+	vdup_laneq_f64(arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_laneq_f64(arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_laneq_f64(arg_f64x2, 0);
+	vdupq_laneq_f64(arg_f64x2, 1);
+	vdupq_laneq_f64(arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_laneq_f64(arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git clang/test/Sema/aarch64-neon-immediate-ranges/set-vector-lane.c clang/test/Sema/aarch64-neon-immediate-ranges/set-vector-lane.c
new file mode 100644
index 000000000000..3ab077ed5628
--- /dev/null
+++ clang/test/Sema/aarch64-neon-immediate-ranges/set-vector-lane.c
@@ -0,0 +1,162 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+// vsetq_lane_u8, vsetq_lane_u16, vsetq_lane_u32, vsetq_lane_u64 are
+// tested under clang/test/Sema/arm-mve-immediates.c
+
+void test_set_vector_lane_u8(uint8x16_t arg_u8x16, uint8_t arg_u8, uint8x8_t arg_u8x8) {
+	vset_lane_u8(arg_u8, arg_u8x8, 0);
+	vset_lane_u8(arg_u8, arg_u8x8, 7);
+	vset_lane_u8(arg_u8, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vset_lane_u8(arg_u8, arg_u8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_set_vector_lane_u16(uint16x4_t arg_u16x4, uint16_t arg_u16, uint16x8_t arg_u16x8) {
+	vset_lane_u16(arg_u16, arg_u16x4, 0);
+	vset_lane_u16(arg_u16, arg_u16x4, 3);
+	vset_lane_u16(arg_u16, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vset_lane_u16(arg_u16, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_set_vector_lane_u32(uint32x2_t arg_u32x2, uint32x4_t arg_u32x4, uint32_t arg_u32) {
+	vset_lane_u32(arg_u32, arg_u32x2, 0);
+	vset_lane_u32(arg_u32, arg_u32x2, 1);
+	vset_lane_u32(arg_u32, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vset_lane_u32(arg_u32, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_set_vector_lane_u64(uint64x2_t arg_u64x2, uint64x1_t arg_u64x1, uint64_t arg_u64) {
+	vset_lane_u64(arg_u64, arg_u64x1, 0);
+	vset_lane_u64(arg_u64, arg_u64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vset_lane_u64(arg_u64, arg_u64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_set_vector_lane_p64(poly64_t arg_p64, poly64x1_t arg_p64x1, poly64x2_t arg_p64x2) {
+	vset_lane_p64(arg_p64, arg_p64x1, 0);
+	vset_lane_p64(arg_p64, arg_p64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vset_lane_p64(arg_p64, arg_p64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsetq_lane_p64(arg_p64, arg_p64x2, 0);
+	vsetq_lane_p64(arg_p64, arg_p64x2, 1);
+	vsetq_lane_p64(arg_p64, arg_p64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsetq_lane_p64(arg_p64, arg_p64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_vector_lane_s8(int8x16_t arg_i8x16, int8x8_t arg_i8x8, int8_t arg_i8) {
+	vset_lane_s8(arg_i8, arg_i8x8, 0);
+	vset_lane_s8(arg_i8, arg_i8x8, 7);
+	vset_lane_s8(arg_i8, arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vset_lane_s8(arg_i8, arg_i8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsetq_lane_s8(arg_i8, arg_i8x16, 0);
+	vsetq_lane_s8(arg_i8, arg_i8x16, 15);
+	vsetq_lane_s8(arg_i8, arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsetq_lane_s8(arg_i8, arg_i8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_vector_lane_s16(int16x4_t arg_i16x4, int16_t arg_i16, int16x8_t arg_i16x8) {
+	vset_lane_s16(arg_i16, arg_i16x4, 0);
+	vset_lane_s16(arg_i16, arg_i16x4, 3);
+	vset_lane_s16(arg_i16, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vset_lane_s16(arg_i16, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsetq_lane_s16(arg_i16, arg_i16x8, 0);
+	vsetq_lane_s16(arg_i16, arg_i16x8, 7);
+	vsetq_lane_s16(arg_i16, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsetq_lane_s16(arg_i16, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_vector_lane_s32(int32_t arg_i32, int32x2_t arg_i32x2, int32x4_t arg_i32x4) {
+	vset_lane_s32(arg_i32, arg_i32x2, 0);
+	vset_lane_s32(arg_i32, arg_i32x2, 1);
+	vset_lane_s32(arg_i32, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vset_lane_s32(arg_i32, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsetq_lane_s32(arg_i32, arg_i32x4, 0);
+	vsetq_lane_s32(arg_i32, arg_i32x4, 3);
+	vsetq_lane_s32(arg_i32, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsetq_lane_s32(arg_i32, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_vector_lane_s64(int64_t arg_i64, int64x2_t arg_i64x2, int64x1_t arg_i64x1) {
+	vset_lane_s64(arg_i64, arg_i64x1, 0);
+	vset_lane_s64(arg_i64, arg_i64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vset_lane_s64(arg_i64, arg_i64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsetq_lane_s64(arg_i64, arg_i64x2, 0);
+	vsetq_lane_s64(arg_i64, arg_i64x2, 1);
+	vsetq_lane_s64(arg_i64, arg_i64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsetq_lane_s64(arg_i64, arg_i64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_vector_lane_p8(poly8_t arg_p8, poly8x16_t arg_p8x16, poly8x8_t arg_p8x8) {
+	vset_lane_p8(arg_p8, arg_p8x8, 0);
+	vset_lane_p8(arg_p8, arg_p8x8, 7);
+	vset_lane_p8(arg_p8, arg_p8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vset_lane_p8(arg_p8, arg_p8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsetq_lane_p8(arg_p8, arg_p8x16, 0);
+	vsetq_lane_p8(arg_p8, arg_p8x16, 15);
+	vsetq_lane_p8(arg_p8, arg_p8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsetq_lane_p8(arg_p8, arg_p8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_vector_lane_p16(poly16x4_t arg_p16x4, poly16_t arg_p16, poly16x8_t arg_p16x8) {
+	vset_lane_p16(arg_p16, arg_p16x4, 0);
+	vset_lane_p16(arg_p16, arg_p16x4, 3);
+	vset_lane_p16(arg_p16, arg_p16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vset_lane_p16(arg_p16, arg_p16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsetq_lane_p16(arg_p16, arg_p16x8, 0);
+	vsetq_lane_p16(arg_p16, arg_p16x8, 7);
+	vsetq_lane_p16(arg_p16, arg_p16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsetq_lane_p16(arg_p16, arg_p16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_vector_lane_f16(float16x8_t arg_f16x8, float16x4_t arg_f16x4, float16_t arg_f16) {
+	vset_lane_f16(arg_f16, arg_f16x4, 0);
+	vset_lane_f16(arg_f16, arg_f16x4, 3);
+	vset_lane_f16(arg_f16, arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vset_lane_f16(arg_f16, arg_f16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsetq_lane_f16(arg_f16, arg_f16x8, 0);
+	vsetq_lane_f16(arg_f16, arg_f16x8, 7);
+	vsetq_lane_f16(arg_f16, arg_f16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsetq_lane_f16(arg_f16, arg_f16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_vector_lane_f32(float32x2_t arg_f32x2, float32x4_t arg_f32x4, float32_t arg_f32) {
+	vset_lane_f32(arg_f32, arg_f32x2, 0);
+	vset_lane_f32(arg_f32, arg_f32x2, 1);
+	vset_lane_f32(arg_f32, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vset_lane_f32(arg_f32, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsetq_lane_f32(arg_f32, arg_f32x4, 0);
+	vsetq_lane_f32(arg_f32, arg_f32x4, 3);
+	vsetq_lane_f32(arg_f32, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsetq_lane_f32(arg_f32, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_vector_lane_f64(float64x1_t arg_f64x1, float64x2_t arg_f64x2, float64_t arg_f64) {
+	vset_lane_f64(arg_f64, arg_f64x1, 0);
+	vset_lane_f64(arg_f64, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vset_lane_f64(arg_f64, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsetq_lane_f64(arg_f64, arg_f64x2, 0);
+	vsetq_lane_f64(arg_f64, arg_f64x2, 1);
+	vsetq_lane_f64(arg_f64, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsetq_lane_f64(arg_f64, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git clang/test/Sema/aarch64-neon-immediate-ranges/sqrdmlah-ranges.c clang/test/Sema/aarch64-neon-immediate-ranges/sqrdmlah-ranges.c
new file mode 100644
index 000000000000..2439fb79737e
--- /dev/null
+++ clang/test/Sema/aarch64-neon-immediate-ranges/sqrdmlah-ranges.c
@@ -0,0 +1,130 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -target-feature +v8.1a -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+void test_saturating_multiply_accumulate_by_element_s16(int16x8_t arg_i16x8, int16_t arg_i16, int16x4_t arg_i16x4) {
+	vqrdmlah_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, 0);
+	vqrdmlah_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, 3);
+	vqrdmlah_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlah_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlahq_lane_s16(arg_i16x8, arg_i16x8, arg_i16x4, 0);
+	vqrdmlahq_lane_s16(arg_i16x8, arg_i16x8, arg_i16x4, 3);
+	vqrdmlahq_lane_s16(arg_i16x8, arg_i16x8, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlahq_lane_s16(arg_i16x8, arg_i16x8, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlah_laneq_s16(arg_i16x4, arg_i16x4, arg_i16x8, 0);
+	vqrdmlah_laneq_s16(arg_i16x4, arg_i16x4, arg_i16x8, 7);
+	vqrdmlah_laneq_s16(arg_i16x4, arg_i16x4, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlah_laneq_s16(arg_i16x4, arg_i16x4, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlahq_laneq_s16(arg_i16x8, arg_i16x8, arg_i16x8, 0);
+	vqrdmlahq_laneq_s16(arg_i16x8, arg_i16x8, arg_i16x8, 7);
+	vqrdmlahq_laneq_s16(arg_i16x8, arg_i16x8, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlahq_laneq_s16(arg_i16x8, arg_i16x8, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlsh_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, 0);
+	vqrdmlsh_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, 3);
+	vqrdmlsh_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlsh_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlshq_lane_s16(arg_i16x8, arg_i16x8, arg_i16x4, 0);
+	vqrdmlshq_lane_s16(arg_i16x8, arg_i16x8, arg_i16x4, 3);
+	vqrdmlshq_lane_s16(arg_i16x8, arg_i16x8, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlshq_lane_s16(arg_i16x8, arg_i16x8, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlsh_laneq_s16(arg_i16x4, arg_i16x4, arg_i16x8, 0);
+	vqrdmlsh_laneq_s16(arg_i16x4, arg_i16x4, arg_i16x8, 7);
+	vqrdmlsh_laneq_s16(arg_i16x4, arg_i16x4, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlsh_laneq_s16(arg_i16x4, arg_i16x4, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlshq_laneq_s16(arg_i16x8, arg_i16x8, arg_i16x8, 0);
+	vqrdmlshq_laneq_s16(arg_i16x8, arg_i16x8, arg_i16x8, 7);
+	vqrdmlshq_laneq_s16(arg_i16x8, arg_i16x8, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlshq_laneq_s16(arg_i16x8, arg_i16x8, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlahh_lane_s16(arg_i16, arg_i16, arg_i16x4, 0);
+	vqrdmlahh_lane_s16(arg_i16, arg_i16, arg_i16x4, 3);
+	vqrdmlahh_lane_s16(arg_i16, arg_i16, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlahh_lane_s16(arg_i16, arg_i16, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlahh_laneq_s16(arg_i16, arg_i16, arg_i16x8, 0);
+	vqrdmlahh_laneq_s16(arg_i16, arg_i16, arg_i16x8, 7);
+	vqrdmlahh_laneq_s16(arg_i16, arg_i16, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlahh_laneq_s16(arg_i16, arg_i16, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlshh_lane_s16(arg_i16, arg_i16, arg_i16x4, 0);
+	vqrdmlshh_lane_s16(arg_i16, arg_i16, arg_i16x4, 3);
+	vqrdmlshh_lane_s16(arg_i16, arg_i16, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlshh_lane_s16(arg_i16, arg_i16, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlshh_laneq_s16(arg_i16, arg_i16, arg_i16x8, 0);
+	vqrdmlshh_laneq_s16(arg_i16, arg_i16, arg_i16x8, 7);
+	vqrdmlshh_laneq_s16(arg_i16, arg_i16, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlshh_laneq_s16(arg_i16, arg_i16, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_saturating_multiply_accumulate_by_element_s32(int32x2_t arg_i32x2, int32x4_t arg_i32x4, int32_t arg_i32) {
+	vqrdmlah_lane_s32(arg_i32x2, arg_i32x2, arg_i32x2, 0);
+	vqrdmlah_lane_s32(arg_i32x2, arg_i32x2, arg_i32x2, 1);
+	vqrdmlah_lane_s32(arg_i32x2, arg_i32x2, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlah_lane_s32(arg_i32x2, arg_i32x2, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlahq_lane_s32(arg_i32x4, arg_i32x4, arg_i32x2, 0);
+	vqrdmlahq_lane_s32(arg_i32x4, arg_i32x4, arg_i32x2, 1);
+	vqrdmlahq_lane_s32(arg_i32x4, arg_i32x4, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlahq_lane_s32(arg_i32x4, arg_i32x4, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlah_laneq_s32(arg_i32x2, arg_i32x2, arg_i32x4, 0);
+	vqrdmlah_laneq_s32(arg_i32x2, arg_i32x2, arg_i32x4, 3);
+	vqrdmlah_laneq_s32(arg_i32x2, arg_i32x2, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlah_laneq_s32(arg_i32x2, arg_i32x2, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlahq_laneq_s32(arg_i32x4, arg_i32x4, arg_i32x4, 0);
+	vqrdmlahq_laneq_s32(arg_i32x4, arg_i32x4, arg_i32x4, 3);
+	vqrdmlahq_laneq_s32(arg_i32x4, arg_i32x4, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlahq_laneq_s32(arg_i32x4, arg_i32x4, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlsh_lane_s32(arg_i32x2, arg_i32x2, arg_i32x2, 0);
+	vqrdmlsh_lane_s32(arg_i32x2, arg_i32x2, arg_i32x2, 1);
+	vqrdmlsh_lane_s32(arg_i32x2, arg_i32x2, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlsh_lane_s32(arg_i32x2, arg_i32x2, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlshq_lane_s32(arg_i32x4, arg_i32x4, arg_i32x2, 0);
+	vqrdmlshq_lane_s32(arg_i32x4, arg_i32x4, arg_i32x2, 1);
+	vqrdmlshq_lane_s32(arg_i32x4, arg_i32x4, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlshq_lane_s32(arg_i32x4, arg_i32x4, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlsh_laneq_s32(arg_i32x2, arg_i32x2, arg_i32x4, 0);
+	vqrdmlsh_laneq_s32(arg_i32x2, arg_i32x2, arg_i32x4, 3);
+	vqrdmlsh_laneq_s32(arg_i32x2, arg_i32x2, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlsh_laneq_s32(arg_i32x2, arg_i32x2, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlshq_laneq_s32(arg_i32x4, arg_i32x4, arg_i32x4, 0);
+	vqrdmlshq_laneq_s32(arg_i32x4, arg_i32x4, arg_i32x4, 3);
+	vqrdmlshq_laneq_s32(arg_i32x4, arg_i32x4, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlshq_laneq_s32(arg_i32x4, arg_i32x4, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlahs_lane_s32(arg_i32, arg_i32, arg_i32x2, 0);
+	vqrdmlahs_lane_s32(arg_i32, arg_i32, arg_i32x2, 1);
+	vqrdmlahs_lane_s32(arg_i32, arg_i32, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlahs_lane_s32(arg_i32, arg_i32, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlahs_laneq_s32(arg_i32, arg_i32, arg_i32x4, 0);
+	vqrdmlahs_laneq_s32(arg_i32, arg_i32, arg_i32x4, 3);
+	vqrdmlahs_laneq_s32(arg_i32, arg_i32, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlahs_laneq_s32(arg_i32, arg_i32, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlshs_lane_s32(arg_i32, arg_i32, arg_i32x2, 0);
+	vqrdmlshs_lane_s32(arg_i32, arg_i32, arg_i32x2, 1);
+	vqrdmlshs_lane_s32(arg_i32, arg_i32, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlshs_lane_s32(arg_i32, arg_i32, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlshs_laneq_s32(arg_i32, arg_i32, arg_i32x4, 0);
+	vqrdmlshs_laneq_s32(arg_i32, arg_i32, arg_i32x4, 3);
+	vqrdmlshs_laneq_s32(arg_i32, arg_i32, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlshs_laneq_s32(arg_i32, arg_i32, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
diff --git clang/test/Sema/aarch64-neon-immediate-ranges/vcmla.c clang/test/Sema/aarch64-neon-immediate-ranges/vcmla.c
new file mode 100644
index 000000000000..21c24975b38b
--- /dev/null
+++ clang/test/Sema/aarch64-neon-immediate-ranges/vcmla.c
@@ -0,0 +1,203 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -target-feature +v8.3a -ffreestanding -fsyntax-only -verify %s
+// REQUIRES: aarch64-registered-target
+
+#include <arm_neon.h>
+#include <arm_fp16.h>
+
+void test_vcmla_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c){
+  vcmla_lane_f16(a, b, c, 0);
+  vcmla_lane_f16(a, b, c, 1);
+
+  vcmla_lane_f16(a, b, c, 2); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmla_lane_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c){
+  vcmla_laneq_f16(a, b, c, 0);
+  vcmla_laneq_f16(a, b, c, 1);
+  vcmla_laneq_f16(a, b, c, 3);
+
+  vcmla_laneq_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmla_laneq_f16(a, b, c, 4);  // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmlaq_lane_f16(float16x8_t a, float16x8_t b, float16x4_t c){
+  vcmlaq_lane_f16(a, b, c, 0);
+  vcmlaq_lane_f16(a, b, c, 1);
+
+  vcmlaq_lane_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmlaq_lane_f16(a, b, c, 2);  // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmlaq_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c){
+  vcmlaq_laneq_f16(a, b, c, 0);
+  vcmlaq_laneq_f16(a, b, c, 1);
+  vcmlaq_laneq_f16(a, b, c, 3);
+
+  vcmlaq_laneq_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmlaq_laneq_f16(a, b, c, 4);  // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_lane_f32(float32x2_t a, float32x2_t b, float32x2_t c){
+  vcmla_lane_f32(a, b, c, 0);
+
+  vcmla_lane_f32(a, b, c, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+  vcmla_lane_f32(a, b, c, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+  vcmla_lane_f32(a, b, c, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t c){
+  vcmla_laneq_f32(a, b, c, 0);
+
+  vcmla_laneq_f32(a, b, c, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+  vcmla_laneq_f32(a, b, c, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmlaq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t c){
+  vcmlaq_laneq_f32(a, b, c, 0);
+  vcmlaq_laneq_f32(a, b, c, 1);
+
+  vcmlaq_laneq_f32(a, b, c, 2); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmlaq_laneq_f32(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_rot90_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c){
+  vcmla_rot90_lane_f16(a, b, c, 0);
+  vcmla_rot90_lane_f16(a, b, c, 1);
+
+  vcmla_rot90_lane_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmla_rot90_lane_f16(a, b, c, 2); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_rot90_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c){
+  vcmla_rot90_laneq_f16(a, b, c, 0);
+  vcmla_rot90_laneq_f16(a, b, c, 3);
+
+  vcmla_rot90_laneq_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmla_rot90_laneq_f16(a, b, c, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmlaq_rot90_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c){
+  vcmlaq_rot90_laneq_f16(a, b, c, 0);
+  vcmlaq_rot90_laneq_f16(a, b, c, 3);
+
+  vcmlaq_rot90_laneq_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmlaq_rot90_laneq_f16(a, b, c, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_rot180_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c){
+  vcmla_rot180_lane_f16(a, b, c, 0);
+  vcmla_rot180_lane_f16(a, b, c, 1);
+
+  vcmla_rot180_lane_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmla_rot180_lane_f16(a, b, c, 2); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_rot180_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c){
+  vcmla_rot180_laneq_f16(a, b, c, 0);
+  vcmla_rot180_laneq_f16(a, b, c, 3);
+
+  vcmla_rot180_laneq_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmla_rot180_laneq_f16(a, b, c, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmlaq_rot180_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c){
+  vcmlaq_rot180_laneq_f16(a, b, c, 0);
+  vcmlaq_rot180_laneq_f16(a, b, c, 3);
+
+  vcmlaq_rot180_laneq_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmlaq_rot180_laneq_f16(a, b, c, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_rot270_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c){
+  vcmla_rot270_lane_f16(a, b, c, 0);
+  vcmla_rot270_lane_f16(a, b, c, 1);
+
+  vcmla_rot270_lane_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmla_rot270_lane_f16(a, b, c, 2); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_rot270_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c){
+  vcmla_rot270_laneq_f16(a, b, c, 0);
+  vcmla_rot270_laneq_f16(a, b, c, 3);
+
+  vcmla_rot270_laneq_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmla_rot270_laneq_f16(a, b, c, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmlaq_rot270_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c){
+  vcmlaq_rot270_laneq_f16(a, b, c, 0);
+  vcmlaq_rot270_laneq_f16(a, b, c, 3);
+
+  vcmlaq_rot270_laneq_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmlaq_rot270_laneq_f16(a, b, c, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_rot90_lane_f32(float32x2_t a, float32x2_t b, float32x2_t c){
+  vcmla_rot90_lane_f32(a, b, c, 0);
+
+  vcmla_rot90_lane_f32(a, b, c, 1);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
+  vcmla_rot90_lane_f32(a, b, c, -1);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_rot90_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t c){
+  vcmla_rot90_laneq_f32(a, b, c, 0);
+  vcmla_rot90_laneq_f32(a, b, c, 1);
+
+  vcmla_rot90_laneq_f32(a, b, c, 2);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
+  vcmla_rot90_laneq_f32(a, b, c, -1);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmlaq_rot90_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t c){
+  vcmlaq_rot90_laneq_f32(a, b, c, 0);
+  vcmlaq_rot90_laneq_f32(a, b, c, 1);
+
+  vcmlaq_rot90_laneq_f32(a, b, c, 2);  // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmlaq_rot90_laneq_f32(a, b, c, -1);  // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_rot180_lane_f32(float32x2_t a, float32x2_t b, float32x2_t c){
+  vcmla_rot180_lane_f32(a, b, c, 0);
+
+  vcmla_rot180_lane_f32(a, b, c, 1);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
+  vcmla_rot180_lane_f32(a, b, c, -1);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_rot180_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t c){
+  vcmla_rot180_laneq_f32(a, b, c, 0);
+  vcmla_rot180_laneq_f32(a, b, c, 1);
+
+  vcmla_rot180_laneq_f32(a, b, c, 2);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
+  vcmla_rot180_laneq_f32(a, b, c, -1);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmlaq_rot180_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t c){
+  vcmlaq_rot180_laneq_f32(a, b, c, 0);
+  vcmlaq_rot180_laneq_f32(a, b, c, 1);
+
+  vcmlaq_rot180_laneq_f32(a, b, c, 2);  // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmlaq_rot180_laneq_f32(a, b, c, -1);  // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_rot270_lane_f32(float32x2_t a, float32x2_t b, float32x2_t c){
+  vcmla_rot270_lane_f32(a, b, c, 0);
+
+  vcmla_rot270_lane_f32(a, b, c, 1);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
+  vcmla_rot270_lane_f32(a, b, c, -1);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_rot270_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t c){
+  vcmla_rot270_laneq_f32(a, b, c, 0);
+  vcmla_rot270_laneq_f32(a, b, c, 1);
+
+  vcmla_rot270_laneq_f32(a, b, c, 2);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
+  vcmla_rot270_laneq_f32(a, b, c, -1);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmlaq_rot270_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t c){
+  vcmlaq_rot270_laneq_f32(a, b, c, 0);
+  vcmlaq_rot270_laneq_f32(a, b, c, 1);
+
+  vcmlaq_rot270_laneq_f32(a, b, c, 2);  // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmlaq_rot270_laneq_f32(a, b, c, -1);  // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
\ No newline at end of file
diff --git clang/test/Sema/aarch64-neon-immediate-ranges/vector-load.c clang/test/Sema/aarch64-neon-immediate-ranges/vector-load.c
new file mode 100644
index 000000000000..3259d47e1b62
--- /dev/null
+++ clang/test/Sema/aarch64-neon-immediate-ranges/vector-load.c
@@ -0,0 +1,692 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+
+void test_vector_load_s8(int8x8x2_t arg_i8x8x2, int8x8x3_t arg_i8x8x3, int8x16x2_t arg_i8x16x2,
+						 int8x16x3_t arg_i8x16x3, int8x8_t arg_i8x8, int8x16x4_t arg_i8x16x4,
+						 int8x16_t arg_i8x16, int8x8x4_t arg_i8x8x4, int8_t* arg_i8_ptr) {
+	vld1_lane_s8(arg_i8_ptr, arg_i8x8, 0);
+	vld1_lane_s8(arg_i8_ptr, arg_i8x8, 7);
+	vld1_lane_s8(arg_i8_ptr, arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1_lane_s8(arg_i8_ptr, arg_i8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld1q_lane_s8(arg_i8_ptr, arg_i8x16, 0);
+	vld1q_lane_s8(arg_i8_ptr, arg_i8x16, 15);
+	vld1q_lane_s8(arg_i8_ptr, arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1q_lane_s8(arg_i8_ptr, arg_i8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2_lane_s8(arg_i8_ptr, arg_i8x8x2, 0);
+	vld2_lane_s8(arg_i8_ptr, arg_i8x8x2, 7);
+	vld2_lane_s8(arg_i8_ptr, arg_i8x8x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2_lane_s8(arg_i8_ptr, arg_i8x8x2, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2q_lane_s8(arg_i8_ptr, arg_i8x16x2, 0);
+	vld2q_lane_s8(arg_i8_ptr, arg_i8x16x2, 15);
+	vld2q_lane_s8(arg_i8_ptr, arg_i8x16x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2q_lane_s8(arg_i8_ptr, arg_i8x16x2, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3_lane_s8(arg_i8_ptr, arg_i8x8x3, 0);
+	vld3_lane_s8(arg_i8_ptr, arg_i8x8x3, 7);
+	vld3_lane_s8(arg_i8_ptr, arg_i8x8x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3_lane_s8(arg_i8_ptr, arg_i8x8x3, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3q_lane_s8(arg_i8_ptr, arg_i8x16x3, 0);
+	vld3q_lane_s8(arg_i8_ptr, arg_i8x16x3, 15);
+	vld3q_lane_s8(arg_i8_ptr, arg_i8x16x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3q_lane_s8(arg_i8_ptr, arg_i8x16x3, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4_lane_s8(arg_i8_ptr, arg_i8x8x4, 0);
+	vld4_lane_s8(arg_i8_ptr, arg_i8x8x4, 7);
+	vld4_lane_s8(arg_i8_ptr, arg_i8x8x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4_lane_s8(arg_i8_ptr, arg_i8x8x4, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4q_lane_s8(arg_i8_ptr, arg_i8x16x4, 0);
+	vld4q_lane_s8(arg_i8_ptr, arg_i8x16x4, 15);
+	vld4q_lane_s8(arg_i8_ptr, arg_i8x16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4q_lane_s8(arg_i8_ptr, arg_i8x16x4, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_load_s16(int16x8x2_t arg_i16x8x2, int16x8x3_t arg_i16x8x3, int16x8x4_t arg_i16x8x4,
+						  int16_t* arg_i16_ptr, int16x4x2_t arg_i16x4x2, int16x4x3_t arg_i16x4x3,
+						  int16x8_t arg_i16x8, int16x4x4_t arg_i16x4x4, int16x4_t arg_i16x4) {
+	vld1_lane_s16(arg_i16_ptr, arg_i16x4, 0);
+	vld1_lane_s16(arg_i16_ptr, arg_i16x4, 3);
+	vld1_lane_s16(arg_i16_ptr, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1_lane_s16(arg_i16_ptr, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld1q_lane_s16(arg_i16_ptr, arg_i16x8, 0);
+	vld1q_lane_s16(arg_i16_ptr, arg_i16x8, 7);
+	vld1q_lane_s16(arg_i16_ptr, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1q_lane_s16(arg_i16_ptr, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2_lane_s16(arg_i16_ptr, arg_i16x4x2, 0);
+	vld2_lane_s16(arg_i16_ptr, arg_i16x4x2, 3);
+	vld2_lane_s16(arg_i16_ptr, arg_i16x4x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2_lane_s16(arg_i16_ptr, arg_i16x4x2, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2q_lane_s16(arg_i16_ptr, arg_i16x8x2, 0);
+	vld2q_lane_s16(arg_i16_ptr, arg_i16x8x2, 7);
+	vld2q_lane_s16(arg_i16_ptr, arg_i16x8x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2q_lane_s16(arg_i16_ptr, arg_i16x8x2, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3_lane_s16(arg_i16_ptr, arg_i16x4x3, 0);
+	vld3_lane_s16(arg_i16_ptr, arg_i16x4x3, 3);
+	vld3_lane_s16(arg_i16_ptr, arg_i16x4x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3_lane_s16(arg_i16_ptr, arg_i16x4x3, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3q_lane_s16(arg_i16_ptr, arg_i16x8x3, 0);
+	vld3q_lane_s16(arg_i16_ptr, arg_i16x8x3, 7);
+	vld3q_lane_s16(arg_i16_ptr, arg_i16x8x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3q_lane_s16(arg_i16_ptr, arg_i16x8x3, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4_lane_s16(arg_i16_ptr, arg_i16x4x4, 0);
+	vld4_lane_s16(arg_i16_ptr, arg_i16x4x4, 3);
+	vld4_lane_s16(arg_i16_ptr, arg_i16x4x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4_lane_s16(arg_i16_ptr, arg_i16x4x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4q_lane_s16(arg_i16_ptr, arg_i16x8x4, 0);
+	vld4q_lane_s16(arg_i16_ptr, arg_i16x8x4, 7);
+	vld4q_lane_s16(arg_i16_ptr, arg_i16x8x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4q_lane_s16(arg_i16_ptr, arg_i16x8x4, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_load_s32(int32x2x4_t arg_i32x2x4, int32x4_t arg_i32x4, int32x2_t arg_i32x2,
+						  int32x4x2_t arg_i32x4x2, int32x4x4_t arg_i32x4x4, int32_t* arg_i32_ptr,
+						  int32x2x3_t arg_i32x2x3, int32x4x3_t arg_i32x4x3, int32x2x2_t arg_i32x2x2) {
+	vld1_lane_s32(arg_i32_ptr, arg_i32x2, 0);
+	vld1_lane_s32(arg_i32_ptr, arg_i32x2, 1);
+	vld1_lane_s32(arg_i32_ptr, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1_lane_s32(arg_i32_ptr, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld1q_lane_s32(arg_i32_ptr, arg_i32x4, 0);
+	vld1q_lane_s32(arg_i32_ptr, arg_i32x4, 3);
+	vld1q_lane_s32(arg_i32_ptr, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1q_lane_s32(arg_i32_ptr, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2_lane_s32(arg_i32_ptr, arg_i32x2x2, 0);
+	vld2_lane_s32(arg_i32_ptr, arg_i32x2x2, 1);
+	vld2_lane_s32(arg_i32_ptr, arg_i32x2x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2_lane_s32(arg_i32_ptr, arg_i32x2x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2q_lane_s32(arg_i32_ptr, arg_i32x4x2, 0);
+	vld2q_lane_s32(arg_i32_ptr, arg_i32x4x2, 3);
+	vld2q_lane_s32(arg_i32_ptr, arg_i32x4x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2q_lane_s32(arg_i32_ptr, arg_i32x4x2, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3_lane_s32(arg_i32_ptr, arg_i32x2x3, 0);
+	vld3_lane_s32(arg_i32_ptr, arg_i32x2x3, 1);
+	vld3_lane_s32(arg_i32_ptr, arg_i32x2x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3_lane_s32(arg_i32_ptr, arg_i32x2x3, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3q_lane_s32(arg_i32_ptr, arg_i32x4x3, 0);
+	vld3q_lane_s32(arg_i32_ptr, arg_i32x4x3, 3);
+	vld3q_lane_s32(arg_i32_ptr, arg_i32x4x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3q_lane_s32(arg_i32_ptr, arg_i32x4x3, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4_lane_s32(arg_i32_ptr, arg_i32x2x4, 0);
+	vld4_lane_s32(arg_i32_ptr, arg_i32x2x4, 1);
+	vld4_lane_s32(arg_i32_ptr, arg_i32x2x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4_lane_s32(arg_i32_ptr, arg_i32x2x4, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4q_lane_s32(arg_i32_ptr, arg_i32x4x4, 0);
+	vld4q_lane_s32(arg_i32_ptr, arg_i32x4x4, 3);
+	vld4q_lane_s32(arg_i32_ptr, arg_i32x4x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4q_lane_s32(arg_i32_ptr, arg_i32x4x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_load_s64(int64x1x4_t arg_i64x1x4, int64x1_t arg_i64x1, int64x2x2_t arg_i64x2x2,
+						  int64x2x4_t arg_i64x2x4, int64x1x3_t arg_i64x1x3, int64x1x2_t arg_i64x1x2,
+						  int64x2_t arg_i64x2, int64x2x3_t arg_i64x2x3, int64_t* arg_i64_ptr) {
+	vld1_lane_s64(arg_i64_ptr, arg_i64x1, 0);
+	vld1_lane_s64(arg_i64_ptr, arg_i64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1_lane_s64(arg_i64_ptr, arg_i64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld1q_lane_s64(arg_i64_ptr, arg_i64x2, 0);
+	vld1q_lane_s64(arg_i64_ptr, arg_i64x2, 1);
+	vld1q_lane_s64(arg_i64_ptr, arg_i64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1q_lane_s64(arg_i64_ptr, arg_i64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vldap1_lane_s64(arg_i64_ptr, arg_i64x1, 0);
+	vldap1_lane_s64(arg_i64_ptr, arg_i64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vldap1_lane_s64(arg_i64_ptr, arg_i64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vldap1q_lane_s64(arg_i64_ptr, arg_i64x2, 0);
+	vldap1q_lane_s64(arg_i64_ptr, arg_i64x2, 1);
+	vldap1q_lane_s64(arg_i64_ptr, arg_i64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vldap1q_lane_s64(arg_i64_ptr, arg_i64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vstl1_lane_s64(arg_i64_ptr, arg_i64x1, 0);
+	vstl1_lane_s64(arg_i64_ptr, arg_i64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vstl1_lane_s64(arg_i64_ptr, arg_i64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vstl1q_lane_s64(arg_i64_ptr, arg_i64x2, 0);
+	vstl1q_lane_s64(arg_i64_ptr, arg_i64x2, 1);
+	vstl1q_lane_s64(arg_i64_ptr, arg_i64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vstl1q_lane_s64(arg_i64_ptr, arg_i64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2_lane_s64(arg_i64_ptr, arg_i64x1x2, 0);
+	vld2_lane_s64(arg_i64_ptr, arg_i64x1x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2_lane_s64(arg_i64_ptr, arg_i64x1x2, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2q_lane_s64(arg_i64_ptr, arg_i64x2x2, 0);
+	vld2q_lane_s64(arg_i64_ptr, arg_i64x2x2, 1);
+	vld2q_lane_s64(arg_i64_ptr, arg_i64x2x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2q_lane_s64(arg_i64_ptr, arg_i64x2x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3_lane_s64(arg_i64_ptr, arg_i64x1x3, 0);
+	vld3_lane_s64(arg_i64_ptr, arg_i64x1x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3_lane_s64(arg_i64_ptr, arg_i64x1x3, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3q_lane_s64(arg_i64_ptr, arg_i64x2x3, 0);
+	vld3q_lane_s64(arg_i64_ptr, arg_i64x2x3, 1);
+	vld3q_lane_s64(arg_i64_ptr, arg_i64x2x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3q_lane_s64(arg_i64_ptr, arg_i64x2x3, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4_lane_s64(arg_i64_ptr, arg_i64x1x4, 0);
+	vld4_lane_s64(arg_i64_ptr, arg_i64x1x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4_lane_s64(arg_i64_ptr, arg_i64x1x4, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4q_lane_s64(arg_i64_ptr, arg_i64x2x4, 0);
+	vld4q_lane_s64(arg_i64_ptr, arg_i64x2x4, 1);
+	vld4q_lane_s64(arg_i64_ptr, arg_i64x2x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4q_lane_s64(arg_i64_ptr, arg_i64x2x4, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_load_u8(uint8x8x2_t arg_u8x8x2, uint8x16x2_t arg_u8x16x2, uint8x8x4_t arg_u8x8x4,
+						uint8x8_t arg_u8x8, uint8x8x3_t arg_u8x8x3, uint8x16_t arg_u8x16,
+						uint8x16x4_t arg_u8x16x4, uint8_t *arg_u8_ptr, uint8x16x3_t arg_u8x16x3) {
+	vld1_lane_u8(arg_u8_ptr, arg_u8x8, 0);
+	vld1_lane_u8(arg_u8_ptr, arg_u8x8, 7);
+	vld1_lane_u8(arg_u8_ptr, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1_lane_u8(arg_u8_ptr, arg_u8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld1q_lane_u8(arg_u8_ptr, arg_u8x16, 0);
+	vld1q_lane_u8(arg_u8_ptr, arg_u8x16, 15);
+	vld1q_lane_u8(arg_u8_ptr, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1q_lane_u8(arg_u8_ptr, arg_u8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2_lane_u8(arg_u8_ptr, arg_u8x8x2, 0);
+	vld2_lane_u8(arg_u8_ptr, arg_u8x8x2, 7);
+	vld2_lane_u8(arg_u8_ptr, arg_u8x8x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2_lane_u8(arg_u8_ptr, arg_u8x8x2, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2q_lane_u8(arg_u8_ptr, arg_u8x16x2, 0);
+	vld2q_lane_u8(arg_u8_ptr, arg_u8x16x2, 15);
+	vld2q_lane_u8(arg_u8_ptr, arg_u8x16x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2q_lane_u8(arg_u8_ptr, arg_u8x16x2, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3_lane_u8(arg_u8_ptr, arg_u8x8x3, 0);
+	vld3_lane_u8(arg_u8_ptr, arg_u8x8x3, 7);
+	vld3_lane_u8(arg_u8_ptr, arg_u8x8x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3_lane_u8(arg_u8_ptr, arg_u8x8x3, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3q_lane_u8(arg_u8_ptr, arg_u8x16x3, 0);
+	vld3q_lane_u8(arg_u8_ptr, arg_u8x16x3, 15);
+	vld3q_lane_u8(arg_u8_ptr, arg_u8x16x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3q_lane_u8(arg_u8_ptr, arg_u8x16x3, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4_lane_u8(arg_u8_ptr, arg_u8x8x4, 0);
+	vld4_lane_u8(arg_u8_ptr, arg_u8x8x4, 7);
+	vld4_lane_u8(arg_u8_ptr, arg_u8x8x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4_lane_u8(arg_u8_ptr, arg_u8x8x4, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4q_lane_u8(arg_u8_ptr, arg_u8x16x4, 0);
+	vld4q_lane_u8(arg_u8_ptr, arg_u8x16x4, 15);
+	vld4q_lane_u8(arg_u8_ptr, arg_u8x16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4q_lane_u8(arg_u8_ptr, arg_u8x16x4, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_load_u16(uint16x8x2_t arg_u16x8x2, uint16x8x4_t arg_u16x8x4, uint16x4x4_t arg_u16x4x4,
+						  uint16x4x2_t arg_u16x4x2, uint16x8_t arg_u16x8, uint16_t *arg_u16_ptr,
+						  uint16x8x3_t arg_u16x8x3, uint16x4_t arg_u16x4, uint16x4x3_t arg_u16x4x3) {
+	vld1_lane_u16(arg_u16_ptr, arg_u16x4, 0);
+	vld1_lane_u16(arg_u16_ptr, arg_u16x4, 3);
+	vld1_lane_u16(arg_u16_ptr, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1_lane_u16(arg_u16_ptr, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld1q_lane_u16(arg_u16_ptr, arg_u16x8, 0);
+	vld1q_lane_u16(arg_u16_ptr, arg_u16x8, 7);
+	vld1q_lane_u16(arg_u16_ptr, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1q_lane_u16(arg_u16_ptr, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2_lane_u16(arg_u16_ptr, arg_u16x4x2, 0);
+	vld2_lane_u16(arg_u16_ptr, arg_u16x4x2, 3);
+	vld2_lane_u16(arg_u16_ptr, arg_u16x4x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2_lane_u16(arg_u16_ptr, arg_u16x4x2, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2q_lane_u16(arg_u16_ptr, arg_u16x8x2, 0);
+	vld2q_lane_u16(arg_u16_ptr, arg_u16x8x2, 7);
+	vld2q_lane_u16(arg_u16_ptr, arg_u16x8x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2q_lane_u16(arg_u16_ptr, arg_u16x8x2, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3_lane_u16(arg_u16_ptr, arg_u16x4x3, 0);
+	vld3_lane_u16(arg_u16_ptr, arg_u16x4x3, 3);
+	vld3_lane_u16(arg_u16_ptr, arg_u16x4x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3_lane_u16(arg_u16_ptr, arg_u16x4x3, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3q_lane_u16(arg_u16_ptr, arg_u16x8x3, 0);
+	vld3q_lane_u16(arg_u16_ptr, arg_u16x8x3, 7);
+	vld3q_lane_u16(arg_u16_ptr, arg_u16x8x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3q_lane_u16(arg_u16_ptr, arg_u16x8x3, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4_lane_u16(arg_u16_ptr, arg_u16x4x4, 0);
+	vld4_lane_u16(arg_u16_ptr, arg_u16x4x4, 3);
+	vld4_lane_u16(arg_u16_ptr, arg_u16x4x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4_lane_u16(arg_u16_ptr, arg_u16x4x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4q_lane_u16(arg_u16_ptr, arg_u16x8x4, 0);
+	vld4q_lane_u16(arg_u16_ptr, arg_u16x8x4, 7);
+	vld4q_lane_u16(arg_u16_ptr, arg_u16x8x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4q_lane_u16(arg_u16_ptr, arg_u16x8x4, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_load_u32(uint32x2x3_t arg_u32x2x3, uint32x2_t arg_u32x2, uint32x2x4_t arg_u32x2x4,
+						  uint32x4_t arg_u32x4, uint32x4x2_t arg_u32x4x2, uint32x2x2_t arg_u32x2x2,
+						  uint32_t *arg_u32_ptr, uint32x4x4_t arg_u32x4x4, uint32x4x3_t arg_u32x4x3) {
+	vld1_lane_u32(arg_u32_ptr, arg_u32x2, 0);
+	vld1_lane_u32(arg_u32_ptr, arg_u32x2, 1);
+	vld1_lane_u32(arg_u32_ptr, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1_lane_u32(arg_u32_ptr, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld1q_lane_u32(arg_u32_ptr, arg_u32x4, 0);
+	vld1q_lane_u32(arg_u32_ptr, arg_u32x4, 3);
+	vld1q_lane_u32(arg_u32_ptr, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1q_lane_u32(arg_u32_ptr, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2_lane_u32(arg_u32_ptr, arg_u32x2x2, 0);
+	vld2_lane_u32(arg_u32_ptr, arg_u32x2x2, 1);
+	vld2_lane_u32(arg_u32_ptr, arg_u32x2x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2_lane_u32(arg_u32_ptr, arg_u32x2x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2q_lane_u32(arg_u32_ptr, arg_u32x4x2, 0);
+	vld2q_lane_u32(arg_u32_ptr, arg_u32x4x2, 3);
+	vld2q_lane_u32(arg_u32_ptr, arg_u32x4x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2q_lane_u32(arg_u32_ptr, arg_u32x4x2, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3_lane_u32(arg_u32_ptr, arg_u32x2x3, 0);
+	vld3_lane_u32(arg_u32_ptr, arg_u32x2x3, 1);
+	vld3_lane_u32(arg_u32_ptr, arg_u32x2x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3_lane_u32(arg_u32_ptr, arg_u32x2x3, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3q_lane_u32(arg_u32_ptr, arg_u32x4x3, 0);
+	vld3q_lane_u32(arg_u32_ptr, arg_u32x4x3, 3);
+	vld3q_lane_u32(arg_u32_ptr, arg_u32x4x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3q_lane_u32(arg_u32_ptr, arg_u32x4x3, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4_lane_u32(arg_u32_ptr, arg_u32x2x4, 0);
+	vld4_lane_u32(arg_u32_ptr, arg_u32x2x4, 1);
+	vld4_lane_u32(arg_u32_ptr, arg_u32x2x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4_lane_u32(arg_u32_ptr, arg_u32x2x4, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4q_lane_u32(arg_u32_ptr, arg_u32x4x4, 0);
+	vld4q_lane_u32(arg_u32_ptr, arg_u32x4x4, 3);
+	vld4q_lane_u32(arg_u32_ptr, arg_u32x4x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4q_lane_u32(arg_u32_ptr, arg_u32x4x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_load_u64(uint64x2x2_t arg_u64x2x2, uint64x1x2_t arg_u64x1x2, uint64x2x3_t arg_u64x2x3,
+						  uint64x1_t arg_u64x1, uint64x1x4_t arg_u64x1x4, uint64x1x3_t arg_u64x1x3,
+						  uint64_t *arg_u64_ptr, uint64x2_t arg_u64x2, uint64x2x4_t arg_u64x2x4) {
+	vld1_lane_u64(arg_u64_ptr, arg_u64x1, 0);
+	vld1_lane_u64(arg_u64_ptr, arg_u64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1_lane_u64(arg_u64_ptr, arg_u64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld1q_lane_u64(arg_u64_ptr, arg_u64x2, 0);
+	vld1q_lane_u64(arg_u64_ptr, arg_u64x2, 1);
+	vld1q_lane_u64(arg_u64_ptr, arg_u64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1q_lane_u64(arg_u64_ptr, arg_u64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vldap1_lane_u64(arg_u64_ptr, arg_u64x1, 0);
+	vldap1_lane_u64(arg_u64_ptr, arg_u64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vldap1_lane_u64(arg_u64_ptr, arg_u64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vldap1q_lane_u64(arg_u64_ptr, arg_u64x2, 0);
+	vldap1q_lane_u64(arg_u64_ptr, arg_u64x2, 1);
+	vldap1q_lane_u64(arg_u64_ptr, arg_u64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vldap1q_lane_u64(arg_u64_ptr, arg_u64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vstl1_lane_u64(arg_u64_ptr, arg_u64x1, 0);
+	vstl1_lane_u64(arg_u64_ptr, arg_u64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vstl1_lane_u64(arg_u64_ptr, arg_u64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vstl1q_lane_u64(arg_u64_ptr, arg_u64x2, 0);
+	vstl1q_lane_u64(arg_u64_ptr, arg_u64x2, 1);
+	vstl1q_lane_u64(arg_u64_ptr, arg_u64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vstl1q_lane_u64(arg_u64_ptr, arg_u64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2_lane_u64(arg_u64_ptr, arg_u64x1x2, 0);
+	vld2_lane_u64(arg_u64_ptr, arg_u64x1x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2_lane_u64(arg_u64_ptr, arg_u64x1x2, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2q_lane_u64(arg_u64_ptr, arg_u64x2x2, 0);
+	vld2q_lane_u64(arg_u64_ptr, arg_u64x2x2, 1);
+	vld2q_lane_u64(arg_u64_ptr, arg_u64x2x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2q_lane_u64(arg_u64_ptr, arg_u64x2x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3_lane_u64(arg_u64_ptr, arg_u64x1x3, 0);
+	vld3_lane_u64(arg_u64_ptr, arg_u64x1x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3_lane_u64(arg_u64_ptr, arg_u64x1x3, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3q_lane_u64(arg_u64_ptr, arg_u64x2x3, 0);
+	vld3q_lane_u64(arg_u64_ptr, arg_u64x2x3, 1);
+	vld3q_lane_u64(arg_u64_ptr, arg_u64x2x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3q_lane_u64(arg_u64_ptr, arg_u64x2x3, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4_lane_u64(arg_u64_ptr, arg_u64x1x4, 0);
+	vld4_lane_u64(arg_u64_ptr, arg_u64x1x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4_lane_u64(arg_u64_ptr, arg_u64x1x4, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4q_lane_u64(arg_u64_ptr, arg_u64x2x4, 0);
+	vld4q_lane_u64(arg_u64_ptr, arg_u64x2x4, 1);
+	vld4q_lane_u64(arg_u64_ptr, arg_u64x2x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4q_lane_u64(arg_u64_ptr, arg_u64x2x4, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_load_p64(poly64_t *arg_p64_ptr, poly64x2x2_t arg_p64x2x2, poly64x1x2_t arg_p64x1x2,
+						  poly64x2x4_t arg_p64x2x4, poly64x1x3_t arg_p64x1x3, poly64x2x3_t arg_p64x2x3,
+						  poly64x1_t arg_p64x1, poly64x2_t arg_p64x2, poly64x1x4_t arg_p64x1x4) {
+	vld1_lane_p64(arg_p64_ptr, arg_p64x1, 0);
+	vld1_lane_p64(arg_p64_ptr, arg_p64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1_lane_p64(arg_p64_ptr, arg_p64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld1q_lane_p64(arg_p64_ptr, arg_p64x2, 0);
+	vld1q_lane_p64(arg_p64_ptr, arg_p64x2, 1);
+	vld1q_lane_p64(arg_p64_ptr, arg_p64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1q_lane_p64(arg_p64_ptr, arg_p64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vldap1_lane_p64(arg_p64_ptr, arg_p64x1, 0);
+	vldap1_lane_p64(arg_p64_ptr, arg_p64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vldap1_lane_p64(arg_p64_ptr, arg_p64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vldap1q_lane_p64(arg_p64_ptr, arg_p64x2, 0);
+	vldap1q_lane_p64(arg_p64_ptr, arg_p64x2, 1);
+	vldap1q_lane_p64(arg_p64_ptr, arg_p64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vldap1q_lane_p64(arg_p64_ptr, arg_p64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vstl1_lane_p64(arg_p64_ptr, arg_p64x1, 0);
+	vstl1_lane_p64(arg_p64_ptr, arg_p64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vstl1_lane_p64(arg_p64_ptr, arg_p64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vstl1q_lane_p64(arg_p64_ptr, arg_p64x2, 0);
+	vstl1q_lane_p64(arg_p64_ptr, arg_p64x2, 1);
+	vstl1q_lane_p64(arg_p64_ptr, arg_p64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vstl1q_lane_p64(arg_p64_ptr, arg_p64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2_lane_p64(arg_p64_ptr, arg_p64x1x2, 0);
+	vld2_lane_p64(arg_p64_ptr, arg_p64x1x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2_lane_p64(arg_p64_ptr, arg_p64x1x2, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2q_lane_p64(arg_p64_ptr, arg_p64x2x2, 0);
+	vld2q_lane_p64(arg_p64_ptr, arg_p64x2x2, 1);
+	vld2q_lane_p64(arg_p64_ptr, arg_p64x2x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2q_lane_p64(arg_p64_ptr, arg_p64x2x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3_lane_p64(arg_p64_ptr, arg_p64x1x3, 0);
+	vld3_lane_p64(arg_p64_ptr, arg_p64x1x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3_lane_p64(arg_p64_ptr, arg_p64x1x3, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3q_lane_p64(arg_p64_ptr, arg_p64x2x3, 0);
+	vld3q_lane_p64(arg_p64_ptr, arg_p64x2x3, 1);
+	vld3q_lane_p64(arg_p64_ptr, arg_p64x2x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3q_lane_p64(arg_p64_ptr, arg_p64x2x3, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4_lane_p64(arg_p64_ptr, arg_p64x1x4, 0);
+	vld4_lane_p64(arg_p64_ptr, arg_p64x1x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4_lane_p64(arg_p64_ptr, arg_p64x1x4, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4q_lane_p64(arg_p64_ptr, arg_p64x2x4, 0);
+	vld4q_lane_p64(arg_p64_ptr, arg_p64x2x4, 1);
+	vld4q_lane_p64(arg_p64_ptr, arg_p64x2x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4q_lane_p64(arg_p64_ptr, arg_p64x2x4, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_load_f16(float16_t *arg_f16_ptr, float16x8_t arg_f16x8, float16x8x2_t arg_f16x8x2,
+						  float16x8x3_t arg_f16x8x3, float16x4x4_t arg_f16x4x4, float16x8x4_t arg_f16x8x4,
+						  float16x4x2_t arg_f16x4x2, float16x4_t arg_f16x4, float16x4x3_t arg_f16x4x3) {
+	vld1_lane_f16(arg_f16_ptr, arg_f16x4, 0);
+	vld1_lane_f16(arg_f16_ptr, arg_f16x4, 3);
+	vld1_lane_f16(arg_f16_ptr, arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1_lane_f16(arg_f16_ptr, arg_f16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld1q_lane_f16(arg_f16_ptr, arg_f16x8, 0);
+	vld1q_lane_f16(arg_f16_ptr, arg_f16x8, 7);
+	vld1q_lane_f16(arg_f16_ptr, arg_f16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1q_lane_f16(arg_f16_ptr, arg_f16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2_lane_f16(arg_f16_ptr, arg_f16x4x2, 0);
+	vld2_lane_f16(arg_f16_ptr, arg_f16x4x2, 3);
+	vld2_lane_f16(arg_f16_ptr, arg_f16x4x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2_lane_f16(arg_f16_ptr, arg_f16x4x2, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2q_lane_f16(arg_f16_ptr, arg_f16x8x2, 0);
+	vld2q_lane_f16(arg_f16_ptr, arg_f16x8x2, 7);
+	vld2q_lane_f16(arg_f16_ptr, arg_f16x8x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2q_lane_f16(arg_f16_ptr, arg_f16x8x2, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3_lane_f16(arg_f16_ptr, arg_f16x4x3, 0);
+	vld3_lane_f16(arg_f16_ptr, arg_f16x4x3, 3);
+	vld3_lane_f16(arg_f16_ptr, arg_f16x4x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3_lane_f16(arg_f16_ptr, arg_f16x4x3, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3q_lane_f16(arg_f16_ptr, arg_f16x8x3, 0);
+	vld3q_lane_f16(arg_f16_ptr, arg_f16x8x3, 7);
+	vld3q_lane_f16(arg_f16_ptr, arg_f16x8x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3q_lane_f16(arg_f16_ptr, arg_f16x8x3, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4_lane_f16(arg_f16_ptr, arg_f16x4x4, 0);
+	vld4_lane_f16(arg_f16_ptr, arg_f16x4x4, 3);
+	vld4_lane_f16(arg_f16_ptr, arg_f16x4x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4_lane_f16(arg_f16_ptr, arg_f16x4x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4q_lane_f16(arg_f16_ptr, arg_f16x8x4, 0);
+	vld4q_lane_f16(arg_f16_ptr, arg_f16x8x4, 7);
+	vld4q_lane_f16(arg_f16_ptr, arg_f16x8x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4q_lane_f16(arg_f16_ptr, arg_f16x8x4, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_load_f32(float32_t *arg_f32_ptr, float32x4x3_t arg_f32x4x3, float32x2x4_t arg_f32x2x4,
+						  float32x4x4_t arg_f32x4x4, float32x2x3_t arg_f32x2x3, float32x2x2_t arg_f32x2x2,
+						  float32x4x2_t arg_f32x4x2, float32x2_t arg_f32x2, float32x4_t arg_f32x4) {
+	vld1_lane_f32(arg_f32_ptr, arg_f32x2, 0);
+	vld1_lane_f32(arg_f32_ptr, arg_f32x2, 1);
+	vld1_lane_f32(arg_f32_ptr, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1_lane_f32(arg_f32_ptr, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld1q_lane_f32(arg_f32_ptr, arg_f32x4, 0);
+	vld1q_lane_f32(arg_f32_ptr, arg_f32x4, 3);
+	vld1q_lane_f32(arg_f32_ptr, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1q_lane_f32(arg_f32_ptr, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2_lane_f32(arg_f32_ptr, arg_f32x2x2, 0);
+	vld2_lane_f32(arg_f32_ptr, arg_f32x2x2, 1);
+	vld2_lane_f32(arg_f32_ptr, arg_f32x2x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2_lane_f32(arg_f32_ptr, arg_f32x2x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2q_lane_f32(arg_f32_ptr, arg_f32x4x2, 0);
+	vld2q_lane_f32(arg_f32_ptr, arg_f32x4x2, 3);
+	vld2q_lane_f32(arg_f32_ptr, arg_f32x4x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2q_lane_f32(arg_f32_ptr, arg_f32x4x2, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3_lane_f32(arg_f32_ptr, arg_f32x2x3, 0);
+	vld3_lane_f32(arg_f32_ptr, arg_f32x2x3, 1);
+	vld3_lane_f32(arg_f32_ptr, arg_f32x2x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3_lane_f32(arg_f32_ptr, arg_f32x2x3, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3q_lane_f32(arg_f32_ptr, arg_f32x4x3, 0);
+	vld3q_lane_f32(arg_f32_ptr, arg_f32x4x3, 3);
+	vld3q_lane_f32(arg_f32_ptr, arg_f32x4x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3q_lane_f32(arg_f32_ptr, arg_f32x4x3, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4_lane_f32(arg_f32_ptr, arg_f32x2x4, 0);
+	vld4_lane_f32(arg_f32_ptr, arg_f32x2x4, 1);
+	vld4_lane_f32(arg_f32_ptr, arg_f32x2x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4_lane_f32(arg_f32_ptr, arg_f32x2x4, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4q_lane_f32(arg_f32_ptr, arg_f32x4x4, 0);
+	vld4q_lane_f32(arg_f32_ptr, arg_f32x4x4, 3);
+	vld4q_lane_f32(arg_f32_ptr, arg_f32x4x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4q_lane_f32(arg_f32_ptr, arg_f32x4x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_load_p8(poly8x16_t arg_p8x16, poly8x8x2_t arg_p8x8x2, poly8x16x4_t arg_p8x16x4,
+						 poly8_t *arg_p8_ptr, poly8x8_t arg_p8x8, poly8x8x4_t arg_p8x8x4,
+						 poly8x16x2_t arg_p8x16x2, poly8x8x3_t arg_p8x8x3, poly8x16x3_t arg_p8x16x3) {
+	vld1_lane_p8(arg_p8_ptr, arg_p8x8, 0);
+	vld1_lane_p8(arg_p8_ptr, arg_p8x8, 7);
+	vld1_lane_p8(arg_p8_ptr, arg_p8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1_lane_p8(arg_p8_ptr, arg_p8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld1q_lane_p8(arg_p8_ptr, arg_p8x16, 0);
+	vld1q_lane_p8(arg_p8_ptr, arg_p8x16, 15);
+	vld1q_lane_p8(arg_p8_ptr, arg_p8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1q_lane_p8(arg_p8_ptr, arg_p8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2_lane_p8(arg_p8_ptr, arg_p8x8x2, 0);
+	vld2_lane_p8(arg_p8_ptr, arg_p8x8x2, 7);
+	vld2_lane_p8(arg_p8_ptr, arg_p8x8x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2_lane_p8(arg_p8_ptr, arg_p8x8x2, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2q_lane_p8(arg_p8_ptr, arg_p8x16x2, 0);
+	vld2q_lane_p8(arg_p8_ptr, arg_p8x16x2, 15);
+	vld2q_lane_p8(arg_p8_ptr, arg_p8x16x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2q_lane_p8(arg_p8_ptr, arg_p8x16x2, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3_lane_p8(arg_p8_ptr, arg_p8x8x3, 0);
+	vld3_lane_p8(arg_p8_ptr, arg_p8x8x3, 7);
+	vld3_lane_p8(arg_p8_ptr, arg_p8x8x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3_lane_p8(arg_p8_ptr, arg_p8x8x3, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3q_lane_p8(arg_p8_ptr, arg_p8x16x3, 0);
+	vld3q_lane_p8(arg_p8_ptr, arg_p8x16x3, 15);
+	vld3q_lane_p8(arg_p8_ptr, arg_p8x16x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3q_lane_p8(arg_p8_ptr, arg_p8x16x3, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4_lane_p8(arg_p8_ptr, arg_p8x8x4, 0);
+	vld4_lane_p8(arg_p8_ptr, arg_p8x8x4, 7);
+	vld4_lane_p8(arg_p8_ptr, arg_p8x8x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4_lane_p8(arg_p8_ptr, arg_p8x8x4, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4q_lane_p8(arg_p8_ptr, arg_p8x16x4, 0);
+	vld4q_lane_p8(arg_p8_ptr, arg_p8x16x4, 15);
+	vld4q_lane_p8(arg_p8_ptr, arg_p8x16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4q_lane_p8(arg_p8_ptr, arg_p8x16x4, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_load_p16(poly16x8x4_t arg_p16x8x4, poly16x8_t arg_p16x8, poly16x4x4_t arg_p16x4x4,
+						  poly16x8x3_t arg_p16x8x3, poly16_t *arg_p16_ptr, poly16x4_t arg_p16x4,
+						  poly16x8x2_t arg_p16x8x2, poly16x4x2_t arg_p16x4x2, poly16x4x3_t arg_p16x4x3) {
+	vld1_lane_p16(arg_p16_ptr, arg_p16x4, 0);
+	vld1_lane_p16(arg_p16_ptr, arg_p16x4, 3);
+	vld1_lane_p16(arg_p16_ptr, arg_p16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1_lane_p16(arg_p16_ptr, arg_p16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld1q_lane_p16(arg_p16_ptr, arg_p16x8, 0);
+	vld1q_lane_p16(arg_p16_ptr, arg_p16x8, 7);
+	vld1q_lane_p16(arg_p16_ptr, arg_p16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1q_lane_p16(arg_p16_ptr, arg_p16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2_lane_p16(arg_p16_ptr, arg_p16x4x2, 0);
+	vld2_lane_p16(arg_p16_ptr, arg_p16x4x2, 3);
+	vld2_lane_p16(arg_p16_ptr, arg_p16x4x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2_lane_p16(arg_p16_ptr, arg_p16x4x2, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2q_lane_p16(arg_p16_ptr, arg_p16x8x2, 0);
+	vld2q_lane_p16(arg_p16_ptr, arg_p16x8x2, 7);
+	vld2q_lane_p16(arg_p16_ptr, arg_p16x8x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2q_lane_p16(arg_p16_ptr, arg_p16x8x2, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3_lane_p16(arg_p16_ptr, arg_p16x4x3, 0);
+	vld3_lane_p16(arg_p16_ptr, arg_p16x4x3, 3);
+	vld3_lane_p16(arg_p16_ptr, arg_p16x4x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3_lane_p16(arg_p16_ptr, arg_p16x4x3, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3q_lane_p16(arg_p16_ptr, arg_p16x8x3, 0);
+	vld3q_lane_p16(arg_p16_ptr, arg_p16x8x3, 7);
+	vld3q_lane_p16(arg_p16_ptr, arg_p16x8x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3q_lane_p16(arg_p16_ptr, arg_p16x8x3, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4_lane_p16(arg_p16_ptr, arg_p16x4x4, 0);
+	vld4_lane_p16(arg_p16_ptr, arg_p16x4x4, 3);
+	vld4_lane_p16(arg_p16_ptr, arg_p16x4x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4_lane_p16(arg_p16_ptr, arg_p16x4x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4q_lane_p16(arg_p16_ptr, arg_p16x8x4, 0);
+	vld4q_lane_p16(arg_p16_ptr, arg_p16x8x4, 7);
+	vld4q_lane_p16(arg_p16_ptr, arg_p16x8x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4q_lane_p16(arg_p16_ptr, arg_p16x8x4, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_load_f64(float64x1_t arg_f64x1, float64x1x2_t arg_f64x1x2, float64_t* arg_f64_ptr,
+						  float64x2x3_t arg_f64x2x3, float64x2x4_t arg_f64x2x4, float64x2x2_t arg_f64x2x2,
+						  float64x2_t arg_f64x2, float64x1x3_t arg_f64x1x3, float64x1x4_t arg_f64x1x4) {
+	vld1_lane_f64(arg_f64_ptr, arg_f64x1, 0);
+	vld1_lane_f64(arg_f64_ptr, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1_lane_f64(arg_f64_ptr, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld1q_lane_f64(arg_f64_ptr, arg_f64x2, 0);
+	vld1q_lane_f64(arg_f64_ptr, arg_f64x2, 1);
+	vld1q_lane_f64(arg_f64_ptr, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1q_lane_f64(arg_f64_ptr, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vldap1_lane_f64(arg_f64_ptr, arg_f64x1, 0);
+	vldap1_lane_f64(arg_f64_ptr, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vldap1_lane_f64(arg_f64_ptr, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vldap1q_lane_f64(arg_f64_ptr, arg_f64x2, 0);
+	vldap1q_lane_f64(arg_f64_ptr, arg_f64x2, 1);
+	vldap1q_lane_f64(arg_f64_ptr, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vldap1q_lane_f64(arg_f64_ptr, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vstl1_lane_f64(arg_f64_ptr, arg_f64x1, 0);
+	vstl1_lane_f64(arg_f64_ptr, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vstl1_lane_f64(arg_f64_ptr, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vstl1q_lane_f64(arg_f64_ptr, arg_f64x2, 0);
+	vstl1q_lane_f64(arg_f64_ptr, arg_f64x2, 1);
+	vstl1q_lane_f64(arg_f64_ptr, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vstl1q_lane_f64(arg_f64_ptr, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2_lane_f64(arg_f64_ptr, arg_f64x1x2, 0);
+	vld2_lane_f64(arg_f64_ptr, arg_f64x1x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2_lane_f64(arg_f64_ptr, arg_f64x1x2, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2q_lane_f64(arg_f64_ptr, arg_f64x2x2, 0);
+	vld2q_lane_f64(arg_f64_ptr, arg_f64x2x2, 1);
+	vld2q_lane_f64(arg_f64_ptr, arg_f64x2x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2q_lane_f64(arg_f64_ptr, arg_f64x2x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3_lane_f64(arg_f64_ptr, arg_f64x1x3, 0);
+	vld3_lane_f64(arg_f64_ptr, arg_f64x1x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3_lane_f64(arg_f64_ptr, arg_f64x1x3, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3q_lane_f64(arg_f64_ptr, arg_f64x2x3, 0);
+	vld3q_lane_f64(arg_f64_ptr, arg_f64x2x3, 1);
+	vld3q_lane_f64(arg_f64_ptr, arg_f64x2x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3q_lane_f64(arg_f64_ptr, arg_f64x2x3, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4_lane_f64(arg_f64_ptr, arg_f64x1x4, 0);
+	vld4_lane_f64(arg_f64_ptr, arg_f64x1x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4_lane_f64(arg_f64_ptr, arg_f64x1x4, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4q_lane_f64(arg_f64_ptr, arg_f64x2x4, 0);
+	vld4q_lane_f64(arg_f64_ptr, arg_f64x2x4, 1);
+	vld4q_lane_f64(arg_f64_ptr, arg_f64x2x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4q_lane_f64(arg_f64_ptr, arg_f64x2x4, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-accumulate-by-scalar.c clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-accumulate-by-scalar.c
new file mode 100644
index 000000000000..a7eee3ad25e0
--- /dev/null
+++ clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-accumulate-by-scalar.c
@@ -0,0 +1,200 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+void test_vector_multiply_accumulate_by_scalar_s16(int32x4_t arg_i32x4, int16x8_t arg_i16x8, int16x4_t arg_i16x4) {
+	vmla_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, 0);
+	vmla_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, 3);
+	vmla_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmla_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlaq_lane_s16(arg_i16x8, arg_i16x8, arg_i16x4, 0);
+	vmlaq_lane_s16(arg_i16x8, arg_i16x8, arg_i16x4, 3);
+	vmlaq_lane_s16(arg_i16x8, arg_i16x8, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlaq_lane_s16(arg_i16x8, arg_i16x8, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmla_laneq_s16(arg_i16x4, arg_i16x4, arg_i16x8, 0);
+	vmla_laneq_s16(arg_i16x4, arg_i16x4, arg_i16x8, 7);
+	vmla_laneq_s16(arg_i16x4, arg_i16x4, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmla_laneq_s16(arg_i16x4, arg_i16x4, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlaq_laneq_s16(arg_i16x8, arg_i16x8, arg_i16x8, 0);
+	vmlaq_laneq_s16(arg_i16x8, arg_i16x8, arg_i16x8, 7);
+	vmlaq_laneq_s16(arg_i16x8, arg_i16x8, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlaq_laneq_s16(arg_i16x8, arg_i16x8, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlal_lane_s16(arg_i32x4, arg_i16x4, arg_i16x4, 0);
+	vmlal_lane_s16(arg_i32x4, arg_i16x4, arg_i16x4, 3);
+	vmlal_lane_s16(arg_i32x4, arg_i16x4, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlal_lane_s16(arg_i32x4, arg_i16x4, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlal_high_lane_s16(arg_i32x4, arg_i16x8, arg_i16x4, 0);
+	vmlal_high_lane_s16(arg_i32x4, arg_i16x8, arg_i16x4, 3);
+	vmlal_high_lane_s16(arg_i32x4, arg_i16x8, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlal_high_lane_s16(arg_i32x4, arg_i16x8, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlal_laneq_s16(arg_i32x4, arg_i16x4, arg_i16x8, 0);
+	vmlal_laneq_s16(arg_i32x4, arg_i16x4, arg_i16x8, 7);
+	vmlal_laneq_s16(arg_i32x4, arg_i16x4, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlal_laneq_s16(arg_i32x4, arg_i16x4, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlal_high_laneq_s16(arg_i32x4, arg_i16x8, arg_i16x8, 0);
+	vmlal_high_laneq_s16(arg_i32x4, arg_i16x8, arg_i16x8, 7);
+	vmlal_high_laneq_s16(arg_i32x4, arg_i16x8, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlal_high_laneq_s16(arg_i32x4, arg_i16x8, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_multiply_accumulate_by_scalar_s32(int32x4_t arg_i32x4, int32x2_t arg_i32x2, int64x2_t arg_i64x2) {
+	vmla_lane_s32(arg_i32x2, arg_i32x2, arg_i32x2, 0);
+	vmla_lane_s32(arg_i32x2, arg_i32x2, arg_i32x2, 1);
+	vmla_lane_s32(arg_i32x2, arg_i32x2, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmla_lane_s32(arg_i32x2, arg_i32x2, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlaq_lane_s32(arg_i32x4, arg_i32x4, arg_i32x2, 0);
+	vmlaq_lane_s32(arg_i32x4, arg_i32x4, arg_i32x2, 1);
+	vmlaq_lane_s32(arg_i32x4, arg_i32x4, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlaq_lane_s32(arg_i32x4, arg_i32x4, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmla_laneq_s32(arg_i32x2, arg_i32x2, arg_i32x4, 0);
+	vmla_laneq_s32(arg_i32x2, arg_i32x2, arg_i32x4, 3);
+	vmla_laneq_s32(arg_i32x2, arg_i32x2, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmla_laneq_s32(arg_i32x2, arg_i32x2, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlaq_laneq_s32(arg_i32x4, arg_i32x4, arg_i32x4, 0);
+	vmlaq_laneq_s32(arg_i32x4, arg_i32x4, arg_i32x4, 3);
+	vmlaq_laneq_s32(arg_i32x4, arg_i32x4, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlaq_laneq_s32(arg_i32x4, arg_i32x4, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlal_lane_s32(arg_i64x2, arg_i32x2, arg_i32x2, 0);
+	vmlal_lane_s32(arg_i64x2, arg_i32x2, arg_i32x2, 1);
+	vmlal_lane_s32(arg_i64x2, arg_i32x2, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlal_lane_s32(arg_i64x2, arg_i32x2, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlal_high_lane_s32(arg_i64x2, arg_i32x4, arg_i32x2, 0);
+	vmlal_high_lane_s32(arg_i64x2, arg_i32x4, arg_i32x2, 1);
+	vmlal_high_lane_s32(arg_i64x2, arg_i32x4, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlal_high_lane_s32(arg_i64x2, arg_i32x4, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlal_laneq_s32(arg_i64x2, arg_i32x2, arg_i32x4, 0);
+	vmlal_laneq_s32(arg_i64x2, arg_i32x2, arg_i32x4, 3);
+	vmlal_laneq_s32(arg_i64x2, arg_i32x2, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlal_laneq_s32(arg_i64x2, arg_i32x2, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlal_high_laneq_s32(arg_i64x2, arg_i32x4, arg_i32x4, 0);
+	vmlal_high_laneq_s32(arg_i64x2, arg_i32x4, arg_i32x4, 3);
+	vmlal_high_laneq_s32(arg_i64x2, arg_i32x4, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlal_high_laneq_s32(arg_i64x2, arg_i32x4, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_multiply_accumulate_by_scalar_u16(uint16x4_t arg_u16x4, uint16x8_t arg_u16x8, uint32x4_t arg_u32x4) {
+	vmla_lane_u16(arg_u16x4, arg_u16x4, arg_u16x4, 0);
+	vmla_lane_u16(arg_u16x4, arg_u16x4, arg_u16x4, 3);
+	vmla_lane_u16(arg_u16x4, arg_u16x4, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmla_lane_u16(arg_u16x4, arg_u16x4, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlaq_lane_u16(arg_u16x8, arg_u16x8, arg_u16x4, 0);
+	vmlaq_lane_u16(arg_u16x8, arg_u16x8, arg_u16x4, 3);
+	vmlaq_lane_u16(arg_u16x8, arg_u16x8, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlaq_lane_u16(arg_u16x8, arg_u16x8, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmla_laneq_u16(arg_u16x4, arg_u16x4, arg_u16x8, 0);
+	vmla_laneq_u16(arg_u16x4, arg_u16x4, arg_u16x8, 7);
+	vmla_laneq_u16(arg_u16x4, arg_u16x4, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmla_laneq_u16(arg_u16x4, arg_u16x4, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlaq_laneq_u16(arg_u16x8, arg_u16x8, arg_u16x8, 0);
+	vmlaq_laneq_u16(arg_u16x8, arg_u16x8, arg_u16x8, 7);
+	vmlaq_laneq_u16(arg_u16x8, arg_u16x8, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlaq_laneq_u16(arg_u16x8, arg_u16x8, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlal_lane_u16(arg_u32x4, arg_u16x4, arg_u16x4, 0);
+	vmlal_lane_u16(arg_u32x4, arg_u16x4, arg_u16x4, 3);
+	vmlal_lane_u16(arg_u32x4, arg_u16x4, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlal_lane_u16(arg_u32x4, arg_u16x4, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlal_high_lane_u16(arg_u32x4, arg_u16x8, arg_u16x4, 0);
+	vmlal_high_lane_u16(arg_u32x4, arg_u16x8, arg_u16x4, 3);
+	vmlal_high_lane_u16(arg_u32x4, arg_u16x8, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlal_high_lane_u16(arg_u32x4, arg_u16x8, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlal_laneq_u16(arg_u32x4, arg_u16x4, arg_u16x8, 0);
+	vmlal_laneq_u16(arg_u32x4, arg_u16x4, arg_u16x8, 7);
+	vmlal_laneq_u16(arg_u32x4, arg_u16x4, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlal_laneq_u16(arg_u32x4, arg_u16x4, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlal_high_laneq_u16(arg_u32x4, arg_u16x8, arg_u16x8, 0);
+	vmlal_high_laneq_u16(arg_u32x4, arg_u16x8, arg_u16x8, 7);
+	vmlal_high_laneq_u16(arg_u32x4, arg_u16x8, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlal_high_laneq_u16(arg_u32x4, arg_u16x8, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_multiply_accumulate_by_scalar_u32(uint64x2_t arg_u64x2, uint32x2_t arg_u32x2, uint32x4_t arg_u32x4) {
+	vmla_lane_u32(arg_u32x2, arg_u32x2, arg_u32x2, 0);
+	vmla_lane_u32(arg_u32x2, arg_u32x2, arg_u32x2, 1);
+	vmla_lane_u32(arg_u32x2, arg_u32x2, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmla_lane_u32(arg_u32x2, arg_u32x2, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlaq_lane_u32(arg_u32x4, arg_u32x4, arg_u32x2, 0);
+	vmlaq_lane_u32(arg_u32x4, arg_u32x4, arg_u32x2, 1);
+	vmlaq_lane_u32(arg_u32x4, arg_u32x4, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlaq_lane_u32(arg_u32x4, arg_u32x4, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmla_laneq_u32(arg_u32x2, arg_u32x2, arg_u32x4, 0);
+	vmla_laneq_u32(arg_u32x2, arg_u32x2, arg_u32x4, 3);
+	vmla_laneq_u32(arg_u32x2, arg_u32x2, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmla_laneq_u32(arg_u32x2, arg_u32x2, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlaq_laneq_u32(arg_u32x4, arg_u32x4, arg_u32x4, 0);
+	vmlaq_laneq_u32(arg_u32x4, arg_u32x4, arg_u32x4, 3);
+	vmlaq_laneq_u32(arg_u32x4, arg_u32x4, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlaq_laneq_u32(arg_u32x4, arg_u32x4, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlal_lane_u32(arg_u64x2, arg_u32x2, arg_u32x2, 0);
+	vmlal_lane_u32(arg_u64x2, arg_u32x2, arg_u32x2, 1);
+	vmlal_lane_u32(arg_u64x2, arg_u32x2, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlal_lane_u32(arg_u64x2, arg_u32x2, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlal_high_lane_u32(arg_u64x2, arg_u32x4, arg_u32x2, 0);
+	vmlal_high_lane_u32(arg_u64x2, arg_u32x4, arg_u32x2, 1);
+	vmlal_high_lane_u32(arg_u64x2, arg_u32x4, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlal_high_lane_u32(arg_u64x2, arg_u32x4, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlal_laneq_u32(arg_u64x2, arg_u32x2, arg_u32x4, 0);
+	vmlal_laneq_u32(arg_u64x2, arg_u32x2, arg_u32x4, 3);
+	vmlal_laneq_u32(arg_u64x2, arg_u32x2, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlal_laneq_u32(arg_u64x2, arg_u32x2, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlal_high_laneq_u32(arg_u64x2, arg_u32x4, arg_u32x4, 0);
+	vmlal_high_laneq_u32(arg_u64x2, arg_u32x4, arg_u32x4, 3);
+	vmlal_high_laneq_u32(arg_u64x2, arg_u32x4, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlal_high_laneq_u32(arg_u64x2, arg_u32x4, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_multiply_accumulate_by_scalar_f32(float32x4_t arg_f32x4, float32x2_t arg_f32x2) {
+	vmla_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, 0);
+	vmla_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, 1);
+	vmla_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmla_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlaq_lane_f32(arg_f32x4, arg_f32x4, arg_f32x2, 0);
+	vmlaq_lane_f32(arg_f32x4, arg_f32x4, arg_f32x2, 1);
+	vmlaq_lane_f32(arg_f32x4, arg_f32x4, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlaq_lane_f32(arg_f32x4, arg_f32x4, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmla_laneq_f32(arg_f32x2, arg_f32x2, arg_f32x4, 0);
+	vmla_laneq_f32(arg_f32x2, arg_f32x2, arg_f32x4, 3);
+	vmla_laneq_f32(arg_f32x2, arg_f32x2, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmla_laneq_f32(arg_f32x2, arg_f32x2, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlaq_laneq_f32(arg_f32x4, arg_f32x4, arg_f32x4, 0);
+	vmlaq_laneq_f32(arg_f32x4, arg_f32x4, arg_f32x4, 3);
+	vmlaq_laneq_f32(arg_f32x4, arg_f32x4, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlaq_laneq_f32(arg_f32x4, arg_f32x4, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-by-scalar-and-widen.c clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-by-scalar-and-widen.c
new file mode 100644
index 000000000000..1ed848742e68
--- /dev/null
+++ clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-by-scalar-and-widen.c
@@ -0,0 +1,98 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+
+void test_vector_multiply_by_scalar_and_widen_s16(int16x4_t arg_i16x4, int16x8_t arg_i16x8) {
+	vmull_lane_s16(arg_i16x4, arg_i16x4, 0);
+	vmull_lane_s16(arg_i16x4, arg_i16x4, 3);
+	vmull_lane_s16(arg_i16x4, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmull_lane_s16(arg_i16x4, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmull_high_lane_s16(arg_i16x8, arg_i16x4, 0);
+	vmull_high_lane_s16(arg_i16x8, arg_i16x4, 3);
+	vmull_high_lane_s16(arg_i16x8, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmull_high_lane_s16(arg_i16x8, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmull_laneq_s16(arg_i16x4, arg_i16x8, 0);
+	vmull_laneq_s16(arg_i16x4, arg_i16x8, 7);
+	vmull_laneq_s16(arg_i16x4, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmull_laneq_s16(arg_i16x4, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmull_high_laneq_s16(arg_i16x8, arg_i16x8, 0);
+	vmull_high_laneq_s16(arg_i16x8, arg_i16x8, 7);
+	vmull_high_laneq_s16(arg_i16x8, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmull_high_laneq_s16(arg_i16x8, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_multiply_by_scalar_and_widen_s32(int32x4_t arg_i32x4, int32x2_t arg_i32x2) {
+	vmull_lane_s32(arg_i32x2, arg_i32x2, 0);
+	vmull_lane_s32(arg_i32x2, arg_i32x2, 1);
+	vmull_lane_s32(arg_i32x2, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmull_lane_s32(arg_i32x2, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmull_high_lane_s32(arg_i32x4, arg_i32x2, 0);
+	vmull_high_lane_s32(arg_i32x4, arg_i32x2, 1);
+	vmull_high_lane_s32(arg_i32x4, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmull_high_lane_s32(arg_i32x4, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmull_laneq_s32(arg_i32x2, arg_i32x4, 0);
+	vmull_laneq_s32(arg_i32x2, arg_i32x4, 3);
+	vmull_laneq_s32(arg_i32x2, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmull_laneq_s32(arg_i32x2, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmull_high_laneq_s32(arg_i32x4, arg_i32x4, 0);
+	vmull_high_laneq_s32(arg_i32x4, arg_i32x4, 3);
+	vmull_high_laneq_s32(arg_i32x4, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmull_high_laneq_s32(arg_i32x4, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_multiply_by_scalar_and_widen_u16(uint16x8_t arg_u16x8, uint16x4_t arg_u16x4) {
+	vmull_lane_u16(arg_u16x4, arg_u16x4, 0);
+	vmull_lane_u16(arg_u16x4, arg_u16x4, 3);
+	vmull_lane_u16(arg_u16x4, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmull_lane_u16(arg_u16x4, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmull_high_lane_u16(arg_u16x8, arg_u16x4, 0);
+	vmull_high_lane_u16(arg_u16x8, arg_u16x4, 3);
+	vmull_high_lane_u16(arg_u16x8, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmull_high_lane_u16(arg_u16x8, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmull_laneq_u16(arg_u16x4, arg_u16x8, 0);
+	vmull_laneq_u16(arg_u16x4, arg_u16x8, 7);
+	vmull_laneq_u16(arg_u16x4, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmull_laneq_u16(arg_u16x4, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmull_high_laneq_u16(arg_u16x8, arg_u16x8, 0);
+	vmull_high_laneq_u16(arg_u16x8, arg_u16x8, 7);
+	vmull_high_laneq_u16(arg_u16x8, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmull_high_laneq_u16(arg_u16x8, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_multiply_by_scalar_and_widen_u32(uint32x2_t arg_u32x2, uint32x4_t arg_u32x4) {
+	vmull_lane_u32(arg_u32x2, arg_u32x2, 0);
+	vmull_lane_u32(arg_u32x2, arg_u32x2, 1);
+	vmull_lane_u32(arg_u32x2, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmull_lane_u32(arg_u32x2, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmull_high_lane_u32(arg_u32x4, arg_u32x2, 0);
+	vmull_high_lane_u32(arg_u32x4, arg_u32x2, 1);
+	vmull_high_lane_u32(arg_u32x4, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmull_high_lane_u32(arg_u32x4, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmull_laneq_u32(arg_u32x2, arg_u32x4, 0);
+	vmull_laneq_u32(arg_u32x2, arg_u32x4, 3);
+	vmull_laneq_u32(arg_u32x2, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmull_laneq_u32(arg_u32x2, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmull_high_laneq_u32(arg_u32x4, arg_u32x4, 0);
+	vmull_high_laneq_u32(arg_u32x4, arg_u32x4, 3);
+	vmull_high_laneq_u32(arg_u32x4, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmull_high_laneq_u32(arg_u32x4, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-by-scalar.c clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-by-scalar.c
new file mode 100644
index 000000000000..7c9e73fb12a5
--- /dev/null
+++ clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-by-scalar.c
@@ -0,0 +1,160 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+
+void test_vector_multiply_by_scalar_s16(int16x4_t arg_i16x4, int16x8_t arg_i16x8) {
+	vmul_lane_s16(arg_i16x4, arg_i16x4, 0);
+	vmul_lane_s16(arg_i16x4, arg_i16x4, 3);
+	vmul_lane_s16(arg_i16x4, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmul_lane_s16(arg_i16x4, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulq_lane_s16(arg_i16x8, arg_i16x4, 0);
+	vmulq_lane_s16(arg_i16x8, arg_i16x4, 3);
+	vmulq_lane_s16(arg_i16x8, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulq_lane_s16(arg_i16x8, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmul_laneq_s16(arg_i16x4, arg_i16x8, 0);
+	vmul_laneq_s16(arg_i16x4, arg_i16x8, 7);
+	vmul_laneq_s16(arg_i16x4, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmul_laneq_s16(arg_i16x4, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulq_laneq_s16(arg_i16x8, arg_i16x8, 0);
+	vmulq_laneq_s16(arg_i16x8, arg_i16x8, 7);
+	vmulq_laneq_s16(arg_i16x8, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulq_laneq_s16(arg_i16x8, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_multiply_by_scalar_s32(int32x2_t arg_i32x2, int32x4_t arg_i32x4) {
+	vmul_lane_s32(arg_i32x2, arg_i32x2, 0);
+	vmul_lane_s32(arg_i32x2, arg_i32x2, 1);
+	vmul_lane_s32(arg_i32x2, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmul_lane_s32(arg_i32x2, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulq_lane_s32(arg_i32x4, arg_i32x2, 0);
+	vmulq_lane_s32(arg_i32x4, arg_i32x2, 1);
+	vmulq_lane_s32(arg_i32x4, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulq_lane_s32(arg_i32x4, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmul_laneq_s32(arg_i32x2, arg_i32x4, 0);
+	vmul_laneq_s32(arg_i32x2, arg_i32x4, 3);
+	vmul_laneq_s32(arg_i32x2, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmul_laneq_s32(arg_i32x2, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulq_laneq_s32(arg_i32x4, arg_i32x4, 0);
+	vmulq_laneq_s32(arg_i32x4, arg_i32x4, 3);
+	vmulq_laneq_s32(arg_i32x4, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulq_laneq_s32(arg_i32x4, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_multiply_by_scalar_u16(uint16x8_t arg_u16x8, uint16x4_t arg_u16x4) {
+	vmul_lane_u16(arg_u16x4, arg_u16x4, 0);
+	vmul_lane_u16(arg_u16x4, arg_u16x4, 3);
+	vmul_lane_u16(arg_u16x4, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmul_lane_u16(arg_u16x4, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulq_lane_u16(arg_u16x8, arg_u16x4, 0);
+	vmulq_lane_u16(arg_u16x8, arg_u16x4, 3);
+	vmulq_lane_u16(arg_u16x8, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulq_lane_u16(arg_u16x8, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmul_laneq_u16(arg_u16x4, arg_u16x8, 0);
+	vmul_laneq_u16(arg_u16x4, arg_u16x8, 7);
+	vmul_laneq_u16(arg_u16x4, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmul_laneq_u16(arg_u16x4, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulq_laneq_u16(arg_u16x8, arg_u16x8, 0);
+	vmulq_laneq_u16(arg_u16x8, arg_u16x8, 7);
+	vmulq_laneq_u16(arg_u16x8, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulq_laneq_u16(arg_u16x8, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_multiply_by_scalar_u32(uint32x2_t arg_u32x2, uint32x4_t arg_u32x4) {
+	vmul_lane_u32(arg_u32x2, arg_u32x2, 0);
+	vmul_lane_u32(arg_u32x2, arg_u32x2, 1);
+	vmul_lane_u32(arg_u32x2, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmul_lane_u32(arg_u32x2, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulq_lane_u32(arg_u32x4, arg_u32x2, 0);
+	vmulq_lane_u32(arg_u32x4, arg_u32x2, 1);
+	vmulq_lane_u32(arg_u32x4, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulq_lane_u32(arg_u32x4, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmul_laneq_u32(arg_u32x2, arg_u32x4, 0);
+	vmul_laneq_u32(arg_u32x2, arg_u32x4, 3);
+	vmul_laneq_u32(arg_u32x2, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmul_laneq_u32(arg_u32x2, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulq_laneq_u32(arg_u32x4, arg_u32x4, 0);
+	vmulq_laneq_u32(arg_u32x4, arg_u32x4, 3);
+	vmulq_laneq_u32(arg_u32x4, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulq_laneq_u32(arg_u32x4, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_multiply_by_scalar_f32(float32_t arg_f32, float32x2_t arg_f32x2, float32x4_t arg_f32x4) {
+	vmul_lane_f32(arg_f32x2, arg_f32x2, 0);
+	vmul_lane_f32(arg_f32x2, arg_f32x2, 1);
+	vmul_lane_f32(arg_f32x2, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmul_lane_f32(arg_f32x2, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulq_lane_f32(arg_f32x4, arg_f32x2, 0);
+	vmulq_lane_f32(arg_f32x4, arg_f32x2, 1);
+	vmulq_lane_f32(arg_f32x4, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulq_lane_f32(arg_f32x4, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmuls_lane_f32(arg_f32, arg_f32x2, 0);
+	vmuls_lane_f32(arg_f32, arg_f32x2, 1);
+	vmuls_lane_f32(arg_f32, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmuls_lane_f32(arg_f32, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmul_laneq_f32(arg_f32x2, arg_f32x4, 0);
+	vmul_laneq_f32(arg_f32x2, arg_f32x4, 3);
+	vmul_laneq_f32(arg_f32x2, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmul_laneq_f32(arg_f32x2, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulq_laneq_f32(arg_f32x4, arg_f32x4, 0);
+	vmulq_laneq_f32(arg_f32x4, arg_f32x4, 3);
+	vmulq_laneq_f32(arg_f32x4, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulq_laneq_f32(arg_f32x4, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmuls_laneq_f32(arg_f32, arg_f32x4, 0);
+	vmuls_laneq_f32(arg_f32, arg_f32x4, 3);
+	vmuls_laneq_f32(arg_f32, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmuls_laneq_f32(arg_f32, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_multiply_by_scalar_f64(float64x1_t arg_f64x1, float64_t arg_f64, float64x2_t arg_f64x2) {
+	vmul_lane_f64(arg_f64x1, arg_f64x1, 0);
+	vmul_lane_f64(arg_f64x1, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmul_lane_f64(arg_f64x1, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulq_lane_f64(arg_f64x2, arg_f64x1, 0);
+	vmulq_lane_f64(arg_f64x2, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulq_lane_f64(arg_f64x2, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmuld_lane_f64(arg_f64, arg_f64x1, 0);
+	vmuld_lane_f64(arg_f64, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmuld_lane_f64(arg_f64, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmul_laneq_f64(arg_f64x1, arg_f64x2, 0);
+	vmul_laneq_f64(arg_f64x1, arg_f64x2, 1);
+	vmul_laneq_f64(arg_f64x1, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmul_laneq_f64(arg_f64x1, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulq_laneq_f64(arg_f64x2, arg_f64x2, 0);
+	vmulq_laneq_f64(arg_f64x2, arg_f64x2, 1);
+	vmulq_laneq_f64(arg_f64x2, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulq_laneq_f64(arg_f64x2, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmuld_laneq_f64(arg_f64, arg_f64x2, 0);
+	vmuld_laneq_f64(arg_f64, arg_f64x2, 1);
+	vmuld_laneq_f64(arg_f64, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmuld_laneq_f64(arg_f64, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
diff --git clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-subtract-by-scalar.c clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-subtract-by-scalar.c
new file mode 100644
index 000000000000..c717948b13da
--- /dev/null
+++ clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-subtract-by-scalar.c
@@ -0,0 +1,201 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+
+void test_vector_multiply_subtract_by_scalar_s16(int16x8_t arg_i16x8, int16x4_t arg_i16x4, int32x4_t arg_i32x4) {
+	vmls_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, 0);
+	vmls_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, 3);
+	vmls_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmls_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsq_lane_s16(arg_i16x8, arg_i16x8, arg_i16x4, 0);
+	vmlsq_lane_s16(arg_i16x8, arg_i16x8, arg_i16x4, 3);
+	vmlsq_lane_s16(arg_i16x8, arg_i16x8, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsq_lane_s16(arg_i16x8, arg_i16x8, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmls_laneq_s16(arg_i16x4, arg_i16x4, arg_i16x8, 0);
+	vmls_laneq_s16(arg_i16x4, arg_i16x4, arg_i16x8, 7);
+	vmls_laneq_s16(arg_i16x4, arg_i16x4, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmls_laneq_s16(arg_i16x4, arg_i16x4, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsq_laneq_s16(arg_i16x8, arg_i16x8, arg_i16x8, 0);
+	vmlsq_laneq_s16(arg_i16x8, arg_i16x8, arg_i16x8, 7);
+	vmlsq_laneq_s16(arg_i16x8, arg_i16x8, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsq_laneq_s16(arg_i16x8, arg_i16x8, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsl_lane_s16(arg_i32x4, arg_i16x4, arg_i16x4, 0);
+	vmlsl_lane_s16(arg_i32x4, arg_i16x4, arg_i16x4, 3);
+	vmlsl_lane_s16(arg_i32x4, arg_i16x4, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsl_lane_s16(arg_i32x4, arg_i16x4, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsl_high_lane_s16(arg_i32x4, arg_i16x8, arg_i16x4, 0);
+	vmlsl_high_lane_s16(arg_i32x4, arg_i16x8, arg_i16x4, 3);
+	vmlsl_high_lane_s16(arg_i32x4, arg_i16x8, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsl_high_lane_s16(arg_i32x4, arg_i16x8, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsl_laneq_s16(arg_i32x4, arg_i16x4, arg_i16x8, 0);
+	vmlsl_laneq_s16(arg_i32x4, arg_i16x4, arg_i16x8, 7);
+	vmlsl_laneq_s16(arg_i32x4, arg_i16x4, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsl_laneq_s16(arg_i32x4, arg_i16x4, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsl_high_laneq_s16(arg_i32x4, arg_i16x8, arg_i16x8, 0);
+	vmlsl_high_laneq_s16(arg_i32x4, arg_i16x8, arg_i16x8, 7);
+	vmlsl_high_laneq_s16(arg_i32x4, arg_i16x8, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsl_high_laneq_s16(arg_i32x4, arg_i16x8, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_multiply_subtract_by_scalar_s32(int64x2_t arg_i64x2, int32x4_t arg_i32x4, int32x2_t arg_i32x2) {
+	vmls_lane_s32(arg_i32x2, arg_i32x2, arg_i32x2, 0);
+	vmls_lane_s32(arg_i32x2, arg_i32x2, arg_i32x2, 1);
+	vmls_lane_s32(arg_i32x2, arg_i32x2, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmls_lane_s32(arg_i32x2, arg_i32x2, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsq_lane_s32(arg_i32x4, arg_i32x4, arg_i32x2, 0);
+	vmlsq_lane_s32(arg_i32x4, arg_i32x4, arg_i32x2, 1);
+	vmlsq_lane_s32(arg_i32x4, arg_i32x4, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsq_lane_s32(arg_i32x4, arg_i32x4, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmls_laneq_s32(arg_i32x2, arg_i32x2, arg_i32x4, 0);
+	vmls_laneq_s32(arg_i32x2, arg_i32x2, arg_i32x4, 3);
+	vmls_laneq_s32(arg_i32x2, arg_i32x2, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmls_laneq_s32(arg_i32x2, arg_i32x2, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsq_laneq_s32(arg_i32x4, arg_i32x4, arg_i32x4, 0);
+	vmlsq_laneq_s32(arg_i32x4, arg_i32x4, arg_i32x4, 3);
+	vmlsq_laneq_s32(arg_i32x4, arg_i32x4, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsq_laneq_s32(arg_i32x4, arg_i32x4, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsl_lane_s32(arg_i64x2, arg_i32x2, arg_i32x2, 0);
+	vmlsl_lane_s32(arg_i64x2, arg_i32x2, arg_i32x2, 1);
+	vmlsl_lane_s32(arg_i64x2, arg_i32x2, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsl_lane_s32(arg_i64x2, arg_i32x2, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsl_high_lane_s32(arg_i64x2, arg_i32x4, arg_i32x2, 0);
+	vmlsl_high_lane_s32(arg_i64x2, arg_i32x4, arg_i32x2, 1);
+	vmlsl_high_lane_s32(arg_i64x2, arg_i32x4, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsl_high_lane_s32(arg_i64x2, arg_i32x4, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsl_laneq_s32(arg_i64x2, arg_i32x2, arg_i32x4, 0);
+	vmlsl_laneq_s32(arg_i64x2, arg_i32x2, arg_i32x4, 3);
+	vmlsl_laneq_s32(arg_i64x2, arg_i32x2, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsl_laneq_s32(arg_i64x2, arg_i32x2, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsl_high_laneq_s32(arg_i64x2, arg_i32x4, arg_i32x4, 0);
+	vmlsl_high_laneq_s32(arg_i64x2, arg_i32x4, arg_i32x4, 3);
+	vmlsl_high_laneq_s32(arg_i64x2, arg_i32x4, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsl_high_laneq_s32(arg_i64x2, arg_i32x4, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_multiply_subtract_by_scalar_u16(uint16x8_t arg_u16x8, uint16x4_t arg_u16x4, uint32x4_t arg_u32x4) {
+	vmls_lane_u16(arg_u16x4, arg_u16x4, arg_u16x4, 0);
+	vmls_lane_u16(arg_u16x4, arg_u16x4, arg_u16x4, 3);
+	vmls_lane_u16(arg_u16x4, arg_u16x4, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmls_lane_u16(arg_u16x4, arg_u16x4, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsq_lane_u16(arg_u16x8, arg_u16x8, arg_u16x4, 0);
+	vmlsq_lane_u16(arg_u16x8, arg_u16x8, arg_u16x4, 3);
+	vmlsq_lane_u16(arg_u16x8, arg_u16x8, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsq_lane_u16(arg_u16x8, arg_u16x8, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmls_laneq_u16(arg_u16x4, arg_u16x4, arg_u16x8, 0);
+	vmls_laneq_u16(arg_u16x4, arg_u16x4, arg_u16x8, 7);
+	vmls_laneq_u16(arg_u16x4, arg_u16x4, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmls_laneq_u16(arg_u16x4, arg_u16x4, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsq_laneq_u16(arg_u16x8, arg_u16x8, arg_u16x8, 0);
+	vmlsq_laneq_u16(arg_u16x8, arg_u16x8, arg_u16x8, 7);
+	vmlsq_laneq_u16(arg_u16x8, arg_u16x8, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsq_laneq_u16(arg_u16x8, arg_u16x8, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsl_lane_u16(arg_u32x4, arg_u16x4, arg_u16x4, 0);
+	vmlsl_lane_u16(arg_u32x4, arg_u16x4, arg_u16x4, 3);
+	vmlsl_lane_u16(arg_u32x4, arg_u16x4, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsl_lane_u16(arg_u32x4, arg_u16x4, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsl_high_lane_u16(arg_u32x4, arg_u16x8, arg_u16x4, 0);
+	vmlsl_high_lane_u16(arg_u32x4, arg_u16x8, arg_u16x4, 3);
+	vmlsl_high_lane_u16(arg_u32x4, arg_u16x8, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsl_high_lane_u16(arg_u32x4, arg_u16x8, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsl_laneq_u16(arg_u32x4, arg_u16x4, arg_u16x8, 0);
+	vmlsl_laneq_u16(arg_u32x4, arg_u16x4, arg_u16x8, 7);
+	vmlsl_laneq_u16(arg_u32x4, arg_u16x4, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsl_laneq_u16(arg_u32x4, arg_u16x4, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsl_high_laneq_u16(arg_u32x4, arg_u16x8, arg_u16x8, 0);
+	vmlsl_high_laneq_u16(arg_u32x4, arg_u16x8, arg_u16x8, 7);
+	vmlsl_high_laneq_u16(arg_u32x4, arg_u16x8, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsl_high_laneq_u16(arg_u32x4, arg_u16x8, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_multiply_subtract_by_scalar_u32(uint64x2_t arg_u64x2, uint32x2_t arg_u32x2, uint32x4_t arg_u32x4) {
+	vmls_lane_u32(arg_u32x2, arg_u32x2, arg_u32x2, 0);
+	vmls_lane_u32(arg_u32x2, arg_u32x2, arg_u32x2, 1);
+	vmls_lane_u32(arg_u32x2, arg_u32x2, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmls_lane_u32(arg_u32x2, arg_u32x2, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsq_lane_u32(arg_u32x4, arg_u32x4, arg_u32x2, 0);
+	vmlsq_lane_u32(arg_u32x4, arg_u32x4, arg_u32x2, 1);
+	vmlsq_lane_u32(arg_u32x4, arg_u32x4, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsq_lane_u32(arg_u32x4, arg_u32x4, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmls_laneq_u32(arg_u32x2, arg_u32x2, arg_u32x4, 0);
+	vmls_laneq_u32(arg_u32x2, arg_u32x2, arg_u32x4, 3);
+	vmls_laneq_u32(arg_u32x2, arg_u32x2, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmls_laneq_u32(arg_u32x2, arg_u32x2, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsq_laneq_u32(arg_u32x4, arg_u32x4, arg_u32x4, 0);
+	vmlsq_laneq_u32(arg_u32x4, arg_u32x4, arg_u32x4, 3);
+	vmlsq_laneq_u32(arg_u32x4, arg_u32x4, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsq_laneq_u32(arg_u32x4, arg_u32x4, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsl_lane_u32(arg_u64x2, arg_u32x2, arg_u32x2, 0);
+	vmlsl_lane_u32(arg_u64x2, arg_u32x2, arg_u32x2, 1);
+	vmlsl_lane_u32(arg_u64x2, arg_u32x2, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsl_lane_u32(arg_u64x2, arg_u32x2, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsl_high_lane_u32(arg_u64x2, arg_u32x4, arg_u32x2, 0);
+	vmlsl_high_lane_u32(arg_u64x2, arg_u32x4, arg_u32x2, 1);
+	vmlsl_high_lane_u32(arg_u64x2, arg_u32x4, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsl_high_lane_u32(arg_u64x2, arg_u32x4, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsl_laneq_u32(arg_u64x2, arg_u32x2, arg_u32x4, 0);
+	vmlsl_laneq_u32(arg_u64x2, arg_u32x2, arg_u32x4, 3);
+	vmlsl_laneq_u32(arg_u64x2, arg_u32x2, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsl_laneq_u32(arg_u64x2, arg_u32x2, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsl_high_laneq_u32(arg_u64x2, arg_u32x4, arg_u32x4, 0);
+	vmlsl_high_laneq_u32(arg_u64x2, arg_u32x4, arg_u32x4, 3);
+	vmlsl_high_laneq_u32(arg_u64x2, arg_u32x4, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsl_high_laneq_u32(arg_u64x2, arg_u32x4, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_multiply_subtract_by_scalar_f32(float32x4_t arg_f32x4, float32x2_t arg_f32x2) {
+	vmls_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, 0);
+	vmls_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, 1);
+	vmls_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmls_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsq_lane_f32(arg_f32x4, arg_f32x4, arg_f32x2, 0);
+	vmlsq_lane_f32(arg_f32x4, arg_f32x4, arg_f32x2, 1);
+	vmlsq_lane_f32(arg_f32x4, arg_f32x4, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsq_lane_f32(arg_f32x4, arg_f32x4, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmls_laneq_f32(arg_f32x2, arg_f32x2, arg_f32x4, 0);
+	vmls_laneq_f32(arg_f32x2, arg_f32x2, arg_f32x4, 3);
+	vmls_laneq_f32(arg_f32x2, arg_f32x2, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmls_laneq_f32(arg_f32x2, arg_f32x2, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsq_laneq_f32(arg_f32x4, arg_f32x4, arg_f32x4, 0);
+	vmlsq_laneq_f32(arg_f32x4, arg_f32x4, arg_f32x4, 3);
+	vmlsq_laneq_f32(arg_f32x4, arg_f32x4, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsq_laneq_f32(arg_f32x4, arg_f32x4, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git clang/test/Sema/aarch64-neon-immediate-ranges/vector-shift-left.c clang/test/Sema/aarch64-neon-immediate-ranges/vector-shift-left.c
new file mode 100644
index 000000000000..1def72fc843d
--- /dev/null
+++ clang/test/Sema/aarch64-neon-immediate-ranges/vector-shift-left.c
@@ -0,0 +1,542 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+// Widening left-shifts should have a range of 0..(sizeinbits(arg)-1), but this range has had
+// to be weakened to 0..((sizeinbits(arg)*2)-1) because vshll_n_s16 is used with an
+// out-of-bounds immediate in the definition of vcvt_f32_bf16. As a result, the upper bounds
+// of widening left-shift intrinsics are not currently tested here.
+
+void test_vector_shift_left_s8(int8x8_t arg_i8x8, int8x16_t arg_i8x16) {
+	vshl_n_s8(arg_i8x8, 0);
+	vshl_n_s8(arg_i8x8, 7);
+	vshl_n_s8(arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshl_n_s8(arg_i8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshlq_n_s8(arg_i8x16, 0);
+	vshlq_n_s8(arg_i8x16, 7);
+	vshlq_n_s8(arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshlq_n_s8(arg_i8x16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_s16(int16x4_t arg_i16x4, int16x8_t arg_i16x8) {
+	vshl_n_s16(arg_i16x4, 0);
+	vshl_n_s16(arg_i16x4, 15);
+	vshl_n_s16(arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshl_n_s16(arg_i16x4, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshlq_n_s16(arg_i16x8, 0);
+	vshlq_n_s16(arg_i16x8, 15);
+	vshlq_n_s16(arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshlq_n_s16(arg_i16x8, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_s32(int32x2_t arg_i32x2, int32x4_t arg_i32x4) {
+	vshl_n_s32(arg_i32x2, 0);
+	vshl_n_s32(arg_i32x2, 31);
+	vshl_n_s32(arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshl_n_s32(arg_i32x2, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshlq_n_s32(arg_i32x4, 0);
+	vshlq_n_s32(arg_i32x4, 31);
+	vshlq_n_s32(arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshlq_n_s32(arg_i32x4, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_s64(int64_t arg_i64, int64x2_t arg_i64x2, int64x1_t arg_i64x1) {
+	vshl_n_s64(arg_i64x1, 0);
+	vshl_n_s64(arg_i64x1, 63);
+	vshl_n_s64(arg_i64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshl_n_s64(arg_i64x1, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshlq_n_s64(arg_i64x2, 0);
+	vshlq_n_s64(arg_i64x2, 63);
+	vshlq_n_s64(arg_i64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshlq_n_s64(arg_i64x2, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshld_n_s64(arg_i64, 0);
+	vshld_n_s64(arg_i64, 63);
+	vshld_n_s64(arg_i64, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshld_n_s64(arg_i64, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_u8(uint8x8_t arg_u8x8, uint8x16_t arg_u8x16) {
+	vshl_n_u8(arg_u8x8, 0);
+	vshl_n_u8(arg_u8x8, 7);
+	vshl_n_u8(arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshl_n_u8(arg_u8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshlq_n_u8(arg_u8x16, 0);
+	vshlq_n_u8(arg_u8x16, 7);
+	vshlq_n_u8(arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshlq_n_u8(arg_u8x16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_u16(uint16x4_t arg_u16x4, uint16x8_t arg_u16x8) {
+	vshl_n_u16(arg_u16x4, 0);
+	vshl_n_u16(arg_u16x4, 15);
+	vshl_n_u16(arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshl_n_u16(arg_u16x4, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshlq_n_u16(arg_u16x8, 0);
+	vshlq_n_u16(arg_u16x8, 15);
+	vshlq_n_u16(arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshlq_n_u16(arg_u16x8, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_u32(uint32x2_t arg_u32x2, uint32x4_t arg_u32x4) {
+	vshl_n_u32(arg_u32x2, 0);
+	vshl_n_u32(arg_u32x2, 31);
+	vshl_n_u32(arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshl_n_u32(arg_u32x2, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshlq_n_u32(arg_u32x4, 0);
+	vshlq_n_u32(arg_u32x4, 31);
+	vshlq_n_u32(arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshlq_n_u32(arg_u32x4, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_u64(uint64x1_t arg_u64x1, uint64_t arg_u64, uint64x2_t arg_u64x2) {
+	vshl_n_u64(arg_u64x1, 0);
+	vshl_n_u64(arg_u64x1, 63);
+	vshl_n_u64(arg_u64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshl_n_u64(arg_u64x1, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshlq_n_u64(arg_u64x2, 0);
+	vshlq_n_u64(arg_u64x2, 63);
+	vshlq_n_u64(arg_u64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshlq_n_u64(arg_u64x2, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshld_n_u64(arg_u64, 0);
+	vshld_n_u64(arg_u64, 63);
+	vshld_n_u64(arg_u64, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshld_n_u64(arg_u64, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_shift_left_s8(int8x8_t arg_i8x8, int8x16_t arg_i8x16, int8_t arg_i8) {
+	vqshl_n_s8(arg_i8x8, 0);
+	vqshl_n_s8(arg_i8x8, 7);
+	vqshl_n_s8(arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshl_n_s8(arg_i8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlq_n_s8(arg_i8x16, 0);
+	vqshlq_n_s8(arg_i8x16, 7);
+	vqshlq_n_s8(arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlq_n_s8(arg_i8x16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlb_n_s8(arg_i8, 0);
+	vqshlb_n_s8(arg_i8, 7);
+	vqshlb_n_s8(arg_i8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlb_n_s8(arg_i8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlu_n_s8(arg_i8x8, 0);
+	vqshlu_n_s8(arg_i8x8, 7);
+	vqshlu_n_s8(arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlu_n_s8(arg_i8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshluq_n_s8(arg_i8x16, 0);
+	vqshluq_n_s8(arg_i8x16, 7);
+	vqshluq_n_s8(arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshluq_n_s8(arg_i8x16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlub_n_s8(arg_i8, 0);
+	vqshlub_n_s8(arg_i8, 7);
+	vqshlub_n_s8(arg_i8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlub_n_s8(arg_i8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_shift_left_s16(int16x4_t arg_i16x4, int16_t arg_i16, int16x8_t arg_i16x8) {
+	vqshl_n_s16(arg_i16x4, 0);
+	vqshl_n_s16(arg_i16x4, 15);
+	vqshl_n_s16(arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshl_n_s16(arg_i16x4, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlq_n_s16(arg_i16x8, 0);
+	vqshlq_n_s16(arg_i16x8, 15);
+	vqshlq_n_s16(arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlq_n_s16(arg_i16x8, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlh_n_s16(arg_i16, 0);
+	vqshlh_n_s16(arg_i16, 15);
+	vqshlh_n_s16(arg_i16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlh_n_s16(arg_i16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlu_n_s16(arg_i16x4, 0);
+	vqshlu_n_s16(arg_i16x4, 15);
+	vqshlu_n_s16(arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlu_n_s16(arg_i16x4, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshluq_n_s16(arg_i16x8, 0);
+	vqshluq_n_s16(arg_i16x8, 15);
+	vqshluq_n_s16(arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshluq_n_s16(arg_i16x8, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshluh_n_s16(arg_i16, 0);
+	vqshluh_n_s16(arg_i16, 15);
+	vqshluh_n_s16(arg_i16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshluh_n_s16(arg_i16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_shift_left_s32(int32x2_t arg_i32x2, int32_t arg_i32, int32x4_t arg_i32x4) {
+	vqshl_n_s32(arg_i32x2, 0);
+	vqshl_n_s32(arg_i32x2, 31);
+	vqshl_n_s32(arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshl_n_s32(arg_i32x2, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlq_n_s32(arg_i32x4, 0);
+	vqshlq_n_s32(arg_i32x4, 31);
+	vqshlq_n_s32(arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlq_n_s32(arg_i32x4, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshls_n_s32(arg_i32, 0);
+	vqshls_n_s32(arg_i32, 31);
+	vqshls_n_s32(arg_i32, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshls_n_s32(arg_i32, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlu_n_s32(arg_i32x2, 0);
+	vqshlu_n_s32(arg_i32x2, 31);
+	vqshlu_n_s32(arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlu_n_s32(arg_i32x2, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshluq_n_s32(arg_i32x4, 0);
+	vqshluq_n_s32(arg_i32x4, 31);
+	vqshluq_n_s32(arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshluq_n_s32(arg_i32x4, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlus_n_s32(arg_i32, 0);
+	vqshlus_n_s32(arg_i32, 31);
+	vqshlus_n_s32(arg_i32, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlus_n_s32(arg_i32, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_shift_left_s64(int64_t arg_i64, int64x2_t arg_i64x2, int64x1_t arg_i64x1) {
+	vqshl_n_s64(arg_i64x1, 0);
+	vqshl_n_s64(arg_i64x1, 63);
+	vqshl_n_s64(arg_i64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshl_n_s64(arg_i64x1, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlq_n_s64(arg_i64x2, 0);
+	vqshlq_n_s64(arg_i64x2, 63);
+	vqshlq_n_s64(arg_i64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlq_n_s64(arg_i64x2, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshld_n_s64(arg_i64, 0);
+	vqshld_n_s64(arg_i64, 63);
+	vqshld_n_s64(arg_i64, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshld_n_s64(arg_i64, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlu_n_s64(arg_i64x1, 0);
+	vqshlu_n_s64(arg_i64x1, 63);
+	vqshlu_n_s64(arg_i64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlu_n_s64(arg_i64x1, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshluq_n_s64(arg_i64x2, 0);
+	vqshluq_n_s64(arg_i64x2, 63);
+	vqshluq_n_s64(arg_i64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshluq_n_s64(arg_i64x2, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlud_n_s64(arg_i64, 0);
+	vqshlud_n_s64(arg_i64, 63);
+	vqshlud_n_s64(arg_i64, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlud_n_s64(arg_i64, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_shift_left_u8(uint8x8_t arg_u8x8, uint8_t arg_u8, uint8x16_t arg_u8x16) {
+	vqshl_n_u8(arg_u8x8, 0);
+	vqshl_n_u8(arg_u8x8, 7);
+	vqshl_n_u8(arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshl_n_u8(arg_u8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlq_n_u8(arg_u8x16, 0);
+	vqshlq_n_u8(arg_u8x16, 7);
+	vqshlq_n_u8(arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlq_n_u8(arg_u8x16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlb_n_u8(arg_u8, 0);
+	vqshlb_n_u8(arg_u8, 7);
+	vqshlb_n_u8(arg_u8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlb_n_u8(arg_u8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_shift_left_u16(uint16_t arg_u16, uint16x4_t arg_u16x4, uint16x8_t arg_u16x8) {
+	vqshl_n_u16(arg_u16x4, 0);
+	vqshl_n_u16(arg_u16x4, 15);
+	vqshl_n_u16(arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshl_n_u16(arg_u16x4, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlq_n_u16(arg_u16x8, 0);
+	vqshlq_n_u16(arg_u16x8, 15);
+	vqshlq_n_u16(arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlq_n_u16(arg_u16x8, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlh_n_u16(arg_u16, 0);
+	vqshlh_n_u16(arg_u16, 15);
+	vqshlh_n_u16(arg_u16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlh_n_u16(arg_u16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_shift_left_u32(uint32x2_t arg_u32x2, uint32x4_t arg_u32x4, uint32_t arg_u32) {
+	vqshl_n_u32(arg_u32x2, 0);
+	vqshl_n_u32(arg_u32x2, 31);
+	vqshl_n_u32(arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshl_n_u32(arg_u32x2, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlq_n_u32(arg_u32x4, 0);
+	vqshlq_n_u32(arg_u32x4, 31);
+	vqshlq_n_u32(arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlq_n_u32(arg_u32x4, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshls_n_u32(arg_u32, 0);
+	vqshls_n_u32(arg_u32, 31);
+	vqshls_n_u32(arg_u32, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshls_n_u32(arg_u32, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_shift_left_u64(uint64x1_t arg_u64x1, uint64_t arg_u64, uint64x2_t arg_u64x2) {
+	vqshl_n_u64(arg_u64x1, 0);
+	vqshl_n_u64(arg_u64x1, 63);
+	vqshl_n_u64(arg_u64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshl_n_u64(arg_u64x1, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlq_n_u64(arg_u64x2, 0);
+	vqshlq_n_u64(arg_u64x2, 63);
+	vqshlq_n_u64(arg_u64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlq_n_u64(arg_u64x2, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshld_n_u64(arg_u64, 0);
+	vqshld_n_u64(arg_u64, 63);
+	vqshld_n_u64(arg_u64, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshld_n_u64(arg_u64, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_and_widen_s8(int8x8_t arg_i8x8, int8x16_t arg_i8x16) {
+	vshll_n_s8(arg_i8x8, 0);
+	vshll_n_s8(arg_i8x8, 7);
+	vshll_n_s8(arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+
+	vshll_high_n_s8(arg_i8x16, 0);
+	vshll_high_n_s8(arg_i8x16, 7);
+	vshll_high_n_s8(arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vector_shift_left_and_widen_s16(int16x4_t arg_i16x4, int16x8_t arg_i16x8) {
+	vshll_n_s16(arg_i16x4, 0);
+	vshll_n_s16(arg_i16x4, 15);
+	vshll_n_s16(arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshll_high_n_s16(arg_i16x8, 0);
+	vshll_high_n_s16(arg_i16x8, 15);
+	vshll_high_n_s16(arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vector_shift_left_and_widen_s32(int32x2_t arg_i32x2, int32x4_t arg_i32x4) {
+	vshll_n_s32(arg_i32x2, 0);
+	vshll_n_s32(arg_i32x2, 31);
+	vshll_n_s32(arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshll_high_n_s32(arg_i32x4, 0);
+	vshll_high_n_s32(arg_i32x4, 31);
+	vshll_high_n_s32(arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vector_shift_left_and_widen_u8(uint8x8_t arg_u8x8, uint8x16_t arg_u8x16) {
+	vshll_n_u8(arg_u8x8, 0);
+	vshll_n_u8(arg_u8x8, 7);
+	vshll_n_u8(arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshll_high_n_u8(arg_u8x16, 0);
+	vshll_high_n_u8(arg_u8x16, 7);
+	vshll_high_n_u8(arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vector_shift_left_and_widen_u16(uint16x4_t arg_u16x4, uint16x8_t arg_u16x8) {
+	vshll_n_u16(arg_u16x4, 0);
+	vshll_n_u16(arg_u16x4, 15);
+	vshll_n_u16(arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshll_high_n_u16(arg_u16x8, 0);
+	vshll_high_n_u16(arg_u16x8, 15);
+	vshll_high_n_u16(arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vector_shift_left_and_widen_u32(uint32x2_t arg_u32x2, uint32x4_t arg_u32x4) {
+	vshll_n_u32(arg_u32x2, 0);
+	vshll_n_u32(arg_u32x2, 31);
+	vshll_n_u32(arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshll_high_n_u32(arg_u32x4, 0);
+	vshll_high_n_u32(arg_u32x4, 31);
+	vshll_high_n_u32(arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vector_shift_left_and_insert_s8(int8x8_t arg_i8x8, int8x16_t arg_i8x16) {
+	vsli_n_s8(arg_i8x8, arg_i8x8, 0);
+	vsli_n_s8(arg_i8x8, arg_i8x8, 7);
+	vsli_n_s8(arg_i8x8, arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsli_n_s8(arg_i8x8, arg_i8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsliq_n_s8(arg_i8x16, arg_i8x16, 0);
+	vsliq_n_s8(arg_i8x16, arg_i8x16, 7);
+	vsliq_n_s8(arg_i8x16, arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsliq_n_s8(arg_i8x16, arg_i8x16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_and_insert_s16(int16x4_t arg_i16x4, int16x8_t arg_i16x8) {
+	vsli_n_s16(arg_i16x4, arg_i16x4, 0);
+	vsli_n_s16(arg_i16x4, arg_i16x4, 15);
+	vsli_n_s16(arg_i16x4, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsli_n_s16(arg_i16x4, arg_i16x4, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsliq_n_s16(arg_i16x8, arg_i16x8, 0);
+	vsliq_n_s16(arg_i16x8, arg_i16x8, 15);
+	vsliq_n_s16(arg_i16x8, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsliq_n_s16(arg_i16x8, arg_i16x8, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_and_insert_s32(int32x2_t arg_i32x2, int32x4_t arg_i32x4) {
+	vsli_n_s32(arg_i32x2, arg_i32x2, 0);
+	vsli_n_s32(arg_i32x2, arg_i32x2, 31);
+	vsli_n_s32(arg_i32x2, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsli_n_s32(arg_i32x2, arg_i32x2, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsliq_n_s32(arg_i32x4, arg_i32x4, 0);
+	vsliq_n_s32(arg_i32x4, arg_i32x4, 31);
+	vsliq_n_s32(arg_i32x4, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsliq_n_s32(arg_i32x4, arg_i32x4, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_and_insert_s64(int64_t arg_i64, int64x2_t arg_i64x2, int64x1_t arg_i64x1) {
+	vsli_n_s64(arg_i64x1, arg_i64x1, 0);
+	vsli_n_s64(arg_i64x1, arg_i64x1, 63);
+	vsli_n_s64(arg_i64x1, arg_i64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsli_n_s64(arg_i64x1, arg_i64x1, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsliq_n_s64(arg_i64x2, arg_i64x2, 0);
+	vsliq_n_s64(arg_i64x2, arg_i64x2, 63);
+	vsliq_n_s64(arg_i64x2, arg_i64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsliq_n_s64(arg_i64x2, arg_i64x2, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vslid_n_s64(arg_i64, arg_i64, 0);
+	vslid_n_s64(arg_i64, arg_i64, 63);
+	vslid_n_s64(arg_i64, arg_i64, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vslid_n_s64(arg_i64, arg_i64, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_and_insert_u8(uint8x8_t arg_u8x8, uint8x16_t arg_u8x16) {
+	vsli_n_u8(arg_u8x8, arg_u8x8, 0);
+	vsli_n_u8(arg_u8x8, arg_u8x8, 7);
+	vsli_n_u8(arg_u8x8, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsli_n_u8(arg_u8x8, arg_u8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsliq_n_u8(arg_u8x16, arg_u8x16, 0);
+	vsliq_n_u8(arg_u8x16, arg_u8x16, 7);
+	vsliq_n_u8(arg_u8x16, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsliq_n_u8(arg_u8x16, arg_u8x16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_and_insert_u16(uint16x4_t arg_u16x4, uint16x8_t arg_u16x8) {
+	vsli_n_u16(arg_u16x4, arg_u16x4, 0);
+	vsli_n_u16(arg_u16x4, arg_u16x4, 15);
+	vsli_n_u16(arg_u16x4, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsli_n_u16(arg_u16x4, arg_u16x4, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsliq_n_u16(arg_u16x8, arg_u16x8, 0);
+	vsliq_n_u16(arg_u16x8, arg_u16x8, 15);
+	vsliq_n_u16(arg_u16x8, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsliq_n_u16(arg_u16x8, arg_u16x8, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_and_insert_u32(uint32x2_t arg_u32x2, uint32x4_t arg_u32x4) {
+	vsli_n_u32(arg_u32x2, arg_u32x2, 0);
+	vsli_n_u32(arg_u32x2, arg_u32x2, 31);
+	vsli_n_u32(arg_u32x2, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsli_n_u32(arg_u32x2, arg_u32x2, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsliq_n_u32(arg_u32x4, arg_u32x4, 0);
+	vsliq_n_u32(arg_u32x4, arg_u32x4, 31);
+	vsliq_n_u32(arg_u32x4, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsliq_n_u32(arg_u32x4, arg_u32x4, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_and_insert_u64(uint64x1_t arg_u64x1, uint64_t arg_u64, uint64x2_t arg_u64x2) {
+	vsli_n_u64(arg_u64x1, arg_u64x1, 0);
+	vsli_n_u64(arg_u64x1, arg_u64x1, 63);
+	vsli_n_u64(arg_u64x1, arg_u64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsli_n_u64(arg_u64x1, arg_u64x1, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsliq_n_u64(arg_u64x2, arg_u64x2, 0);
+	vsliq_n_u64(arg_u64x2, arg_u64x2, 63);
+	vsliq_n_u64(arg_u64x2, arg_u64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsliq_n_u64(arg_u64x2, arg_u64x2, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vslid_n_u64(arg_u64, arg_u64, 0);
+	vslid_n_u64(arg_u64, arg_u64, 63);
+	vslid_n_u64(arg_u64, arg_u64, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vslid_n_u64(arg_u64, arg_u64, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_and_insert_p64(poly64x2_t arg_p64x2, poly64x1_t arg_p64x1) {
+	vsli_n_p64(arg_p64x1, arg_p64x1, 0);
+	vsli_n_p64(arg_p64x1, arg_p64x1, 63);
+	vsli_n_p64(arg_p64x1, arg_p64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsli_n_p64(arg_p64x1, arg_p64x1, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsliq_n_p64(arg_p64x2, arg_p64x2, 0);
+	vsliq_n_p64(arg_p64x2, arg_p64x2, 63);
+	vsliq_n_p64(arg_p64x2, arg_p64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsliq_n_p64(arg_p64x2, arg_p64x2, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_and_insert_p8(poly8x16_t arg_p8x16, poly8x8_t arg_p8x8) {
+	vsli_n_p8(arg_p8x8, arg_p8x8, 0);
+	vsli_n_p8(arg_p8x8, arg_p8x8, 7);
+	vsli_n_p8(arg_p8x8, arg_p8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsli_n_p8(arg_p8x8, arg_p8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsliq_n_p8(arg_p8x16, arg_p8x16, 0);
+	vsliq_n_p8(arg_p8x16, arg_p8x16, 7);
+	vsliq_n_p8(arg_p8x16, arg_p8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsliq_n_p8(arg_p8x16, arg_p8x16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_and_insert_p16(poly16x4_t arg_p16x4, poly16x8_t arg_p16x8) {
+	vsli_n_p16(arg_p16x4, arg_p16x4, 0);
+	vsli_n_p16(arg_p16x4, arg_p16x4, 15);
+	vsli_n_p16(arg_p16x4, arg_p16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsli_n_p16(arg_p16x4, arg_p16x4, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsliq_n_p16(arg_p16x8, arg_p16x8, 0);
+	vsliq_n_p16(arg_p16x8, arg_p16x8, 15);
+	vsliq_n_p16(arg_p16x8, arg_p16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsliq_n_p16(arg_p16x8, arg_p16x8, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git clang/test/Sema/aarch64-neon-immediate-ranges/vector-shift-right.c clang/test/Sema/aarch64-neon-immediate-ranges/vector-shift-right.c
new file mode 100644
index 000000000000..ad4677fe4366
--- /dev/null
+++ clang/test/Sema/aarch64-neon-immediate-ranges/vector-shift-right.c
@@ -0,0 +1,1083 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+
+void test_vector_shift_right_s8(int8x8_t arg_i8x8, int8x16_t arg_i8x16) {
+	vshr_n_s8(arg_i8x8, 1);
+	vshr_n_s8(arg_i8x8, 8);
+	vshr_n_s8(arg_i8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshr_n_s8(arg_i8x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshrq_n_s8(arg_i8x16, 1);
+	vshrq_n_s8(arg_i8x16, 8);
+	vshrq_n_s8(arg_i8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrq_n_s8(arg_i8x16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_s16(int16x4_t arg_i16x4, int16x8_t arg_i16x8) {
+	vshr_n_s16(arg_i16x4, 1);
+	vshr_n_s16(arg_i16x4, 16);
+	vshr_n_s16(arg_i16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshr_n_s16(arg_i16x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshrq_n_s16(arg_i16x8, 1);
+	vshrq_n_s16(arg_i16x8, 16);
+	vshrq_n_s16(arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrq_n_s16(arg_i16x8, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_s32(int32x2_t arg_i32x2, int32x4_t arg_i32x4) {
+	vshr_n_s32(arg_i32x2, 1);
+	vshr_n_s32(arg_i32x2, 32);
+	vshr_n_s32(arg_i32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshr_n_s32(arg_i32x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshrq_n_s32(arg_i32x4, 1);
+	vshrq_n_s32(arg_i32x4, 32);
+	vshrq_n_s32(arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrq_n_s32(arg_i32x4, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_s64(int64_t arg_i64, int64x1_t arg_i64x1, int64x2_t arg_i64x2) {
+	vshr_n_s64(arg_i64x1, 1);
+	vshr_n_s64(arg_i64x1, 64);
+	vshr_n_s64(arg_i64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshr_n_s64(arg_i64x1, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshrq_n_s64(arg_i64x2, 1);
+	vshrq_n_s64(arg_i64x2, 64);
+	vshrq_n_s64(arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrq_n_s64(arg_i64x2, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshrd_n_s64(arg_i64, 1);
+	vshrd_n_s64(arg_i64, 64);
+	vshrd_n_s64(arg_i64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrd_n_s64(arg_i64, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_u8(uint8x16_t arg_u8x16, uint8x8_t arg_u8x8) {
+	vshr_n_u8(arg_u8x8, 1);
+	vshr_n_u8(arg_u8x8, 8);
+	vshr_n_u8(arg_u8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshr_n_u8(arg_u8x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshrq_n_u8(arg_u8x16, 1);
+	vshrq_n_u8(arg_u8x16, 8);
+	vshrq_n_u8(arg_u8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrq_n_u8(arg_u8x16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_u16(uint16x8_t arg_u16x8, uint16x4_t arg_u16x4) {
+	vshr_n_u16(arg_u16x4, 1);
+	vshr_n_u16(arg_u16x4, 16);
+	vshr_n_u16(arg_u16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshr_n_u16(arg_u16x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshrq_n_u16(arg_u16x8, 1);
+	vshrq_n_u16(arg_u16x8, 16);
+	vshrq_n_u16(arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrq_n_u16(arg_u16x8, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_u32(uint32x2_t arg_u32x2, uint32x4_t arg_u32x4) {
+	vshr_n_u32(arg_u32x2, 1);
+	vshr_n_u32(arg_u32x2, 32);
+	vshr_n_u32(arg_u32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshr_n_u32(arg_u32x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshrq_n_u32(arg_u32x4, 1);
+	vshrq_n_u32(arg_u32x4, 32);
+	vshrq_n_u32(arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrq_n_u32(arg_u32x4, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_u64(uint64x2_t arg_u64x2, uint64_t arg_u64, uint64x1_t arg_u64x1) {
+	vshr_n_u64(arg_u64x1, 1);
+	vshr_n_u64(arg_u64x1, 64);
+	vshr_n_u64(arg_u64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshr_n_u64(arg_u64x1, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshrq_n_u64(arg_u64x2, 1);
+	vshrq_n_u64(arg_u64x2, 64);
+	vshrq_n_u64(arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrq_n_u64(arg_u64x2, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshrd_n_u64(arg_u64, 1);
+	vshrd_n_u64(arg_u64, 64);
+	vshrd_n_u64(arg_u64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrd_n_u64(arg_u64, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_s8(int8x8_t arg_i8x8, int8x16_t arg_i8x16) {
+	vrshr_n_s8(arg_i8x8, 1);
+	vrshr_n_s8(arg_i8x8, 8);
+	vrshr_n_s8(arg_i8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshr_n_s8(arg_i8x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrshrq_n_s8(arg_i8x16, 1);
+	vrshrq_n_s8(arg_i8x16, 8);
+	vrshrq_n_s8(arg_i8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrq_n_s8(arg_i8x16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_s16(int16x4_t arg_i16x4, int16x8_t arg_i16x8) {
+	vrshr_n_s16(arg_i16x4, 1);
+	vrshr_n_s16(arg_i16x4, 16);
+	vrshr_n_s16(arg_i16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshr_n_s16(arg_i16x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrshrq_n_s16(arg_i16x8, 1);
+	vrshrq_n_s16(arg_i16x8, 16);
+	vrshrq_n_s16(arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrq_n_s16(arg_i16x8, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_s32(int32x2_t arg_i32x2, int32x4_t arg_i32x4) {
+	vrshr_n_s32(arg_i32x2, 1);
+	vrshr_n_s32(arg_i32x2, 32);
+	vrshr_n_s32(arg_i32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshr_n_s32(arg_i32x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrshrq_n_s32(arg_i32x4, 1);
+	vrshrq_n_s32(arg_i32x4, 32);
+	vrshrq_n_s32(arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrq_n_s32(arg_i32x4, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_s64(int64_t arg_i64, int64x1_t arg_i64x1, int64x2_t arg_i64x2) {
+	vrshr_n_s64(arg_i64x1, 1);
+	vrshr_n_s64(arg_i64x1, 64);
+	vrshr_n_s64(arg_i64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshr_n_s64(arg_i64x1, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrshrq_n_s64(arg_i64x2, 1);
+	vrshrq_n_s64(arg_i64x2, 64);
+	vrshrq_n_s64(arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrq_n_s64(arg_i64x2, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrshrd_n_s64(arg_i64, 1);
+	vrshrd_n_s64(arg_i64, 64);
+	vrshrd_n_s64(arg_i64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrd_n_s64(arg_i64, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_u8(uint8x16_t arg_u8x16, uint8x8_t arg_u8x8) {
+	vrshr_n_u8(arg_u8x8, 1);
+	vrshr_n_u8(arg_u8x8, 8);
+	vrshr_n_u8(arg_u8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshr_n_u8(arg_u8x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrshrq_n_u8(arg_u8x16, 1);
+	vrshrq_n_u8(arg_u8x16, 8);
+	vrshrq_n_u8(arg_u8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrq_n_u8(arg_u8x16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_u16(uint16x8_t arg_u16x8, uint16x4_t arg_u16x4) {
+	vrshr_n_u16(arg_u16x4, 1);
+	vrshr_n_u16(arg_u16x4, 16);
+	vrshr_n_u16(arg_u16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshr_n_u16(arg_u16x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrshrq_n_u16(arg_u16x8, 1);
+	vrshrq_n_u16(arg_u16x8, 16);
+	vrshrq_n_u16(arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrq_n_u16(arg_u16x8, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_u32(uint32x2_t arg_u32x2, uint32x4_t arg_u32x4) {
+	vrshr_n_u32(arg_u32x2, 1);
+	vrshr_n_u32(arg_u32x2, 32);
+	vrshr_n_u32(arg_u32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshr_n_u32(arg_u32x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrshrq_n_u32(arg_u32x4, 1);
+	vrshrq_n_u32(arg_u32x4, 32);
+	vrshrq_n_u32(arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrq_n_u32(arg_u32x4, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_u64(uint64x2_t arg_u64x2, uint64_t arg_u64, uint64x1_t arg_u64x1) {
+	vrshr_n_u64(arg_u64x1, 1);
+	vrshr_n_u64(arg_u64x1, 64);
+	vrshr_n_u64(arg_u64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshr_n_u64(arg_u64x1, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrshrq_n_u64(arg_u64x2, 1);
+	vrshrq_n_u64(arg_u64x2, 64);
+	vrshrq_n_u64(arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrq_n_u64(arg_u64x2, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrshrd_n_u64(arg_u64, 1);
+	vrshrd_n_u64(arg_u64, 64);
+	vrshrd_n_u64(arg_u64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrd_n_u64(arg_u64, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_accumulate_s8(int8x8_t arg_i8x8, int8x16_t arg_i8x16) {
+	vsra_n_s8(arg_i8x8, arg_i8x8, 1);
+	vsra_n_s8(arg_i8x8, arg_i8x8, 8);
+	vsra_n_s8(arg_i8x8, arg_i8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsra_n_s8(arg_i8x8, arg_i8x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsraq_n_s8(arg_i8x16, arg_i8x16, 1);
+	vsraq_n_s8(arg_i8x16, arg_i8x16, 8);
+	vsraq_n_s8(arg_i8x16, arg_i8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsraq_n_s8(arg_i8x16, arg_i8x16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_accumulate_s16(int16x4_t arg_i16x4, int16x8_t arg_i16x8) {
+	vsra_n_s16(arg_i16x4, arg_i16x4, 1);
+	vsra_n_s16(arg_i16x4, arg_i16x4, 16);
+	vsra_n_s16(arg_i16x4, arg_i16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsra_n_s16(arg_i16x4, arg_i16x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsraq_n_s16(arg_i16x8, arg_i16x8, 1);
+	vsraq_n_s16(arg_i16x8, arg_i16x8, 16);
+	vsraq_n_s16(arg_i16x8, arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsraq_n_s16(arg_i16x8, arg_i16x8, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_accumulate_s32(int32x2_t arg_i32x2, int32x4_t arg_i32x4) {
+	vsra_n_s32(arg_i32x2, arg_i32x2, 1);
+	vsra_n_s32(arg_i32x2, arg_i32x2, 32);
+	vsra_n_s32(arg_i32x2, arg_i32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsra_n_s32(arg_i32x2, arg_i32x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsraq_n_s32(arg_i32x4, arg_i32x4, 1);
+	vsraq_n_s32(arg_i32x4, arg_i32x4, 32);
+	vsraq_n_s32(arg_i32x4, arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsraq_n_s32(arg_i32x4, arg_i32x4, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_accumulate_s64(int64_t arg_i64, int64x1_t arg_i64x1, int64x2_t arg_i64x2) {
+	vsra_n_s64(arg_i64x1, arg_i64x1, 1);
+	vsra_n_s64(arg_i64x1, arg_i64x1, 64);
+	vsra_n_s64(arg_i64x1, arg_i64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsra_n_s64(arg_i64x1, arg_i64x1, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsraq_n_s64(arg_i64x2, arg_i64x2, 1);
+	vsraq_n_s64(arg_i64x2, arg_i64x2, 64);
+	vsraq_n_s64(arg_i64x2, arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsraq_n_s64(arg_i64x2, arg_i64x2, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsrad_n_s64(arg_i64, arg_i64, 1);
+	vsrad_n_s64(arg_i64, arg_i64, 64);
+	vsrad_n_s64(arg_i64, arg_i64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsrad_n_s64(arg_i64, arg_i64, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_accumulate_u8(uint8x16_t arg_u8x16, uint8x8_t arg_u8x8) {
+	vsra_n_u8(arg_u8x8, arg_u8x8, 1);
+	vsra_n_u8(arg_u8x8, arg_u8x8, 8);
+	vsra_n_u8(arg_u8x8, arg_u8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsra_n_u8(arg_u8x8, arg_u8x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsraq_n_u8(arg_u8x16, arg_u8x16, 1);
+	vsraq_n_u8(arg_u8x16, arg_u8x16, 8);
+	vsraq_n_u8(arg_u8x16, arg_u8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsraq_n_u8(arg_u8x16, arg_u8x16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_accumulate_u16(uint16x8_t arg_u16x8, uint16x4_t arg_u16x4) {
+	vsra_n_u16(arg_u16x4, arg_u16x4, 1);
+	vsra_n_u16(arg_u16x4, arg_u16x4, 16);
+	vsra_n_u16(arg_u16x4, arg_u16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsra_n_u16(arg_u16x4, arg_u16x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsraq_n_u16(arg_u16x8, arg_u16x8, 1);
+	vsraq_n_u16(arg_u16x8, arg_u16x8, 16);
+	vsraq_n_u16(arg_u16x8, arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsraq_n_u16(arg_u16x8, arg_u16x8, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_accumulate_u32(uint32x2_t arg_u32x2, uint32x4_t arg_u32x4) {
+	vsra_n_u32(arg_u32x2, arg_u32x2, 1);
+	vsra_n_u32(arg_u32x2, arg_u32x2, 32);
+	vsra_n_u32(arg_u32x2, arg_u32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsra_n_u32(arg_u32x2, arg_u32x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsraq_n_u32(arg_u32x4, arg_u32x4, 1);
+	vsraq_n_u32(arg_u32x4, arg_u32x4, 32);
+	vsraq_n_u32(arg_u32x4, arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsraq_n_u32(arg_u32x4, arg_u32x4, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_accumulate_u64(uint64x2_t arg_u64x2, uint64_t arg_u64, uint64x1_t arg_u64x1) {
+	vsra_n_u64(arg_u64x1, arg_u64x1, 1);
+	vsra_n_u64(arg_u64x1, arg_u64x1, 64);
+	vsra_n_u64(arg_u64x1, arg_u64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsra_n_u64(arg_u64x1, arg_u64x1, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsraq_n_u64(arg_u64x2, arg_u64x2, 1);
+	vsraq_n_u64(arg_u64x2, arg_u64x2, 64);
+	vsraq_n_u64(arg_u64x2, arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsraq_n_u64(arg_u64x2, arg_u64x2, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsrad_n_u64(arg_u64, arg_u64, 1);
+	vsrad_n_u64(arg_u64, arg_u64, 64);
+	vsrad_n_u64(arg_u64, arg_u64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsrad_n_u64(arg_u64, arg_u64, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_and_accumulate_s8(int8x8_t arg_i8x8, int8x16_t arg_i8x16) {
+	vrsra_n_s8(arg_i8x8, arg_i8x8, 1);
+	vrsra_n_s8(arg_i8x8, arg_i8x8, 8);
+	vrsra_n_s8(arg_i8x8, arg_i8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsra_n_s8(arg_i8x8, arg_i8x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrsraq_n_s8(arg_i8x16, arg_i8x16, 1);
+	vrsraq_n_s8(arg_i8x16, arg_i8x16, 8);
+	vrsraq_n_s8(arg_i8x16, arg_i8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsraq_n_s8(arg_i8x16, arg_i8x16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_and_accumulate_s16(int16x4_t arg_i16x4, int16x8_t arg_i16x8) {
+	vrsra_n_s16(arg_i16x4, arg_i16x4, 1);
+	vrsra_n_s16(arg_i16x4, arg_i16x4, 16);
+	vrsra_n_s16(arg_i16x4, arg_i16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsra_n_s16(arg_i16x4, arg_i16x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrsraq_n_s16(arg_i16x8, arg_i16x8, 1);
+	vrsraq_n_s16(arg_i16x8, arg_i16x8, 16);
+	vrsraq_n_s16(arg_i16x8, arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsraq_n_s16(arg_i16x8, arg_i16x8, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_and_accumulate_s32(int32x2_t arg_i32x2, int32x4_t arg_i32x4) {
+	vrsra_n_s32(arg_i32x2, arg_i32x2, 1);
+	vrsra_n_s32(arg_i32x2, arg_i32x2, 32);
+	vrsra_n_s32(arg_i32x2, arg_i32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsra_n_s32(arg_i32x2, arg_i32x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrsraq_n_s32(arg_i32x4, arg_i32x4, 1);
+	vrsraq_n_s32(arg_i32x4, arg_i32x4, 32);
+	vrsraq_n_s32(arg_i32x4, arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsraq_n_s32(arg_i32x4, arg_i32x4, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_and_accumulate_s64(int64_t arg_i64, int64x1_t arg_i64x1, int64x2_t arg_i64x2) {
+	vrsra_n_s64(arg_i64x1, arg_i64x1, 1);
+	vrsra_n_s64(arg_i64x1, arg_i64x1, 64);
+	vrsra_n_s64(arg_i64x1, arg_i64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsra_n_s64(arg_i64x1, arg_i64x1, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrsraq_n_s64(arg_i64x2, arg_i64x2, 1);
+	vrsraq_n_s64(arg_i64x2, arg_i64x2, 64);
+	vrsraq_n_s64(arg_i64x2, arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsraq_n_s64(arg_i64x2, arg_i64x2, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrsrad_n_s64(arg_i64, arg_i64, 1);
+	vrsrad_n_s64(arg_i64, arg_i64, 64);
+	vrsrad_n_s64(arg_i64, arg_i64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsrad_n_s64(arg_i64, arg_i64, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_and_accumulate_u8(uint8x16_t arg_u8x16, uint8x8_t arg_u8x8) {
+	vrsra_n_u8(arg_u8x8, arg_u8x8, 1);
+	vrsra_n_u8(arg_u8x8, arg_u8x8, 8);
+	vrsra_n_u8(arg_u8x8, arg_u8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsra_n_u8(arg_u8x8, arg_u8x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrsraq_n_u8(arg_u8x16, arg_u8x16, 1);
+	vrsraq_n_u8(arg_u8x16, arg_u8x16, 8);
+	vrsraq_n_u8(arg_u8x16, arg_u8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsraq_n_u8(arg_u8x16, arg_u8x16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_and_accumulate_u16(uint16x8_t arg_u16x8, uint16x4_t arg_u16x4) {
+	vrsra_n_u16(arg_u16x4, arg_u16x4, 1);
+	vrsra_n_u16(arg_u16x4, arg_u16x4, 16);
+	vrsra_n_u16(arg_u16x4, arg_u16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsra_n_u16(arg_u16x4, arg_u16x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrsraq_n_u16(arg_u16x8, arg_u16x8, 1);
+	vrsraq_n_u16(arg_u16x8, arg_u16x8, 16);
+	vrsraq_n_u16(arg_u16x8, arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsraq_n_u16(arg_u16x8, arg_u16x8, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_and_accumulate_u32(uint32x2_t arg_u32x2, uint32x4_t arg_u32x4) {
+	vrsra_n_u32(arg_u32x2, arg_u32x2, 1);
+	vrsra_n_u32(arg_u32x2, arg_u32x2, 32);
+	vrsra_n_u32(arg_u32x2, arg_u32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsra_n_u32(arg_u32x2, arg_u32x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrsraq_n_u32(arg_u32x4, arg_u32x4, 1);
+	vrsraq_n_u32(arg_u32x4, arg_u32x4, 32);
+	vrsraq_n_u32(arg_u32x4, arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsraq_n_u32(arg_u32x4, arg_u32x4, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_and_accumulate_u64(uint64x2_t arg_u64x2, uint64_t arg_u64, uint64x1_t arg_u64x1) {
+	vrsra_n_u64(arg_u64x1, arg_u64x1, 1);
+	vrsra_n_u64(arg_u64x1, arg_u64x1, 64);
+	vrsra_n_u64(arg_u64x1, arg_u64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsra_n_u64(arg_u64x1, arg_u64x1, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrsraq_n_u64(arg_u64x2, arg_u64x2, 1);
+	vrsraq_n_u64(arg_u64x2, arg_u64x2, 64);
+	vrsraq_n_u64(arg_u64x2, arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsraq_n_u64(arg_u64x2, arg_u64x2, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrsrad_n_u64(arg_u64, arg_u64, 1);
+	vrsrad_n_u64(arg_u64, arg_u64, 64);
+	vrsrad_n_u64(arg_u64, arg_u64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsrad_n_u64(arg_u64, arg_u64, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_narrow_s16(int16x8_t arg_i16x8, int8x8_t arg_i8x8) {
+	vshrn_n_s16(arg_i16x8, 1);
+	vshrn_n_s16(arg_i16x8, 8);
+	vshrn_n_s16(arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrn_n_s16(arg_i16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshrn_high_n_s16(arg_i8x8, arg_i16x8, 1);
+	vshrn_high_n_s16(arg_i8x8, arg_i16x8, 8);
+	vshrn_high_n_s16(arg_i8x8, arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrn_high_n_s16(arg_i8x8, arg_i16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_narrow_s32(int32x4_t arg_i32x4, int16x4_t arg_i16x4) {
+	vshrn_n_s32(arg_i32x4, 1);
+	vshrn_n_s32(arg_i32x4, 16);
+	vshrn_n_s32(arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrn_n_s32(arg_i32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshrn_high_n_s32(arg_i16x4, arg_i32x4, 1);
+	vshrn_high_n_s32(arg_i16x4, arg_i32x4, 16);
+	vshrn_high_n_s32(arg_i16x4, arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrn_high_n_s32(arg_i16x4, arg_i32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_narrow_s64(int32x2_t arg_i32x2, int64x2_t arg_i64x2) {
+	vshrn_n_s64(arg_i64x2, 1);
+	vshrn_n_s64(arg_i64x2, 32);
+	vshrn_n_s64(arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrn_n_s64(arg_i64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshrn_high_n_s64(arg_i32x2, arg_i64x2, 1);
+	vshrn_high_n_s64(arg_i32x2, arg_i64x2, 32);
+	vshrn_high_n_s64(arg_i32x2, arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrn_high_n_s64(arg_i32x2, arg_i64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_narrow_u16(uint16x8_t arg_u16x8, uint8x8_t arg_u8x8) {
+	vshrn_n_u16(arg_u16x8, 1);
+	vshrn_n_u16(arg_u16x8, 8);
+	vshrn_n_u16(arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrn_n_u16(arg_u16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshrn_high_n_u16(arg_u8x8, arg_u16x8, 1);
+	vshrn_high_n_u16(arg_u8x8, arg_u16x8, 8);
+	vshrn_high_n_u16(arg_u8x8, arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrn_high_n_u16(arg_u8x8, arg_u16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_narrow_u32(uint32x4_t arg_u32x4, uint16x4_t arg_u16x4) {
+	vshrn_n_u32(arg_u32x4, 1);
+	vshrn_n_u32(arg_u32x4, 16);
+	vshrn_n_u32(arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrn_n_u32(arg_u32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshrn_high_n_u32(arg_u16x4, arg_u32x4, 1);
+	vshrn_high_n_u32(arg_u16x4, arg_u32x4, 16);
+	vshrn_high_n_u32(arg_u16x4, arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrn_high_n_u32(arg_u16x4, arg_u32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_narrow_u64(uint64x2_t arg_u64x2, uint32x2_t arg_u32x2) {
+	vshrn_n_u64(arg_u64x2, 1);
+	vshrn_n_u64(arg_u64x2, 32);
+	vshrn_n_u64(arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrn_n_u64(arg_u64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshrn_high_n_u64(arg_u32x2, arg_u64x2, 1);
+	vshrn_high_n_u64(arg_u32x2, arg_u64x2, 32);
+	vshrn_high_n_u64(arg_u32x2, arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrn_high_n_u64(arg_u32x2, arg_u64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_shift_right_and_narrow_s16(int16x8_t arg_i16x8, uint8x8_t arg_u8x8, int16_t arg_i16, int8x8_t arg_i8x8) {
+	vqshrun_n_s16(arg_i16x8, 1);
+	vqshrun_n_s16(arg_i16x8, 8);
+	vqshrun_n_s16(arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrun_n_s16(arg_i16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrunh_n_s16(arg_i16, 1);
+	vqshrunh_n_s16(arg_i16, 8);
+	vqshrunh_n_s16(arg_i16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrunh_n_s16(arg_i16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrun_high_n_s16(arg_u8x8, arg_i16x8, 1);
+	vqshrun_high_n_s16(arg_u8x8, arg_i16x8, 8);
+	vqshrun_high_n_s16(arg_u8x8, arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrun_high_n_s16(arg_u8x8, arg_i16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrn_n_s16(arg_i16x8, 1);
+	vqshrn_n_s16(arg_i16x8, 8);
+	vqshrn_n_s16(arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrn_n_s16(arg_i16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrnh_n_s16(arg_i16, 1);
+	vqshrnh_n_s16(arg_i16, 8);
+	vqshrnh_n_s16(arg_i16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrnh_n_s16(arg_i16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrn_high_n_s16(arg_i8x8, arg_i16x8, 1);
+	vqshrn_high_n_s16(arg_i8x8, arg_i16x8, 8);
+	vqshrn_high_n_s16(arg_i8x8, arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrn_high_n_s16(arg_i8x8, arg_i16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_shift_right_and_narrow_s32(int16x4_t arg_i16x4, int32_t arg_i32, int32x4_t arg_i32x4, uint16x4_t arg_u16x4) {
+	vqshrun_n_s32(arg_i32x4, 1);
+	vqshrun_n_s32(arg_i32x4, 16);
+	vqshrun_n_s32(arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrun_n_s32(arg_i32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshruns_n_s32(arg_i32, 1);
+	vqshruns_n_s32(arg_i32, 16);
+	vqshruns_n_s32(arg_i32, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshruns_n_s32(arg_i32, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrun_high_n_s32(arg_u16x4, arg_i32x4, 1);
+	vqshrun_high_n_s32(arg_u16x4, arg_i32x4, 16);
+	vqshrun_high_n_s32(arg_u16x4, arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrun_high_n_s32(arg_u16x4, arg_i32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrn_n_s32(arg_i32x4, 1);
+	vqshrn_n_s32(arg_i32x4, 16);
+	vqshrn_n_s32(arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrn_n_s32(arg_i32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrns_n_s32(arg_i32, 1);
+	vqshrns_n_s32(arg_i32, 16);
+	vqshrns_n_s32(arg_i32, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrns_n_s32(arg_i32, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrn_high_n_s32(arg_i16x4, arg_i32x4, 1);
+	vqshrn_high_n_s32(arg_i16x4, arg_i32x4, 16);
+	vqshrn_high_n_s32(arg_i16x4, arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrn_high_n_s32(arg_i16x4, arg_i32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_shift_right_and_narrow_s64(uint32x2_t arg_u32x2, int64x2_t arg_i64x2, int32x2_t arg_i32x2, int64_t arg_i64) {
+	vqshrun_n_s64(arg_i64x2, 1);
+	vqshrun_n_s64(arg_i64x2, 32);
+	vqshrun_n_s64(arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrun_n_s64(arg_i64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrund_n_s64(arg_i64, 1);
+	vqshrund_n_s64(arg_i64, 32);
+	vqshrund_n_s64(arg_i64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrund_n_s64(arg_i64, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrun_high_n_s64(arg_u32x2, arg_i64x2, 1);
+	vqshrun_high_n_s64(arg_u32x2, arg_i64x2, 32);
+	vqshrun_high_n_s64(arg_u32x2, arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrun_high_n_s64(arg_u32x2, arg_i64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrn_n_s64(arg_i64x2, 1);
+	vqshrn_n_s64(arg_i64x2, 32);
+	vqshrn_n_s64(arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrn_n_s64(arg_i64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrnd_n_s64(arg_i64, 1);
+	vqshrnd_n_s64(arg_i64, 32);
+	vqshrnd_n_s64(arg_i64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrnd_n_s64(arg_i64, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrn_high_n_s64(arg_i32x2, arg_i64x2, 1);
+	vqshrn_high_n_s64(arg_i32x2, arg_i64x2, 32);
+	vqshrn_high_n_s64(arg_i32x2, arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrn_high_n_s64(arg_i32x2, arg_i64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_shift_right_and_narrow_u16(uint16x8_t arg_u16x8, uint16_t arg_u16, uint8x8_t arg_u8x8) {
+	vqshrn_n_u16(arg_u16x8, 1);
+	vqshrn_n_u16(arg_u16x8, 8);
+	vqshrn_n_u16(arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrn_n_u16(arg_u16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrnh_n_u16(arg_u16, 1);
+	vqshrnh_n_u16(arg_u16, 8);
+	vqshrnh_n_u16(arg_u16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrnh_n_u16(arg_u16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrn_high_n_u16(arg_u8x8, arg_u16x8, 1);
+	vqshrn_high_n_u16(arg_u8x8, arg_u16x8, 8);
+	vqshrn_high_n_u16(arg_u8x8, arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrn_high_n_u16(arg_u8x8, arg_u16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_shift_right_and_narrow_u32(uint32x4_t arg_u32x4, uint32_t arg_u32, uint16x4_t arg_u16x4) {
+	vqshrn_n_u32(arg_u32x4, 1);
+	vqshrn_n_u32(arg_u32x4, 16);
+	vqshrn_n_u32(arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrn_n_u32(arg_u32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrns_n_u32(arg_u32, 1);
+	vqshrns_n_u32(arg_u32, 16);
+	vqshrns_n_u32(arg_u32, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrns_n_u32(arg_u32, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrn_high_n_u32(arg_u16x4, arg_u32x4, 1);
+	vqshrn_high_n_u32(arg_u16x4, arg_u32x4, 16);
+	vqshrn_high_n_u32(arg_u16x4, arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrn_high_n_u32(arg_u16x4, arg_u32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_shift_right_and_narrow_u64(uint64x2_t arg_u64x2, uint32x2_t arg_u32x2, uint64_t arg_u64) {
+	vqshrn_n_u64(arg_u64x2, 1);
+	vqshrn_n_u64(arg_u64x2, 32);
+	vqshrn_n_u64(arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrn_n_u64(arg_u64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrnd_n_u64(arg_u64, 1);
+	vqshrnd_n_u64(arg_u64, 32);
+	vqshrnd_n_u64(arg_u64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrnd_n_u64(arg_u64, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrn_high_n_u64(arg_u32x2, arg_u64x2, 1);
+	vqshrn_high_n_u64(arg_u32x2, arg_u64x2, 32);
+	vqshrn_high_n_u64(arg_u32x2, arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrn_high_n_u64(arg_u32x2, arg_u64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_rounding_shift_right_and_narrow_s16(int16x8_t arg_i16x8, uint8x8_t arg_u8x8,
+																int16_t arg_i16, int8x8_t arg_i8x8) {
+	vqrshrun_n_s16(arg_i16x8, 1);
+	vqrshrun_n_s16(arg_i16x8, 8);
+	vqrshrun_n_s16(arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrun_n_s16(arg_i16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrshrunh_n_s16(arg_i16, 1);
+	vqrshrunh_n_s16(arg_i16, 8);
+	vqrshrunh_n_s16(arg_i16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrunh_n_s16(arg_i16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrshrun_high_n_s16(arg_u8x8, arg_i16x8, 1);
+	vqrshrun_high_n_s16(arg_u8x8, arg_i16x8, 8);
+	vqrshrun_high_n_s16(arg_u8x8, arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrun_high_n_s16(arg_u8x8, arg_i16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrshrn_n_s16(arg_i16x8, 1);
+	vqrshrn_n_s16(arg_i16x8, 8);
+	vqrshrn_n_s16(arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrn_n_s16(arg_i16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrshrnh_n_s16(arg_i16, 1);
+	vqrshrnh_n_s16(arg_i16, 8);
+	vqrshrnh_n_s16(arg_i16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrnh_n_s16(arg_i16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrshrn_high_n_s16(arg_i8x8, arg_i16x8, 1);
+	vqrshrn_high_n_s16(arg_i8x8, arg_i16x8, 8);
+	vqrshrn_high_n_s16(arg_i8x8, arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrn_high_n_s16(arg_i8x8, arg_i16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_rounding_shift_right_and_narrow_s32(int16x4_t arg_i16x4, int32_t arg_i32,
+																 int32x4_t arg_i32x4, uint16x4_t arg_u16x4) {
+	vqrshrun_n_s32(arg_i32x4, 1);
+	vqrshrun_n_s32(arg_i32x4, 16);
+	vqrshrun_n_s32(arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrun_n_s32(arg_i32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrshruns_n_s32(arg_i32, 1);
+	vqrshruns_n_s32(arg_i32, 16);
+	vqrshruns_n_s32(arg_i32, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshruns_n_s32(arg_i32, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrshrun_high_n_s32(arg_u16x4, arg_i32x4, 1);
+	vqrshrun_high_n_s32(arg_u16x4, arg_i32x4, 16);
+	vqrshrun_high_n_s32(arg_u16x4, arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrun_high_n_s32(arg_u16x4, arg_i32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrshrn_n_s32(arg_i32x4, 1);
+	vqrshrn_n_s32(arg_i32x4, 16);
+	vqrshrn_n_s32(arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrn_n_s32(arg_i32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrshrns_n_s32(arg_i32, 1);
+	vqrshrns_n_s32(arg_i32, 16);
+	vqrshrns_n_s32(arg_i32, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrns_n_s32(arg_i32, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrshrn_high_n_s32(arg_i16x4, arg_i32x4, 1);
+	vqrshrn_high_n_s32(arg_i16x4, arg_i32x4, 16);
+	vqrshrn_high_n_s32(arg_i16x4, arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrn_high_n_s32(arg_i16x4, arg_i32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_rounding_shift_right_and_narrow_s64(uint32x2_t arg_u32x2, int64x2_t arg_i64x2,
+																int32x2_t arg_i32x2, int64_t arg_i64) {
+	vqrshrun_n_s64(arg_i64x2, 1);
+	vqrshrun_n_s64(arg_i64x2, 32);
+	vqrshrun_n_s64(arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrun_n_s64(arg_i64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrshrund_n_s64(arg_i64, 1);
+	vqrshrund_n_s64(arg_i64, 32);
+	vqrshrund_n_s64(arg_i64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrund_n_s64(arg_i64, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrshrun_high_n_s64(arg_u32x2, arg_i64x2, 1);
+	vqrshrun_high_n_s64(arg_u32x2, arg_i64x2, 32);
+	vqrshrun_high_n_s64(arg_u32x2, arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrun_high_n_s64(arg_u32x2, arg_i64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrshrn_n_s64(arg_i64x2, 1);
+	vqrshrn_n_s64(arg_i64x2, 32);
+	vqrshrn_n_s64(arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrn_n_s64(arg_i64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrshrnd_n_s64(arg_i64, 1);
+	vqrshrnd_n_s64(arg_i64, 32);
+	vqrshrnd_n_s64(arg_i64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrnd_n_s64(arg_i64, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrshrn_high_n_s64(arg_i32x2, arg_i64x2, 1);
+	vqrshrn_high_n_s64(arg_i32x2, arg_i64x2, 32);
+	vqrshrn_high_n_s64(arg_i32x2, arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrn_high_n_s64(arg_i32x2, arg_i64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_rounding_shift_right_and_narrow_u16(uint16x8_t arg_u16x8, uint16_t arg_u16,
+																uint8x8_t arg_u8x8) {
+	vqrshrn_n_u16(arg_u16x8, 1);
+	vqrshrn_n_u16(arg_u16x8, 8);
+	vqrshrn_n_u16(arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrn_n_u16(arg_u16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrshrnh_n_u16(arg_u16, 1);
+	vqrshrnh_n_u16(arg_u16, 8);
+	vqrshrnh_n_u16(arg_u16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrnh_n_u16(arg_u16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrshrn_high_n_u16(arg_u8x8, arg_u16x8, 1);
+	vqrshrn_high_n_u16(arg_u8x8, arg_u16x8, 8);
+	vqrshrn_high_n_u16(arg_u8x8, arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrn_high_n_u16(arg_u8x8, arg_u16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_rounding_shift_right_and_narrow_u32(uint32x4_t arg_u32x4, uint32_t arg_u32,
+																uint16x4_t arg_u16x4) {
+	vqrshrn_n_u32(arg_u32x4, 1);
+	vqrshrn_n_u32(arg_u32x4, 16);
+	vqrshrn_n_u32(arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrn_n_u32(arg_u32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrshrns_n_u32(arg_u32, 1);
+	vqrshrns_n_u32(arg_u32, 16);
+	vqrshrns_n_u32(arg_u32, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrns_n_u32(arg_u32, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrshrn_high_n_u32(arg_u16x4, arg_u32x4, 1);
+	vqrshrn_high_n_u32(arg_u16x4, arg_u32x4, 16);
+	vqrshrn_high_n_u32(arg_u16x4, arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrn_high_n_u32(arg_u16x4, arg_u32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_rounding_shift_right_and_narrow_u64(uint64x2_t arg_u64x2, uint32x2_t arg_u32x2,
+																uint64_t arg_u64) {
+	vqrshrn_n_u64(arg_u64x2, 1);
+	vqrshrn_n_u64(arg_u64x2, 32);
+	vqrshrn_n_u64(arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrn_n_u64(arg_u64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrshrnd_n_u64(arg_u64, 1);
+	vqrshrnd_n_u64(arg_u64, 32);
+	vqrshrnd_n_u64(arg_u64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrnd_n_u64(arg_u64, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrshrn_high_n_u64(arg_u32x2, arg_u64x2, 1);
+	vqrshrn_high_n_u64(arg_u32x2, arg_u64x2, 32);
+	vqrshrn_high_n_u64(arg_u32x2, arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrn_high_n_u64(arg_u32x2, arg_u64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_and_narrow_s16(int16x8_t arg_i16x8, int8x8_t arg_i8x8) {
+	vrshrn_n_s16(arg_i16x8, 1);
+	vrshrn_n_s16(arg_i16x8, 8);
+	vrshrn_n_s16(arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrn_n_s16(arg_i16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrshrn_high_n_s16(arg_i8x8, arg_i16x8, 1);
+	vrshrn_high_n_s16(arg_i8x8, arg_i16x8, 8);
+	vrshrn_high_n_s16(arg_i8x8, arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrn_high_n_s16(arg_i8x8, arg_i16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_and_narrow_s32(int32x4_t arg_i32x4, int16x4_t arg_i16x4) {
+	vrshrn_n_s32(arg_i32x4, 1);
+	vrshrn_n_s32(arg_i32x4, 16);
+	vrshrn_n_s32(arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrn_n_s32(arg_i32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrshrn_high_n_s32(arg_i16x4, arg_i32x4, 1);
+	vrshrn_high_n_s32(arg_i16x4, arg_i32x4, 16);
+	vrshrn_high_n_s32(arg_i16x4, arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrn_high_n_s32(arg_i16x4, arg_i32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_and_narrow_s64(int32x2_t arg_i32x2, int64x2_t arg_i64x2) {
+	vrshrn_n_s64(arg_i64x2, 1);
+	vrshrn_n_s64(arg_i64x2, 32);
+	vrshrn_n_s64(arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrn_n_s64(arg_i64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrshrn_high_n_s64(arg_i32x2, arg_i64x2, 1);
+	vrshrn_high_n_s64(arg_i32x2, arg_i64x2, 32);
+	vrshrn_high_n_s64(arg_i32x2, arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrn_high_n_s64(arg_i32x2, arg_i64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_and_narrow_u16(uint16x8_t arg_u16x8, uint8x8_t arg_u8x8) {
+	vrshrn_n_u16(arg_u16x8, 1);
+	vrshrn_n_u16(arg_u16x8, 8);
+	vrshrn_n_u16(arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrn_n_u16(arg_u16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrshrn_high_n_u16(arg_u8x8, arg_u16x8, 1);
+	vrshrn_high_n_u16(arg_u8x8, arg_u16x8, 8);
+	vrshrn_high_n_u16(arg_u8x8, arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrn_high_n_u16(arg_u8x8, arg_u16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_and_narrow_u32(uint32x4_t arg_u32x4, uint16x4_t arg_u16x4) {
+	vrshrn_n_u32(arg_u32x4, 1);
+	vrshrn_n_u32(arg_u32x4, 16);
+	vrshrn_n_u32(arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrn_n_u32(arg_u32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrshrn_high_n_u32(arg_u16x4, arg_u32x4, 1);
+	vrshrn_high_n_u32(arg_u16x4, arg_u32x4, 16);
+	vrshrn_high_n_u32(arg_u16x4, arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrn_high_n_u32(arg_u16x4, arg_u32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_and_narrow_u64(uint64x2_t arg_u64x2, uint32x2_t arg_u32x2) {
+	vrshrn_n_u64(arg_u64x2, 1);
+	vrshrn_n_u64(arg_u64x2, 32);
+	vrshrn_n_u64(arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrn_n_u64(arg_u64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrshrn_high_n_u64(arg_u32x2, arg_u64x2, 1);
+	vrshrn_high_n_u64(arg_u32x2, arg_u64x2, 32);
+	vrshrn_high_n_u64(arg_u32x2, arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrn_high_n_u64(arg_u32x2, arg_u64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_insert_s8(int8x8_t arg_i8x8, int8x16_t arg_i8x16) {
+	vsri_n_s8(arg_i8x8, arg_i8x8, 1);
+	vsri_n_s8(arg_i8x8, arg_i8x8, 8);
+	vsri_n_s8(arg_i8x8, arg_i8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsri_n_s8(arg_i8x8, arg_i8x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsriq_n_s8(arg_i8x16, arg_i8x16, 1);
+	vsriq_n_s8(arg_i8x16, arg_i8x16, 8);
+	vsriq_n_s8(arg_i8x16, arg_i8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsriq_n_s8(arg_i8x16, arg_i8x16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_insert_s16(int16x4_t arg_i16x4, int16x8_t arg_i16x8) {
+	vsri_n_s16(arg_i16x4, arg_i16x4, 1);
+	vsri_n_s16(arg_i16x4, arg_i16x4, 16);
+	vsri_n_s16(arg_i16x4, arg_i16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsri_n_s16(arg_i16x4, arg_i16x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsriq_n_s16(arg_i16x8, arg_i16x8, 1);
+	vsriq_n_s16(arg_i16x8, arg_i16x8, 16);
+	vsriq_n_s16(arg_i16x8, arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsriq_n_s16(arg_i16x8, arg_i16x8, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_insert_s32(int32x2_t arg_i32x2, int32x4_t arg_i32x4) {
+	vsri_n_s32(arg_i32x2, arg_i32x2, 1);
+	vsri_n_s32(arg_i32x2, arg_i32x2, 32);
+	vsri_n_s32(arg_i32x2, arg_i32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsri_n_s32(arg_i32x2, arg_i32x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsriq_n_s32(arg_i32x4, arg_i32x4, 1);
+	vsriq_n_s32(arg_i32x4, arg_i32x4, 32);
+	vsriq_n_s32(arg_i32x4, arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsriq_n_s32(arg_i32x4, arg_i32x4, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_insert_s64(int64_t arg_i64, int64x1_t arg_i64x1, int64x2_t arg_i64x2) {
+	vsri_n_s64(arg_i64x1, arg_i64x1, 1);
+	vsri_n_s64(arg_i64x1, arg_i64x1, 64);
+	vsri_n_s64(arg_i64x1, arg_i64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsri_n_s64(arg_i64x1, arg_i64x1, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsriq_n_s64(arg_i64x2, arg_i64x2, 1);
+	vsriq_n_s64(arg_i64x2, arg_i64x2, 64);
+	vsriq_n_s64(arg_i64x2, arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsriq_n_s64(arg_i64x2, arg_i64x2, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsrid_n_s64(arg_i64, arg_i64, 1);
+	vsrid_n_s64(arg_i64, arg_i64, 64);
+	vsrid_n_s64(arg_i64, arg_i64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsrid_n_s64(arg_i64, arg_i64, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_insert_u8(uint8x16_t arg_u8x16, uint8x8_t arg_u8x8) {
+	vsri_n_u8(arg_u8x8, arg_u8x8, 1);
+	vsri_n_u8(arg_u8x8, arg_u8x8, 8);
+	vsri_n_u8(arg_u8x8, arg_u8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsri_n_u8(arg_u8x8, arg_u8x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsriq_n_u8(arg_u8x16, arg_u8x16, 1);
+	vsriq_n_u8(arg_u8x16, arg_u8x16, 8);
+	vsriq_n_u8(arg_u8x16, arg_u8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsriq_n_u8(arg_u8x16, arg_u8x16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_insert_u16(uint16x8_t arg_u16x8, uint16x4_t arg_u16x4) {
+	vsri_n_u16(arg_u16x4, arg_u16x4, 1);
+	vsri_n_u16(arg_u16x4, arg_u16x4, 16);
+	vsri_n_u16(arg_u16x4, arg_u16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsri_n_u16(arg_u16x4, arg_u16x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsriq_n_u16(arg_u16x8, arg_u16x8, 1);
+	vsriq_n_u16(arg_u16x8, arg_u16x8, 16);
+	vsriq_n_u16(arg_u16x8, arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsriq_n_u16(arg_u16x8, arg_u16x8, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_insert_u32(uint32x2_t arg_u32x2, uint32x4_t arg_u32x4) {
+	vsri_n_u32(arg_u32x2, arg_u32x2, 1);
+	vsri_n_u32(arg_u32x2, arg_u32x2, 32);
+	vsri_n_u32(arg_u32x2, arg_u32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsri_n_u32(arg_u32x2, arg_u32x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsriq_n_u32(arg_u32x4, arg_u32x4, 1);
+	vsriq_n_u32(arg_u32x4, arg_u32x4, 32);
+	vsriq_n_u32(arg_u32x4, arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsriq_n_u32(arg_u32x4, arg_u32x4, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_insert_u64(uint64x2_t arg_u64x2, uint64_t arg_u64, uint64x1_t arg_u64x1) {
+	vsri_n_u64(arg_u64x1, arg_u64x1, 1);
+	vsri_n_u64(arg_u64x1, arg_u64x1, 64);
+	vsri_n_u64(arg_u64x1, arg_u64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsri_n_u64(arg_u64x1, arg_u64x1, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsriq_n_u64(arg_u64x2, arg_u64x2, 1);
+	vsriq_n_u64(arg_u64x2, arg_u64x2, 64);
+	vsriq_n_u64(arg_u64x2, arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsriq_n_u64(arg_u64x2, arg_u64x2, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsrid_n_u64(arg_u64, arg_u64, 1);
+	vsrid_n_u64(arg_u64, arg_u64, 64);
+	vsrid_n_u64(arg_u64, arg_u64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsrid_n_u64(arg_u64, arg_u64, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_insert_p64(poly64x2_t arg_p64x2, poly64x1_t arg_p64x1) {
+	vsri_n_p64(arg_p64x1, arg_p64x1, 1);
+	vsri_n_p64(arg_p64x1, arg_p64x1, 64);
+	vsri_n_p64(arg_p64x1, arg_p64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsri_n_p64(arg_p64x1, arg_p64x1, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsriq_n_p64(arg_p64x2, arg_p64x2, 1);
+	vsriq_n_p64(arg_p64x2, arg_p64x2, 64);
+	vsriq_n_p64(arg_p64x2, arg_p64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsriq_n_p64(arg_p64x2, arg_p64x2, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_insert_p8(poly8x16_t arg_p8x16, poly8x8_t arg_p8x8) {
+	vsri_n_p8(arg_p8x8, arg_p8x8, 1);
+	vsri_n_p8(arg_p8x8, arg_p8x8, 8);
+	vsri_n_p8(arg_p8x8, arg_p8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsri_n_p8(arg_p8x8, arg_p8x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsriq_n_p8(arg_p8x16, arg_p8x16, 1);
+	vsriq_n_p8(arg_p8x16, arg_p8x16, 8);
+	vsriq_n_p8(arg_p8x16, arg_p8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsriq_n_p8(arg_p8x16, arg_p8x16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_insert_p16(poly16x4_t arg_p16x4, poly16x8_t arg_p16x8) {
+	vsri_n_p16(arg_p16x4, arg_p16x4, 1);
+	vsri_n_p16(arg_p16x4, arg_p16x4, 16);
+	vsri_n_p16(arg_p16x4, arg_p16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsri_n_p16(arg_p16x4, arg_p16x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsriq_n_p16(arg_p16x8, arg_p16x8, 1);
+	vsriq_n_p16(arg_p16x8, arg_p16x8, 16);
+	vsriq_n_p16(arg_p16x8, arg_p16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsriq_n_p16(arg_p16x8, arg_p16x8, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git clang/test/Sema/aarch64-neon-immediate-ranges/vector-store.c clang/test/Sema/aarch64-neon-immediate-ranges/vector-store.c
new file mode 100644
index 000000000000..a35891c9adbb
--- /dev/null
+++ clang/test/Sema/aarch64-neon-immediate-ranges/vector-store.c
@@ -0,0 +1,620 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+
+void test_vector_store_s8(int8x8_t arg_i8x8, int8x8x3_t arg_i8x8x3, int8_t* arg_i8_ptr,
+						  int8x16x3_t arg_i8x16x3, int8x8x4_t arg_i8x8x4, int8x16x4_t arg_i8x16x4,
+						  int8x8x2_t arg_i8x8x2, int8x16_t arg_i8x16, int8x16x2_t arg_i8x16x2) {
+	vst1_lane_s8(arg_i8_ptr, arg_i8x8, 0);
+	vst1_lane_s8(arg_i8_ptr, arg_i8x8, 7);
+	vst1_lane_s8(arg_i8_ptr, arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1_lane_s8(arg_i8_ptr, arg_i8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst1q_lane_s8(arg_i8_ptr, arg_i8x16, 0);
+	vst1q_lane_s8(arg_i8_ptr, arg_i8x16, 15);
+	vst1q_lane_s8(arg_i8_ptr, arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1q_lane_s8(arg_i8_ptr, arg_i8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2_lane_s8(arg_i8_ptr, arg_i8x8x2, 0);
+	vst2_lane_s8(arg_i8_ptr, arg_i8x8x2, 7);
+	vst2_lane_s8(arg_i8_ptr, arg_i8x8x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2_lane_s8(arg_i8_ptr, arg_i8x8x2, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3_lane_s8(arg_i8_ptr, arg_i8x8x3, 0);
+	vst3_lane_s8(arg_i8_ptr, arg_i8x8x3, 7);
+	vst3_lane_s8(arg_i8_ptr, arg_i8x8x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3_lane_s8(arg_i8_ptr, arg_i8x8x3, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4_lane_s8(arg_i8_ptr, arg_i8x8x4, 0);
+	vst4_lane_s8(arg_i8_ptr, arg_i8x8x4, 7);
+	vst4_lane_s8(arg_i8_ptr, arg_i8x8x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4_lane_s8(arg_i8_ptr, arg_i8x8x4, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2q_lane_s8(arg_i8_ptr, arg_i8x16x2, 0);
+	vst2q_lane_s8(arg_i8_ptr, arg_i8x16x2, 15);
+	vst2q_lane_s8(arg_i8_ptr, arg_i8x16x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2q_lane_s8(arg_i8_ptr, arg_i8x16x2, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3q_lane_s8(arg_i8_ptr, arg_i8x16x3, 0);
+	vst3q_lane_s8(arg_i8_ptr, arg_i8x16x3, 15);
+	vst3q_lane_s8(arg_i8_ptr, arg_i8x16x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3q_lane_s8(arg_i8_ptr, arg_i8x16x3, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4q_lane_s8(arg_i8_ptr, arg_i8x16x4, 0);
+	vst4q_lane_s8(arg_i8_ptr, arg_i8x16x4, 15);
+	vst4q_lane_s8(arg_i8_ptr, arg_i8x16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4q_lane_s8(arg_i8_ptr, arg_i8x16x4, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_store_s16(int16x8x3_t arg_i16x8x3, int16x4_t arg_i16x4, int16x4x3_t arg_i16x4x3,
+						   int16x8_t arg_i16x8, int16_t* arg_i16_ptr, int16x8x2_t arg_i16x8x2,
+						   int16x8x4_t arg_i16x8x4, int16x4x4_t arg_i16x4x4, int16x4x2_t arg_i16x4x2) {
+	vst1_lane_s16(arg_i16_ptr, arg_i16x4, 0);
+	vst1_lane_s16(arg_i16_ptr, arg_i16x4, 3);
+	vst1_lane_s16(arg_i16_ptr, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1_lane_s16(arg_i16_ptr, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst1q_lane_s16(arg_i16_ptr, arg_i16x8, 0);
+	vst1q_lane_s16(arg_i16_ptr, arg_i16x8, 7);
+	vst1q_lane_s16(arg_i16_ptr, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1q_lane_s16(arg_i16_ptr, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2_lane_s16(arg_i16_ptr, arg_i16x4x2, 0);
+	vst2_lane_s16(arg_i16_ptr, arg_i16x4x2, 3);
+	vst2_lane_s16(arg_i16_ptr, arg_i16x4x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2_lane_s16(arg_i16_ptr, arg_i16x4x2, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2q_lane_s16(arg_i16_ptr, arg_i16x8x2, 0);
+	vst2q_lane_s16(arg_i16_ptr, arg_i16x8x2, 7);
+	vst2q_lane_s16(arg_i16_ptr, arg_i16x8x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2q_lane_s16(arg_i16_ptr, arg_i16x8x2, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3_lane_s16(arg_i16_ptr, arg_i16x4x3, 0);
+	vst3_lane_s16(arg_i16_ptr, arg_i16x4x3, 3);
+	vst3_lane_s16(arg_i16_ptr, arg_i16x4x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3_lane_s16(arg_i16_ptr, arg_i16x4x3, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3q_lane_s16(arg_i16_ptr, arg_i16x8x3, 0);
+	vst3q_lane_s16(arg_i16_ptr, arg_i16x8x3, 7);
+	vst3q_lane_s16(arg_i16_ptr, arg_i16x8x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3q_lane_s16(arg_i16_ptr, arg_i16x8x3, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4_lane_s16(arg_i16_ptr, arg_i16x4x4, 0);
+	vst4_lane_s16(arg_i16_ptr, arg_i16x4x4, 3);
+	vst4_lane_s16(arg_i16_ptr, arg_i16x4x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4_lane_s16(arg_i16_ptr, arg_i16x4x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4q_lane_s16(arg_i16_ptr, arg_i16x8x4, 0);
+	vst4q_lane_s16(arg_i16_ptr, arg_i16x8x4, 7);
+	vst4q_lane_s16(arg_i16_ptr, arg_i16x8x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4q_lane_s16(arg_i16_ptr, arg_i16x8x4, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_store_s32(int32x4x3_t arg_i32x4x3, int32x4_t arg_i32x4, int32x2x2_t arg_i32x2x2,
+						   int32x2x3_t arg_i32x2x3, int32x4x4_t arg_i32x4x4, int32x4x2_t arg_i32x4x2,
+						   int32x2_t arg_i32x2, int32x2x4_t arg_i32x2x4, int32_t* arg_i32_ptr) {
+	vst1_lane_s32(arg_i32_ptr, arg_i32x2, 0);
+	vst1_lane_s32(arg_i32_ptr, arg_i32x2, 1);
+	vst1_lane_s32(arg_i32_ptr, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1_lane_s32(arg_i32_ptr, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst1q_lane_s32(arg_i32_ptr, arg_i32x4, 0);
+	vst1q_lane_s32(arg_i32_ptr, arg_i32x4, 3);
+	vst1q_lane_s32(arg_i32_ptr, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1q_lane_s32(arg_i32_ptr, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2_lane_s32(arg_i32_ptr, arg_i32x2x2, 0);
+	vst2_lane_s32(arg_i32_ptr, arg_i32x2x2, 1);
+	vst2_lane_s32(arg_i32_ptr, arg_i32x2x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2_lane_s32(arg_i32_ptr, arg_i32x2x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2q_lane_s32(arg_i32_ptr, arg_i32x4x2, 0);
+	vst2q_lane_s32(arg_i32_ptr, arg_i32x4x2, 3);
+	vst2q_lane_s32(arg_i32_ptr, arg_i32x4x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2q_lane_s32(arg_i32_ptr, arg_i32x4x2, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3_lane_s32(arg_i32_ptr, arg_i32x2x3, 0);
+	vst3_lane_s32(arg_i32_ptr, arg_i32x2x3, 1);
+	vst3_lane_s32(arg_i32_ptr, arg_i32x2x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3_lane_s32(arg_i32_ptr, arg_i32x2x3, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3q_lane_s32(arg_i32_ptr, arg_i32x4x3, 0);
+	vst3q_lane_s32(arg_i32_ptr, arg_i32x4x3, 3);
+	vst3q_lane_s32(arg_i32_ptr, arg_i32x4x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3q_lane_s32(arg_i32_ptr, arg_i32x4x3, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4_lane_s32(arg_i32_ptr, arg_i32x2x4, 0);
+	vst4_lane_s32(arg_i32_ptr, arg_i32x2x4, 1);
+	vst4_lane_s32(arg_i32_ptr, arg_i32x2x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4_lane_s32(arg_i32_ptr, arg_i32x2x4, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4q_lane_s32(arg_i32_ptr, arg_i32x4x4, 0);
+	vst4q_lane_s32(arg_i32_ptr, arg_i32x4x4, 3);
+	vst4q_lane_s32(arg_i32_ptr, arg_i32x4x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4q_lane_s32(arg_i32_ptr, arg_i32x4x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_store_s64(int64x2x2_t arg_i64x2x2, int64_t* arg_i64_ptr, int64x1_t arg_i64x1,
+						   int64x2x4_t arg_i64x2x4, int64x1x4_t arg_i64x1x4, int64x1x2_t arg_i64x1x2,
+						   int64x1x3_t arg_i64x1x3, int64x2x3_t arg_i64x2x3, int64x2_t arg_i64x2) {
+	vst1_lane_s64(arg_i64_ptr, arg_i64x1, 0);
+	vst1_lane_s64(arg_i64_ptr, arg_i64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1_lane_s64(arg_i64_ptr, arg_i64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst1q_lane_s64(arg_i64_ptr, arg_i64x2, 0);
+	vst1q_lane_s64(arg_i64_ptr, arg_i64x2, 1);
+	vst1q_lane_s64(arg_i64_ptr, arg_i64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1q_lane_s64(arg_i64_ptr, arg_i64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2_lane_s64(arg_i64_ptr, arg_i64x1x2, 0);
+	vst2_lane_s64(arg_i64_ptr, arg_i64x1x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2_lane_s64(arg_i64_ptr, arg_i64x1x2, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2q_lane_s64(arg_i64_ptr, arg_i64x2x2, 0);
+	vst2q_lane_s64(arg_i64_ptr, arg_i64x2x2, 1);
+	vst2q_lane_s64(arg_i64_ptr, arg_i64x2x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2q_lane_s64(arg_i64_ptr, arg_i64x2x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3_lane_s64(arg_i64_ptr, arg_i64x1x3, 0);
+	vst3_lane_s64(arg_i64_ptr, arg_i64x1x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3_lane_s64(arg_i64_ptr, arg_i64x1x3, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3q_lane_s64(arg_i64_ptr, arg_i64x2x3, 0);
+	vst3q_lane_s64(arg_i64_ptr, arg_i64x2x3, 1);
+	vst3q_lane_s64(arg_i64_ptr, arg_i64x2x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3q_lane_s64(arg_i64_ptr, arg_i64x2x3, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4_lane_s64(arg_i64_ptr, arg_i64x1x4, 0);
+	vst4_lane_s64(arg_i64_ptr, arg_i64x1x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4_lane_s64(arg_i64_ptr, arg_i64x1x4, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4q_lane_s64(arg_i64_ptr, arg_i64x2x4, 0);
+	vst4q_lane_s64(arg_i64_ptr, arg_i64x2x4, 1);
+	vst4q_lane_s64(arg_i64_ptr, arg_i64x2x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4q_lane_s64(arg_i64_ptr, arg_i64x2x4, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_store_u8(uint8x16_t arg_u8x16, uint8x16x3_t arg_u8x16x3, uint8x8x4_t arg_u8x8x4,
+						  uint8x16x2_t arg_u8x16x2, uint8x8_t arg_u8x8, uint8x8x3_t arg_u8x8x3,
+						  uint8x16x4_t arg_u8x16x4, uint8_t* arg_u8_ptr, uint8x8x2_t arg_u8x8x2) {
+	vst1_lane_u8(arg_u8_ptr, arg_u8x8, 0);
+	vst1_lane_u8(arg_u8_ptr, arg_u8x8, 7);
+	vst1_lane_u8(arg_u8_ptr, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1_lane_u8(arg_u8_ptr, arg_u8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst1q_lane_u8(arg_u8_ptr, arg_u8x16, 0);
+	vst1q_lane_u8(arg_u8_ptr, arg_u8x16, 15);
+	vst1q_lane_u8(arg_u8_ptr, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1q_lane_u8(arg_u8_ptr, arg_u8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2_lane_u8(arg_u8_ptr, arg_u8x8x2, 0);
+	vst2_lane_u8(arg_u8_ptr, arg_u8x8x2, 7);
+	vst2_lane_u8(arg_u8_ptr, arg_u8x8x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2_lane_u8(arg_u8_ptr, arg_u8x8x2, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3_lane_u8(arg_u8_ptr, arg_u8x8x3, 0);
+	vst3_lane_u8(arg_u8_ptr, arg_u8x8x3, 7);
+	vst3_lane_u8(arg_u8_ptr, arg_u8x8x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3_lane_u8(arg_u8_ptr, arg_u8x8x3, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4_lane_u8(arg_u8_ptr, arg_u8x8x4, 0);
+	vst4_lane_u8(arg_u8_ptr, arg_u8x8x4, 7);
+	vst4_lane_u8(arg_u8_ptr, arg_u8x8x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4_lane_u8(arg_u8_ptr, arg_u8x8x4, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2q_lane_u8(arg_u8_ptr, arg_u8x16x2, 0);
+	vst2q_lane_u8(arg_u8_ptr, arg_u8x16x2, 15);
+	vst2q_lane_u8(arg_u8_ptr, arg_u8x16x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2q_lane_u8(arg_u8_ptr, arg_u8x16x2, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3q_lane_u8(arg_u8_ptr, arg_u8x16x3, 0);
+	vst3q_lane_u8(arg_u8_ptr, arg_u8x16x3, 15);
+	vst3q_lane_u8(arg_u8_ptr, arg_u8x16x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3q_lane_u8(arg_u8_ptr, arg_u8x16x3, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4q_lane_u8(arg_u8_ptr, arg_u8x16x4, 0);
+	vst4q_lane_u8(arg_u8_ptr, arg_u8x16x4, 15);
+	vst4q_lane_u8(arg_u8_ptr, arg_u8x16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4q_lane_u8(arg_u8_ptr, arg_u8x16x4, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_store_u16(uint16x8x3_t arg_u16x8x3, uint16x4x4_t arg_u16x4x4, uint16_t* arg_u16_ptr,
+						   uint16x4_t arg_u16x4, uint16x4x2_t arg_u16x4x2, uint16x4x3_t arg_u16x4x3,
+						   uint16x8_t arg_u16x8, uint16x8x2_t arg_u16x8x2, uint16x8x4_t arg_u16x8x4) {
+	vst1_lane_u16(arg_u16_ptr, arg_u16x4, 0);
+	vst1_lane_u16(arg_u16_ptr, arg_u16x4, 3);
+	vst1_lane_u16(arg_u16_ptr, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1_lane_u16(arg_u16_ptr, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst1q_lane_u16(arg_u16_ptr, arg_u16x8, 0);
+	vst1q_lane_u16(arg_u16_ptr, arg_u16x8, 7);
+	vst1q_lane_u16(arg_u16_ptr, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1q_lane_u16(arg_u16_ptr, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2_lane_u16(arg_u16_ptr, arg_u16x4x2, 0);
+	vst2_lane_u16(arg_u16_ptr, arg_u16x4x2, 3);
+	vst2_lane_u16(arg_u16_ptr, arg_u16x4x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2_lane_u16(arg_u16_ptr, arg_u16x4x2, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2q_lane_u16(arg_u16_ptr, arg_u16x8x2, 0);
+	vst2q_lane_u16(arg_u16_ptr, arg_u16x8x2, 7);
+	vst2q_lane_u16(arg_u16_ptr, arg_u16x8x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2q_lane_u16(arg_u16_ptr, arg_u16x8x2, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3_lane_u16(arg_u16_ptr, arg_u16x4x3, 0);
+	vst3_lane_u16(arg_u16_ptr, arg_u16x4x3, 3);
+	vst3_lane_u16(arg_u16_ptr, arg_u16x4x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3_lane_u16(arg_u16_ptr, arg_u16x4x3, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3q_lane_u16(arg_u16_ptr, arg_u16x8x3, 0);
+	vst3q_lane_u16(arg_u16_ptr, arg_u16x8x3, 7);
+	vst3q_lane_u16(arg_u16_ptr, arg_u16x8x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3q_lane_u16(arg_u16_ptr, arg_u16x8x3, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4_lane_u16(arg_u16_ptr, arg_u16x4x4, 0);
+	vst4_lane_u16(arg_u16_ptr, arg_u16x4x4, 3);
+	vst4_lane_u16(arg_u16_ptr, arg_u16x4x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4_lane_u16(arg_u16_ptr, arg_u16x4x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4q_lane_u16(arg_u16_ptr, arg_u16x8x4, 0);
+	vst4q_lane_u16(arg_u16_ptr, arg_u16x8x4, 7);
+	vst4q_lane_u16(arg_u16_ptr, arg_u16x8x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4q_lane_u16(arg_u16_ptr, arg_u16x8x4, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_store_u32(uint32x4x3_t arg_u32x4x3, uint32x2_t arg_u32x2, uint32x2x3_t arg_u32x2x3,
+						   uint32x4x4_t arg_u32x4x4, uint32x4_t arg_u32x4, uint32x2x2_t arg_u32x2x2,
+						   uint32_t* arg_u32_ptr, uint32x2x4_t arg_u32x2x4, uint32x4x2_t arg_u32x4x2) {
+	vst1_lane_u32(arg_u32_ptr, arg_u32x2, 0);
+	vst1_lane_u32(arg_u32_ptr, arg_u32x2, 1);
+	vst1_lane_u32(arg_u32_ptr, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1_lane_u32(arg_u32_ptr, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst1q_lane_u32(arg_u32_ptr, arg_u32x4, 0);
+	vst1q_lane_u32(arg_u32_ptr, arg_u32x4, 3);
+	vst1q_lane_u32(arg_u32_ptr, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1q_lane_u32(arg_u32_ptr, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2_lane_u32(arg_u32_ptr, arg_u32x2x2, 0);
+	vst2_lane_u32(arg_u32_ptr, arg_u32x2x2, 1);
+	vst2_lane_u32(arg_u32_ptr, arg_u32x2x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2_lane_u32(arg_u32_ptr, arg_u32x2x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2q_lane_u32(arg_u32_ptr, arg_u32x4x2, 0);
+	vst2q_lane_u32(arg_u32_ptr, arg_u32x4x2, 3);
+	vst2q_lane_u32(arg_u32_ptr, arg_u32x4x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2q_lane_u32(arg_u32_ptr, arg_u32x4x2, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3_lane_u32(arg_u32_ptr, arg_u32x2x3, 0);
+	vst3_lane_u32(arg_u32_ptr, arg_u32x2x3, 1);
+	vst3_lane_u32(arg_u32_ptr, arg_u32x2x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3_lane_u32(arg_u32_ptr, arg_u32x2x3, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3q_lane_u32(arg_u32_ptr, arg_u32x4x3, 0);
+	vst3q_lane_u32(arg_u32_ptr, arg_u32x4x3, 3);
+	vst3q_lane_u32(arg_u32_ptr, arg_u32x4x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3q_lane_u32(arg_u32_ptr, arg_u32x4x3, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4_lane_u32(arg_u32_ptr, arg_u32x2x4, 0);
+	vst4_lane_u32(arg_u32_ptr, arg_u32x2x4, 1);
+	vst4_lane_u32(arg_u32_ptr, arg_u32x2x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4_lane_u32(arg_u32_ptr, arg_u32x2x4, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4q_lane_u32(arg_u32_ptr, arg_u32x4x4, 0);
+	vst4q_lane_u32(arg_u32_ptr, arg_u32x4x4, 3);
+	vst4q_lane_u32(arg_u32_ptr, arg_u32x4x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4q_lane_u32(arg_u32_ptr, arg_u32x4x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_store_u64(uint64x2x3_t arg_u64x2x3, uint64x1_t arg_u64x1, uint64x2_t arg_u64x2,
+						   uint64x1x2_t arg_u64x1x2, uint64x2x2_t arg_u64x2x2, uint64x1x3_t arg_u64x1x3,
+						   uint64_t* arg_u64_ptr, uint64x2x4_t arg_u64x2x4, uint64x1x4_t arg_u64x1x4) {
+	vst1_lane_u64(arg_u64_ptr, arg_u64x1, 0);
+	vst1_lane_u64(arg_u64_ptr, arg_u64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1_lane_u64(arg_u64_ptr, arg_u64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst1q_lane_u64(arg_u64_ptr, arg_u64x2, 0);
+	vst1q_lane_u64(arg_u64_ptr, arg_u64x2, 1);
+	vst1q_lane_u64(arg_u64_ptr, arg_u64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1q_lane_u64(arg_u64_ptr, arg_u64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2_lane_u64(arg_u64_ptr, arg_u64x1x2, 0);
+	vst2_lane_u64(arg_u64_ptr, arg_u64x1x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2_lane_u64(arg_u64_ptr, arg_u64x1x2, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2q_lane_u64(arg_u64_ptr, arg_u64x2x2, 0);
+	vst2q_lane_u64(arg_u64_ptr, arg_u64x2x2, 1);
+	vst2q_lane_u64(arg_u64_ptr, arg_u64x2x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2q_lane_u64(arg_u64_ptr, arg_u64x2x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3_lane_u64(arg_u64_ptr, arg_u64x1x3, 0);
+	vst3_lane_u64(arg_u64_ptr, arg_u64x1x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3_lane_u64(arg_u64_ptr, arg_u64x1x3, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3q_lane_u64(arg_u64_ptr, arg_u64x2x3, 0);
+	vst3q_lane_u64(arg_u64_ptr, arg_u64x2x3, 1);
+	vst3q_lane_u64(arg_u64_ptr, arg_u64x2x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3q_lane_u64(arg_u64_ptr, arg_u64x2x3, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4_lane_u64(arg_u64_ptr, arg_u64x1x4, 0);
+	vst4_lane_u64(arg_u64_ptr, arg_u64x1x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4_lane_u64(arg_u64_ptr, arg_u64x1x4, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4q_lane_u64(arg_u64_ptr, arg_u64x2x4, 0);
+	vst4q_lane_u64(arg_u64_ptr, arg_u64x2x4, 1);
+	vst4q_lane_u64(arg_u64_ptr, arg_u64x2x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4q_lane_u64(arg_u64_ptr, arg_u64x2x4, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_store_p64(poly64x2x4_t arg_p64x2x4, poly64x1x3_t arg_p64x1x3, poly64x1_t arg_p64x1,
+						   poly64x2x2_t arg_p64x2x2, poly64x1x4_t arg_p64x1x4, poly64_t* arg_p64_ptr,
+						   poly64x1x2_t arg_p64x1x2, poly64x2_t arg_p64x2, poly64x2x3_t arg_p64x2x3) {
+	vst1_lane_p64(arg_p64_ptr, arg_p64x1, 0);
+	vst1_lane_p64(arg_p64_ptr, arg_p64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1_lane_p64(arg_p64_ptr, arg_p64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst1q_lane_p64(arg_p64_ptr, arg_p64x2, 0);
+	vst1q_lane_p64(arg_p64_ptr, arg_p64x2, 1);
+	vst1q_lane_p64(arg_p64_ptr, arg_p64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1q_lane_p64(arg_p64_ptr, arg_p64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2_lane_p64(arg_p64_ptr, arg_p64x1x2, 0);
+	vst2_lane_p64(arg_p64_ptr, arg_p64x1x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2_lane_p64(arg_p64_ptr, arg_p64x1x2, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2q_lane_p64(arg_p64_ptr, arg_p64x2x2, 0);
+	vst2q_lane_p64(arg_p64_ptr, arg_p64x2x2, 1);
+	vst2q_lane_p64(arg_p64_ptr, arg_p64x2x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2q_lane_p64(arg_p64_ptr, arg_p64x2x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3_lane_p64(arg_p64_ptr, arg_p64x1x3, 0);
+	vst3_lane_p64(arg_p64_ptr, arg_p64x1x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3_lane_p64(arg_p64_ptr, arg_p64x1x3, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3q_lane_p64(arg_p64_ptr, arg_p64x2x3, 0);
+	vst3q_lane_p64(arg_p64_ptr, arg_p64x2x3, 1);
+	vst3q_lane_p64(arg_p64_ptr, arg_p64x2x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3q_lane_p64(arg_p64_ptr, arg_p64x2x3, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4_lane_p64(arg_p64_ptr, arg_p64x1x4, 0);
+	vst4_lane_p64(arg_p64_ptr, arg_p64x1x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4_lane_p64(arg_p64_ptr, arg_p64x1x4, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4q_lane_p64(arg_p64_ptr, arg_p64x2x4, 0);
+	vst4q_lane_p64(arg_p64_ptr, arg_p64x2x4, 1);
+	vst4q_lane_p64(arg_p64_ptr, arg_p64x2x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4q_lane_p64(arg_p64_ptr, arg_p64x2x4, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_store_f16(float16x4x4_t arg_f16x4x4, float16x8_t arg_f16x8, float16x8x2_t arg_f16x8x2,
+						   float16x8x3_t arg_f16x8x3, float16x4x2_t arg_f16x4x2, float16x4x3_t arg_f16x4x3,
+						   float16x4_t arg_f16x4, float16_t* arg_f16_ptr, float16x8x4_t arg_f16x8x4) {
+	vst1_lane_f16(arg_f16_ptr, arg_f16x4, 0);
+	vst1_lane_f16(arg_f16_ptr, arg_f16x4, 3);
+	vst1_lane_f16(arg_f16_ptr, arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1_lane_f16(arg_f16_ptr, arg_f16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst1q_lane_f16(arg_f16_ptr, arg_f16x8, 0);
+	vst1q_lane_f16(arg_f16_ptr, arg_f16x8, 7);
+	vst1q_lane_f16(arg_f16_ptr, arg_f16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1q_lane_f16(arg_f16_ptr, arg_f16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2_lane_f16(arg_f16_ptr, arg_f16x4x2, 0);
+	vst2_lane_f16(arg_f16_ptr, arg_f16x4x2, 3);
+	vst2_lane_f16(arg_f16_ptr, arg_f16x4x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2_lane_f16(arg_f16_ptr, arg_f16x4x2, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2q_lane_f16(arg_f16_ptr, arg_f16x8x2, 0);
+	vst2q_lane_f16(arg_f16_ptr, arg_f16x8x2, 7);
+	vst2q_lane_f16(arg_f16_ptr, arg_f16x8x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2q_lane_f16(arg_f16_ptr, arg_f16x8x2, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3_lane_f16(arg_f16_ptr, arg_f16x4x3, 0);
+	vst3_lane_f16(arg_f16_ptr, arg_f16x4x3, 3);
+	vst3_lane_f16(arg_f16_ptr, arg_f16x4x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3_lane_f16(arg_f16_ptr, arg_f16x4x3, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3q_lane_f16(arg_f16_ptr, arg_f16x8x3, 0);
+	vst3q_lane_f16(arg_f16_ptr, arg_f16x8x3, 7);
+	vst3q_lane_f16(arg_f16_ptr, arg_f16x8x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3q_lane_f16(arg_f16_ptr, arg_f16x8x3, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4_lane_f16(arg_f16_ptr, arg_f16x4x4, 0);
+	vst4_lane_f16(arg_f16_ptr, arg_f16x4x4, 3);
+	vst4_lane_f16(arg_f16_ptr, arg_f16x4x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4_lane_f16(arg_f16_ptr, arg_f16x4x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4q_lane_f16(arg_f16_ptr, arg_f16x8x4, 0);
+	vst4q_lane_f16(arg_f16_ptr, arg_f16x8x4, 7);
+	vst4q_lane_f16(arg_f16_ptr, arg_f16x8x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4q_lane_f16(arg_f16_ptr, arg_f16x8x4, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_store_f32(float32x2x3_t arg_f32x2x3, float32x2x2_t arg_f32x2x2, float32x4_t arg_f32x4,
+						   float32x4x3_t arg_f32x4x3, float32_t* arg_f32_ptr, float32x4x4_t arg_f32x4x4,
+						   float32x2x4_t arg_f32x2x4, float32x2_t arg_f32x2, float32x4x2_t arg_f32x4x2) {
+	vst1_lane_f32(arg_f32_ptr, arg_f32x2, 0);
+	vst1_lane_f32(arg_f32_ptr, arg_f32x2, 1);
+	vst1_lane_f32(arg_f32_ptr, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1_lane_f32(arg_f32_ptr, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst1q_lane_f32(arg_f32_ptr, arg_f32x4, 0);
+	vst1q_lane_f32(arg_f32_ptr, arg_f32x4, 3);
+	vst1q_lane_f32(arg_f32_ptr, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1q_lane_f32(arg_f32_ptr, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2_lane_f32(arg_f32_ptr, arg_f32x2x2, 0);
+	vst2_lane_f32(arg_f32_ptr, arg_f32x2x2, 1);
+	vst2_lane_f32(arg_f32_ptr, arg_f32x2x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2_lane_f32(arg_f32_ptr, arg_f32x2x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2q_lane_f32(arg_f32_ptr, arg_f32x4x2, 0);
+	vst2q_lane_f32(arg_f32_ptr, arg_f32x4x2, 3);
+	vst2q_lane_f32(arg_f32_ptr, arg_f32x4x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2q_lane_f32(arg_f32_ptr, arg_f32x4x2, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3_lane_f32(arg_f32_ptr, arg_f32x2x3, 0);
+	vst3_lane_f32(arg_f32_ptr, arg_f32x2x3, 1);
+	vst3_lane_f32(arg_f32_ptr, arg_f32x2x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3_lane_f32(arg_f32_ptr, arg_f32x2x3, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3q_lane_f32(arg_f32_ptr, arg_f32x4x3, 0);
+	vst3q_lane_f32(arg_f32_ptr, arg_f32x4x3, 3);
+	vst3q_lane_f32(arg_f32_ptr, arg_f32x4x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3q_lane_f32(arg_f32_ptr, arg_f32x4x3, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4_lane_f32(arg_f32_ptr, arg_f32x2x4, 0);
+	vst4_lane_f32(arg_f32_ptr, arg_f32x2x4, 1);
+	vst4_lane_f32(arg_f32_ptr, arg_f32x2x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4_lane_f32(arg_f32_ptr, arg_f32x2x4, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4q_lane_f32(arg_f32_ptr, arg_f32x4x4, 0);
+	vst4q_lane_f32(arg_f32_ptr, arg_f32x4x4, 3);
+	vst4q_lane_f32(arg_f32_ptr, arg_f32x4x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4q_lane_f32(arg_f32_ptr, arg_f32x4x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_store_p8(poly8x16_t arg_p8x16, poly8x16x2_t arg_p8x16x2, poly8x8x3_t arg_p8x8x3,
+						  poly8x16x3_t arg_p8x16x3, poly8x16x4_t arg_p8x16x4, poly8x8x4_t arg_p8x8x4,
+						  poly8_t* arg_p8_ptr, poly8x8_t arg_p8x8, poly8x8x2_t arg_p8x8x2) {
+	vst1_lane_p8(arg_p8_ptr, arg_p8x8, 0);
+	vst1_lane_p8(arg_p8_ptr, arg_p8x8, 7);
+	vst1_lane_p8(arg_p8_ptr, arg_p8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1_lane_p8(arg_p8_ptr, arg_p8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst1q_lane_p8(arg_p8_ptr, arg_p8x16, 0);
+	vst1q_lane_p8(arg_p8_ptr, arg_p8x16, 15);
+	vst1q_lane_p8(arg_p8_ptr, arg_p8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1q_lane_p8(arg_p8_ptr, arg_p8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2_lane_p8(arg_p8_ptr, arg_p8x8x2, 0);
+	vst2_lane_p8(arg_p8_ptr, arg_p8x8x2, 7);
+	vst2_lane_p8(arg_p8_ptr, arg_p8x8x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2_lane_p8(arg_p8_ptr, arg_p8x8x2, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3_lane_p8(arg_p8_ptr, arg_p8x8x3, 0);
+	vst3_lane_p8(arg_p8_ptr, arg_p8x8x3, 7);
+	vst3_lane_p8(arg_p8_ptr, arg_p8x8x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3_lane_p8(arg_p8_ptr, arg_p8x8x3, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4_lane_p8(arg_p8_ptr, arg_p8x8x4, 0);
+	vst4_lane_p8(arg_p8_ptr, arg_p8x8x4, 7);
+	vst4_lane_p8(arg_p8_ptr, arg_p8x8x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4_lane_p8(arg_p8_ptr, arg_p8x8x4, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2q_lane_p8(arg_p8_ptr, arg_p8x16x2, 0);
+	vst2q_lane_p8(arg_p8_ptr, arg_p8x16x2, 15);
+	vst2q_lane_p8(arg_p8_ptr, arg_p8x16x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2q_lane_p8(arg_p8_ptr, arg_p8x16x2, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3q_lane_p8(arg_p8_ptr, arg_p8x16x3, 0);
+	vst3q_lane_p8(arg_p8_ptr, arg_p8x16x3, 15);
+	vst3q_lane_p8(arg_p8_ptr, arg_p8x16x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3q_lane_p8(arg_p8_ptr, arg_p8x16x3, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4q_lane_p8(arg_p8_ptr, arg_p8x16x4, 0);
+	vst4q_lane_p8(arg_p8_ptr, arg_p8x16x4, 15);
+	vst4q_lane_p8(arg_p8_ptr, arg_p8x16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4q_lane_p8(arg_p8_ptr, arg_p8x16x4, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_store_p16(poly16x4x4_t arg_p16x4x4, poly16x4_t arg_p16x4, poly16x8x2_t arg_p16x8x2,
+						   poly16_t* arg_p16_ptr, poly16x8_t arg_p16x8, poly16x8x3_t arg_p16x8x3,
+						   poly16x4x3_t arg_p16x4x3, poly16x8x4_t arg_p16x8x4, poly16x4x2_t arg_p16x4x2) {
+	vst1_lane_p16(arg_p16_ptr, arg_p16x4, 0);
+	vst1_lane_p16(arg_p16_ptr, arg_p16x4, 3);
+	vst1_lane_p16(arg_p16_ptr, arg_p16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1_lane_p16(arg_p16_ptr, arg_p16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst1q_lane_p16(arg_p16_ptr, arg_p16x8, 0);
+	vst1q_lane_p16(arg_p16_ptr, arg_p16x8, 7);
+	vst1q_lane_p16(arg_p16_ptr, arg_p16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1q_lane_p16(arg_p16_ptr, arg_p16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2_lane_p16(arg_p16_ptr, arg_p16x4x2, 0);
+	vst2_lane_p16(arg_p16_ptr, arg_p16x4x2, 3);
+	vst2_lane_p16(arg_p16_ptr, arg_p16x4x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2_lane_p16(arg_p16_ptr, arg_p16x4x2, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2q_lane_p16(arg_p16_ptr, arg_p16x8x2, 0);
+	vst2q_lane_p16(arg_p16_ptr, arg_p16x8x2, 7);
+	vst2q_lane_p16(arg_p16_ptr, arg_p16x8x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2q_lane_p16(arg_p16_ptr, arg_p16x8x2, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3_lane_p16(arg_p16_ptr, arg_p16x4x3, 0);
+	vst3_lane_p16(arg_p16_ptr, arg_p16x4x3, 3);
+	vst3_lane_p16(arg_p16_ptr, arg_p16x4x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3_lane_p16(arg_p16_ptr, arg_p16x4x3, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3q_lane_p16(arg_p16_ptr, arg_p16x8x3, 0);
+	vst3q_lane_p16(arg_p16_ptr, arg_p16x8x3, 7);
+	vst3q_lane_p16(arg_p16_ptr, arg_p16x8x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3q_lane_p16(arg_p16_ptr, arg_p16x8x3, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4_lane_p16(arg_p16_ptr, arg_p16x4x4, 0);
+	vst4_lane_p16(arg_p16_ptr, arg_p16x4x4, 3);
+	vst4_lane_p16(arg_p16_ptr, arg_p16x4x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4_lane_p16(arg_p16_ptr, arg_p16x4x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4q_lane_p16(arg_p16_ptr, arg_p16x8x4, 0);
+	vst4q_lane_p16(arg_p16_ptr, arg_p16x8x4, 7);
+	vst4q_lane_p16(arg_p16_ptr, arg_p16x8x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4q_lane_p16(arg_p16_ptr, arg_p16x8x4, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_store_f64(float64_t* arg_f64_ptr, float64x2_t arg_f64x2, float64x1x3_t arg_f64x1x3,
+						   float64x2x4_t arg_f64x2x4, float64x1x4_t arg_f64x1x4, float64x1x2_t arg_f64x1x2,
+						   float64x1_t arg_f64x1, float64x2x2_t arg_f64x2x2, float64x2x3_t arg_f64x2x3) {
+	vst1_lane_f64(arg_f64_ptr, arg_f64x1, 0);
+	vst1_lane_f64(arg_f64_ptr, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1_lane_f64(arg_f64_ptr, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst1q_lane_f64(arg_f64_ptr, arg_f64x2, 0);
+	vst1q_lane_f64(arg_f64_ptr, arg_f64x2, 1);
+	vst1q_lane_f64(arg_f64_ptr, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1q_lane_f64(arg_f64_ptr, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2_lane_f64(arg_f64_ptr, arg_f64x1x2, 0);
+	vst2_lane_f64(arg_f64_ptr, arg_f64x1x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2_lane_f64(arg_f64_ptr, arg_f64x1x2, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2q_lane_f64(arg_f64_ptr, arg_f64x2x2, 0);
+	vst2q_lane_f64(arg_f64_ptr, arg_f64x2x2, 1);
+	vst2q_lane_f64(arg_f64_ptr, arg_f64x2x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2q_lane_f64(arg_f64_ptr, arg_f64x2x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3_lane_f64(arg_f64_ptr, arg_f64x1x3, 0);
+	vst3_lane_f64(arg_f64_ptr, arg_f64x1x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3_lane_f64(arg_f64_ptr, arg_f64x1x3, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3q_lane_f64(arg_f64_ptr, arg_f64x2x3, 0);
+	vst3q_lane_f64(arg_f64_ptr, arg_f64x2x3, 1);
+	vst3q_lane_f64(arg_f64_ptr, arg_f64x2x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3q_lane_f64(arg_f64_ptr, arg_f64x2x3, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4_lane_f64(arg_f64_ptr, arg_f64x1x4, 0);
+	vst4_lane_f64(arg_f64_ptr, arg_f64x1x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4_lane_f64(arg_f64_ptr, arg_f64x1x4, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4q_lane_f64(arg_f64_ptr, arg_f64x2x4, 0);
+	vst4q_lane_f64(arg_f64_ptr, arg_f64x2x4, 1);
+	vst4q_lane_f64(arg_f64_ptr, arg_f64x2x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4q_lane_f64(arg_f64_ptr, arg_f64x2x4, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git clang/test/Sema/aarch64-neon-ranges.c clang/test/Sema/aarch64-neon-ranges.c
deleted file mode 100644
index 2e60a12c2638..000000000000
--- clang/test/Sema/aarch64-neon-ranges.c
+++ /dev/null
@@ -1,220 +0,0 @@
-// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -ffreestanding -fsyntax-only -verify %s
-// RUN: %clang_cc1 -triple arm64-linux-gnu -target-feature +neon -ffreestanding -fsyntax-only -verify %s
-
-// REQUIRES: aarch64-registered-target || arm-registered-target
-
-#include <arm_neon.h>
-
-void test_vext_8bit(int8x8_t small, int8x16_t big) {
-  vext_s8(small, small, 7);
-  vext_u8(small, small, 7);
-  vext_p8(small, small, 7);
-  vextq_s8(big, big, 15);
-  vextq_u8(big, big, 15);
-  vextq_p8(big, big, 15);
-
-  vext_s8(small, small, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vext_u8(small, small, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vext_p8(small, small, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vextq_s8(big, big, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vextq_u8(big, big, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vextq_p8(big, big, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-}
-
-void test_mul_lane_f64(float64x1_t small, float64x2_t big, float64x2_t rhs) {
-  vmul_lane_f64(small, small, 0);
-  vmul_laneq_f64(small, big, 1);
-  vmulq_lane_f64(big, small, 0);
-  vmulq_laneq_f64(big, big, 1);
-  vfma_lane_f64(small, small, small, 0);
-  vfma_laneq_f64(small, small, big, 1);
-  vfmaq_lane_f64(big, big, small, 0);
-  vfmaq_laneq_f64(big, big, big, 1);
-
-  vmul_lane_f64(small, small, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vmul_laneq_f64(small, big, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vfma_lane_f64(small, small, small, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vfma_laneq_f64(small, small, big, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vfmaq_laneq_f64(big, big, big, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-}
-
-void test_ld1st1(int8x8_t small, int8x16_t big, void *addr) {
-  vld1_lane_s8(addr, small, 7);
-  vld1_lane_s16(addr, small, 3);
-  vld1_lane_s32(addr, small, 1);
-  vld1_lane_s64(addr, small, 0);
-
-  vld1q_lane_s8(addr, big, 15);
-  vld1q_lane_s16(addr, big, 7);
-  vld1q_lane_s32(addr, big, 3);
-  vld1q_lane_s64(addr, big, 1);
-
-  vld1_lane_s8(addr, small, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vld1_lane_s16(addr, small, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vld1_lane_s32(addr, small, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vld1_lane_s64(addr, small, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-
-  vld1q_lane_s8(addr, big, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vld1q_lane_s16(addr, big, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vld1q_lane_s32(addr, big, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vld1q_lane_s64(addr, big, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-
-  vst1_lane_s8(addr, small, 7);
-  vst1_lane_s16(addr, small, 3);
-  vst1_lane_s32(addr, small, 1);
-  vst1_lane_s64(addr, small, 0);
-
-  vst1q_lane_s8(addr, big, 15);
-  vst1q_lane_s16(addr, big, 7);
-  vst1q_lane_s32(addr, big, 3);
-  vst1q_lane_s64(addr, big, 1);
-
-  vst1_lane_s8(addr, small, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vst1_lane_s16(addr, small, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vst1_lane_s32(addr, small, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vst1_lane_s64(addr, small, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-
-  vst1q_lane_s8(addr, big, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vst1q_lane_s16(addr, big, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vst1q_lane_s32(addr, big, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vst1q_lane_s64(addr, big, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-}
-
-void test_ld2st2(int8x8x2_t small8, int8x16x2_t big8,
-                 int16x4x2_t small16, int16x8x2_t big16,
-                 int32x2x2_t small32, int32x4x2_t big32,
-                 int64x1x2_t small64, int64x2x2_t big64,
-                 void *addr) {
-  vld2_lane_s8(addr, small8, 7);
-  vld2_lane_s16(addr, small16, 3);
-  vld2_lane_s32(addr, small32, 1);
-  vld2_lane_s64(addr, small64, 0);
-
-  vld2q_lane_s8(addr, big8, 15);
-  vld2q_lane_s16(addr, big16, 7);
-  vld2q_lane_s32(addr, big32, 3);
-  vld2q_lane_s64(addr, big64, 1);
-
-  vld2_lane_s8(addr, small8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vld2_lane_s16(addr, small16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vld2_lane_s32(addr, small32, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vld2_lane_s64(addr, small64, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-
-  vld2q_lane_s8(addr, big8, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vld2q_lane_s16(addr, big16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vld2q_lane_s32(addr, big32, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vld2q_lane_s64(addr, big64, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-
-  vst2_lane_s8(addr, small8, 7);
-  vst2_lane_s16(addr, small16, 3);
-  vst2_lane_s32(addr, small32, 1);
-  vst2_lane_s64(addr, small64, 0);
-
-  vst2q_lane_s8(addr, big8, 15);
-  vst2q_lane_s16(addr, big16, 7);
-  vst2q_lane_s32(addr, big32, 3);
-  vst2q_lane_s64(addr, big64, 1);
-
-  vst2_lane_s8(addr, small8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vst2_lane_s16(addr, small16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vst2_lane_s32(addr, small32, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vst2_lane_s64(addr, small64, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-
-  vst2q_lane_s8(addr, big8, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vst2q_lane_s16(addr, big16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vst2q_lane_s32(addr, big32, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vst2q_lane_s64(addr, big64, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-}
-
-void test_ld3st3(int8x8x3_t small8, int8x16x3_t big8,
-                 int16x4x3_t small16, int16x8x3_t big16,
-                 int32x2x3_t small32, int32x4x3_t big32,
-                 int64x1x3_t small64, int64x2x3_t big64,
-                 void *addr) {
-  vld3_lane_s8(addr, small8, 7);
-  vld3_lane_s16(addr, small16, 3);
-  vld3_lane_s32(addr, small32, 1);
-  vld3_lane_s64(addr, small64, 0);
-
-  vld3q_lane_s8(addr, big8, 15);
-  vld3q_lane_s16(addr, big16, 7);
-  vld3q_lane_s32(addr, big32, 3);
-  vld3q_lane_s64(addr, big64, 1);
-
-  vld3_lane_s8(addr, small8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vld3_lane_s16(addr, small16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vld3_lane_s32(addr, small32, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vld3_lane_s64(addr, small64, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-
-  vld3q_lane_s8(addr, big8, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vld3q_lane_s16(addr, big16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vld3q_lane_s32(addr, big32, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vld3q_lane_s64(addr, big64, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-
-  vst3_lane_s8(addr, small8, 7);
-  vst3_lane_s16(addr, small16, 3);
-  vst3_lane_s32(addr, small32, 1);
-  vst3_lane_s64(addr, small64, 0);
-
-  vst3q_lane_s8(addr, big8, 15);
-  vst3q_lane_s16(addr, big16, 7);
-  vst3q_lane_s32(addr, big32, 3);
-  vst3q_lane_s64(addr, big64, 1);
-
-  vst3_lane_s8(addr, small8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vst3_lane_s16(addr, small16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vst3_lane_s32(addr, small32, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vst3_lane_s64(addr, small64, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-
-  vst3q_lane_s8(addr, big8, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vst3q_lane_s16(addr, big16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vst3q_lane_s32(addr, big32, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vst3q_lane_s64(addr, big64, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-}
-
-void test_ld4st4(int8x8x4_t small8, int8x16x4_t big8,
-                 int16x4x4_t small16, int16x8x4_t big16,
-                 int32x2x4_t small32, int32x4x4_t big32,
-                 int64x1x4_t small64, int64x2x4_t big64,
-                 void *addr) {
-  vld4_lane_s8(addr, small8, 7);
-  vld4_lane_s16(addr, small16, 3);
-  vld4_lane_s32(addr, small32, 1);
-  vld4_lane_s64(addr, small64, 0);
-
-  vld4q_lane_s8(addr, big8, 15);
-  vld4q_lane_s16(addr, big16, 7);
-  vld4q_lane_s32(addr, big32, 3);
-  vld4q_lane_s64(addr, big64, 1);
-
-  vld4_lane_s8(addr, small8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vld4_lane_s16(addr, small16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vld4_lane_s32(addr, small32, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vld4_lane_s64(addr, small64, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-
-  vld4q_lane_s8(addr, big8, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vld4q_lane_s16(addr, big16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vld4q_lane_s32(addr, big32, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vld4q_lane_s64(addr, big64, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-
-  vst4_lane_s8(addr, small8, 7);
-  vst4_lane_s16(addr, small16, 3);
-  vst4_lane_s32(addr, small32, 1);
-  vst4_lane_s64(addr, small64, 0);
-
-  vst4q_lane_s8(addr, big8, 15);
-  vst4q_lane_s16(addr, big16, 7);
-  vst4q_lane_s32(addr, big32, 3);
-  vst4q_lane_s64(addr, big64, 1);
-
-  vst4_lane_s8(addr, small8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vst4_lane_s16(addr, small16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vst4_lane_s32(addr, small32, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vst4_lane_s64(addr, small64, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-
-  vst4q_lane_s8(addr, big8, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vst4q_lane_s16(addr, big16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vst4q_lane_s32(addr, big32, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vst4q_lane_s64(addr, big64, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-}
-
diff --git clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_imm_lane.cpp clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_imm_lane.cpp
index bca063385420..e405077b3de9 100644
--- clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_imm_lane.cpp
+++ clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_imm_lane.cpp
@@ -78,6 +78,14 @@ void test_range_0_7()
   SVE_ACLE_FUNC(svqrdmlsh_lane,_s16,,)(svundef_s16(), svundef_s16(), svundef_s16(), -1);
   // expected-error-re@+1 {{argument value {{[0-9]+}} is outside the valid range [0, 7]}}
   SVE_ACLE_FUNC(svqrdmulh_lane,_s16,,)(svundef_s16(), svundef_s16(), -1);
+  // expected-error-re@+1 {{argument value {{[0-9]+}} is outside the valid range [0, 7]}}
+  SVE_ACLE_FUNC(svluti2_lane,_s16,,)(svundef_s16(), svundef_u8(), -1);
+  // expected-error-re@+1 {{argument value {{[0-9]+}} is outside the valid range [0, 7]}}
+  SVE_ACLE_FUNC(svluti2_lane,_u16,,)(svundef_u16(), svundef_u8(), -1);
+  // expected-error-re@+1 {{argument value {{[0-9]+}} is outside the valid range [0, 7]}}
+  SVE_ACLE_FUNC(svluti2_lane,_f16,,)(svundef_f16(), svundef_u8(), -1);
+  // expected-error-re@+1 {{argument value {{[0-9]+}} is outside the valid range [0, 7]}}
+  SVE_ACLE_FUNC(svluti2_lane,_bf16,,)(svundef_bf16(), svundef_u8(), -1);
 }
 
 void test_range_0_3()
@@ -146,6 +154,26 @@ void test_range_0_3()
   SVE_ACLE_FUNC(svqdmullb_lane,_s64,,)(svundef_s32(), svundef_s32(), 4);
   // expected-error-re@+1 {{argument value {{[0-9]+}} is outside the valid range [0, 3]}}
   SVE_ACLE_FUNC(svqdmullt_lane,_s64,,)(svundef_s32(), svundef_s32(), -1);
+  // expected-error-re@+1 {{argument value {{[0-9]+}} is outside the valid range [0, 3]}}
+  SVE_ACLE_FUNC(svluti2_lane,_s8,,)(svundef_s8(), svundef_u8(), -1);
+  // expected-error-re@+1 {{argument value {{[0-9]+}} is outside the valid range [0, 3]}}
+  SVE_ACLE_FUNC(svluti2_lane,_u8,,)(svundef_u8(), svundef_u8(), -1);
+  // expected-error-re@+1 {{argument value {{[0-9]+}} is outside the valid range [0, 3]}}
+  SVE_ACLE_FUNC(svluti4_lane,_s16,,)(svundef_s16(), svundef_u8(), -1);
+  // expected-error-re@+1 {{argument value {{[0-9]+}} is outside the valid range [0, 3]}}
+  SVE_ACLE_FUNC(svluti4_lane,_u16,,)(svundef_u16(), svundef_u8(), -1);
+  // expected-error-re@+1 {{argument value {{[0-9]+}} is outside the valid range [0, 3]}}
+  SVE_ACLE_FUNC(svluti4_lane,_f16,,)(svundef_f16(), svundef_u8(), -1);
+  // expected-error-re@+1 {{argument value {{[0-9]+}} is outside the valid range [0, 3]}}
+  SVE_ACLE_FUNC(svluti4_lane,_bf16,,)(svundef_bf16(), svundef_u8(), -1);
+  // expected-error-re@+1 {{argument value {{[0-9]+}} is outside the valid range [0, 3]}}
+  SVE_ACLE_FUNC(svluti4_lane,_s16,_x2,)(svcreate2_s16(svundef_s16(),svundef_s16()), svundef_u8(), -1);
+  // expected-error-re@+1 {{argument value {{[0-9]+}} is outside the valid range [0, 3]}}
+  SVE_ACLE_FUNC(svluti4_lane,_u16,_x2,)(svcreate2_u16(svundef_u16(),svundef_u16()), svundef_u8(), -1);
+  // expected-error-re@+1 {{argument value {{[0-9]+}} is outside the valid range [0, 3]}}
+  SVE_ACLE_FUNC(svluti4_lane,_f16,_x2,)(svcreate2_f16(svundef_f16(),svundef_f16()), svundef_u8(), -1);
+  // expected-error-re@+1 {{argument value {{[0-9]+}} is outside the valid range [0, 3]}}
+  SVE_ACLE_FUNC(svluti4_lane,_bf16,_x2,)(svcreate2_bf16(svundef_bf16(),svundef_bf16()), svundef_u8(), -1);
 }
 
 void test_range_0_1()
@@ -180,4 +208,8 @@ void test_range_0_1()
   SVE_ACLE_FUNC(svqrdmlsh_lane,_s64,,)(svundef_s64(), svundef_s64(), svundef_s64(), 2);
   // expected-error-re@+1 {{argument value {{[0-9]+}} is outside the valid range [0, 1]}}
   SVE_ACLE_FUNC(svqrdmulh_lane,_s64,,)(svundef_s64(), svundef_s64(), 2);
+  // expected-error-re@+1 {{argument value {{[0-9]+}} is outside the valid range [0, 1]}}
+  SVE_ACLE_FUNC(svluti4_lane,_s8,,)(svundef_s8(), svundef_u8(), -1);
+  // expected-error-re@+1 {{argument value {{[0-9]+}} is outside the valid range [0, 1]}}
+  SVE_ACLE_FUNC(svluti4_lane,_u8,,)(svundef_u8(), svundef_u8(), -1);
 }
diff --git clang/test/Sema/attr-btf_type_tag.cpp clang/test/Sema/attr-btf_type_tag.cpp
new file mode 100644
index 000000000000..cef78fff79b9
--- /dev/null
+++ clang/test/Sema/attr-btf_type_tag.cpp
@@ -0,0 +1,11 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -fsyntax-only -verify=c -x c %s
+
+// c-no-diagnostics
+
+// Ensure that we diagnose the attribute as ignored in C++ but not in C.
+#ifdef __cplusplus
+static_assert(__builtin_is_implicit_lifetime(int __attribute__((btf_type_tag("user"))) *)); // expected-warning {{'btf_type_tag' attribute ignored}}
+#endif
+int __attribute__((btf_type_tag("user"))) *ptr; // expected-warning {{'btf_type_tag' attribute ignored}}
+
diff --git clang/test/Sema/attr-target-riscv.c clang/test/Sema/attr-target-riscv.c
index ed4e2915d6c6..35e2ec3986ad 100644
--- clang/test/Sema/attr-target-riscv.c
+++ clang/test/Sema/attr-target-riscv.c
@@ -4,3 +4,15 @@
 int __attribute__((target("arch=rv64g"))) foo(void) { return 0; }
 //expected-error@+1 {{redefinition of 'foo'}}
 int __attribute__((target("arch=rv64gc"))) foo(void) { return 0; }
+
+//expected-warning@+1 {{unsupported 'notafeature' in the 'target' attribute string; 'target' attribute ignored}}
+int __attribute__((target("arch=+notafeature"))) UnsupportFeature(void) { return 0; }
+
+//expected-warning@+1 {{unsupported 'notafeature' in the 'target' attribute string; 'target' attribute ignored}}
+int __attribute__((target("arch=-notafeature"))) UnsupportNegativeFeature(void) { return 0; }
+
+//expected-warning@+1 {{unsupported 'arch=+zba,zbb' in the 'target' attribute string; 'target' attribute ignored}}
+int __attribute__((target("arch=+zba,zbb"))) WithoutPlus(void) { return 0; }
+
+//expected-warning@+1 {{unsupported 'arch=zba' in the 'target' attribute string; 'target' attribute ignored}}
+int __attribute__((target("arch=zba"))) WithoutPlus2(void) { return 0; }
diff --git clang/test/Sema/builtin-unary-fp.c clang/test/Sema/builtin-unary-fp.c
index 3f4f65eeb73a..fb8e341156a5 100644
--- clang/test/Sema/builtin-unary-fp.c
+++ clang/test/Sema/builtin-unary-fp.c
@@ -14,4 +14,8 @@ void a(void) {
   check(__builtin_fpclassify(0, 1, 2, 3, 4.5, 5.0)); // expected-warning{{implicit conversion from 'double' to 'int' changes value from 4.5 to 4}}
   check(__builtin_fpclassify(0, 0, 0, 0, 1)); // expected-error{{too few arguments}}
   check(__builtin_fpclassify(0, 0, 0, 0, 0, 1, 0)); // expected-error{{too many arguments}}
+
+  check(__builtin_fpclassify(0,0,0,0,0, (invalid))); // expected-error{{use of undeclared identifier 'invalid'}}
+  check(__builtin_fpclassify(0,0,0,0,0, (inf))); // expected-error{{use of undeclared identifier 'inf'}}
+                                                // expected-error@-1{{reference to overloaded function could not be resolved}}
 }
diff --git clang/test/Sema/builtins-elementwise-math.c clang/test/Sema/builtins-elementwise-math.c
index 2673f1f519af..628274380ae5 100644
--- clang/test/Sema/builtins-elementwise-math.c
+++ clang/test/Sema/builtins-elementwise-math.c
@@ -275,8 +275,8 @@ void test_builtin_elementwise_min(int i, short s, double d, float4 v, int3 iv, u
 
 void test_builtin_elementwise_bitreverse(int i, float f, double d, float4 v, int3 iv, unsigned u, unsigned4 uv) {
 
-  struct Foo s = __builtin_elementwise_ceil(f);
-  // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'float'}}
+  struct Foo s = __builtin_elementwise_bitreverse(i);
+  // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}}
 
   i = __builtin_elementwise_bitreverse();
   // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
diff --git clang/test/Sema/pre-c2x-compat.c clang/test/Sema/pre-c2x-compat.c
index fad472f1f72d..15bb9b58349f 100644
--- clang/test/Sema/pre-c2x-compat.c
+++ clang/test/Sema/pre-c2x-compat.c
@@ -1,3 +1,4 @@
 // RUN: %clang_cc1 %s -std=c2x -Wpre-c2x-compat -pedantic -fsyntax-only -verify
 
 int digit_seps = 123'456; // expected-warning {{digit separators are incompatible with C standards before C23}}
+unsigned char u8_char = u8'x'; // expected-warning {{unicode literals are incompatible with C standards before C23}}
diff --git clang/test/Sema/static-assert.c clang/test/Sema/static-assert.c
index 4e9e6b7ee558..d603bc19bb82 100644
--- clang/test/Sema/static-assert.c
+++ clang/test/Sema/static-assert.c
@@ -25,8 +25,12 @@ void foo(void) {
 #endif
 }
 
-_Static_assert(1, invalid); // expected-error {{expected string literal for diagnostic message in static_assert}} \
-                            // ext-warning {{'_Static_assert' is a C11 extension}}
+_Static_assert(1, invalid); // ext-warning {{'_Static_assert' is a C11 extension}}
+#ifndef __cplusplus
+// expected-error@-2 {{expected string literal for diagnostic message in static_assert}}
+#endif
+// cxx-error@-4 {{use of undeclared identifier 'invalid'}}
+// cxx-warning@-5 {{'static_assert' with a user-generated message is a C++26 extension}}
 
 struct A {
   int a;
diff --git clang/test/Sema/warn-lifetime-analysis-nocfg.cpp clang/test/Sema/warn-lifetime-analysis-nocfg.cpp
index cd1904db3271..59357d0730a7 100644
--- clang/test/Sema/warn-lifetime-analysis-nocfg.cpp
+++ clang/test/Sema/warn-lifetime-analysis-nocfg.cpp
@@ -275,6 +275,34 @@ int &danglingRawPtrFromLocal3() {
   return *o; // expected-warning {{reference to stack memory associated with local variable 'o' returned}}
 }
 
+// GH100384
+std::string_view containerWithAnnotatedElements() {
+  std::string_view c1 = std::vector<std::string>().at(0); // expected-warning {{object backing the pointer will be destroyed at the end of the full-expression}}
+  c1 = std::vector<std::string>().at(0); // expected-warning {{object backing the pointer}}
+
+  // no warning on constructing from gsl-pointer
+  std::string_view c2 = std::vector<std::string_view>().at(0);
+
+  std::vector<std::string> local;
+  return local.at(0); // expected-warning {{address of stack memory associated with local variable}}
+}
+
+std::string_view localUniquePtr(int i) {
+  std::unique_ptr<std::string> c1;
+  if (i)
+    return *c1; // expected-warning {{address of stack memory associated with local variable}}
+  std::unique_ptr<std::string_view> c2;
+  return *c2; // expect no warning
+}
+
+std::string_view localOptional(int i) {
+  std::optional<std::string> o;
+  if (i)
+    return o.value(); // expected-warning {{address of stack memory associated with local variable}}
+  std::optional<std::string_view> abc;
+  return abc.value(); // expect no warning
+}
+
 const char *danglingRawPtrFromTemp() {
   return std::basic_string<char>().c_str(); // expected-warning {{returning address of local temporary object}}
 }
@@ -498,4 +526,30 @@ std::string_view test2(int i, std::optional<std::string_view> a) {
     return std::move(*a);
   return std::move(a.value());
 }
+
+struct Foo;
+struct FooView {
+  FooView(const Foo& foo [[clang::lifetimebound]]);
+};
+FooView test3(int i, std::optional<Foo> a) {
+  if (i)
+    return *a; // expected-warning {{address of stack memory}}
+  return a.value(); // expected-warning {{address of stack memory}}
+}
+} // namespace GH93386
+
+namespace GH100549 {
+struct UrlAnalyzed {
+  UrlAnalyzed(std::string_view url [[clang::lifetimebound]]);
+};
+std::string StrCat(std::string_view, std::string_view);
+void test1() {
+  UrlAnalyzed url(StrCat("abc", "bcd")); // expected-warning {{object backing the pointer will be destroyed}}
+}
+
+std::string_view ReturnStringView(std::string_view abc [[clang::lifetimebound]]);
+
+void test() {
+  std::string_view svjkk1 = ReturnStringView(StrCat("bar", "x")); // expected-warning {{object backing the pointer will be destroyed at the end of the full-expression}}
 }
+} // namespace GH100549
diff --git clang/test/SemaCXX/PR97308.cpp clang/test/SemaCXX/PR97308.cpp
new file mode 100644
index 000000000000..7f550bc15d74
--- /dev/null
+++ clang/test/SemaCXX/PR97308.cpp
@@ -0,0 +1,21 @@
+// RUN: %clang_cc1 -o - -emit-llvm -triple x86_64-linux-gnu %s
+
+// Check that there is no crash during the CodeGen action.
+// https://github.com/llvm/llvm-project/pull/97308
+struct a {
+} constexpr b;
+class c {
+public:
+  c(a);
+};
+class B {
+public:
+  using d = int;
+  struct e {
+    enum { f } g;
+    int h;
+    c i;
+    d j{};
+  };
+};
+B::e k{B::e::f, int(), b};
diff --git clang/test/SemaCXX/builtin-is-within-lifetime.cpp clang/test/SemaCXX/builtin-is-within-lifetime.cpp
new file mode 100644
index 000000000000..62ff2681952c
--- /dev/null
+++ clang/test/SemaCXX/builtin-is-within-lifetime.cpp
@@ -0,0 +1,431 @@
+// RUN: %clang_cc1 -std=c++20 -Wno-unused %s -verify=expected,cxx20 -Wno-vla-cxx-extension
+// RUN: %clang_cc1 -std=c++23 -Wno-unused %s -verify=expected,sincecxx23 -Wno-vla-cxx-extension
+// RUN: %clang_cc1 -std=c++26 -Wno-unused %s -verify=expected,sincecxx23 -Wno-vla-cxx-extension
+// RUN: %clang_cc1 -std=c++26 -DINLINE_NAMESPACE -Wno-unused %s -verify=expected,sincecxx23 -Wno-vla-cxx-extension
+
+inline constexpr void* operator new(__SIZE_TYPE__, void* p) noexcept { return p; }
+namespace std {
+template<typename T, typename... Args>
+constexpr T* construct_at(T* p, Args&&... args) { return ::new((void*)p) T(static_cast<Args&&>(args)...); }
+template<typename T>
+constexpr void destroy_at(T* p) { p->~T(); }
+template<typename T>
+struct allocator {
+  constexpr T* allocate(__SIZE_TYPE__ n) { return static_cast<T*>(::operator new(n * sizeof(T))); }
+  constexpr void deallocate(T* p, __SIZE_TYPE__) { ::operator delete(p); }
+};
+using nullptr_t = decltype(nullptr);
+template<typename T, T v>
+struct integral_constant { static constexpr T value = v; };
+template<bool v>
+using bool_constant = integral_constant<bool, v>;
+using true_type = bool_constant<true>;
+using false_type = bool_constant<false>;
+template<typename T>
+inline constexpr bool is_function_v = __is_function(T);
+#ifdef INLINE_NAMESPACE
+inline namespace __1 {
+#endif
+template<typename T> requires (!is_function_v<T>) // #std-constraint
+consteval bool is_within_lifetime(const T* p) noexcept { // #std-definition
+  return __builtin_is_within_lifetime(p);
+}
+#ifdef INLINE_NAMESPACE
+}
+#endif
+}
+
+consteval bool test_union(int& i, char& c) {
+  if (__builtin_is_within_lifetime(&i) || __builtin_is_within_lifetime(&c))
+    return false;
+  std::construct_at(&c, 1);
+  if (__builtin_is_within_lifetime(&i) || !__builtin_is_within_lifetime(&c))
+    return false;
+  std::construct_at(&i, 3);
+  if (!__builtin_is_within_lifetime(&i) || __builtin_is_within_lifetime(&c))
+    return false;
+  return true;
+}
+
+static_assert([]{
+  union { int i; char c; } u;
+  return test_union(u.i, u.c);
+}());
+static_assert([]{
+  union { int i; char c; };
+  return test_union(i, c);
+}());
+static_assert([]{
+  struct { union { int i; char c; }; } u;
+  return test_union(u.i, u.c);
+}());
+static_assert([]{
+  struct { union { int i; char c; } u; } r;
+  return test_union(r.u.i, r.u.c);
+}());
+
+consteval bool test_nested() {
+  union {
+    union { int i; char c; } u;
+    long l;
+  };
+  if (__builtin_is_within_lifetime(&l) || __builtin_is_within_lifetime(&u) || __builtin_is_within_lifetime(&u.i) || __builtin_is_within_lifetime(&u.c))
+    return false;
+  std::construct_at(&l);
+  if (!__builtin_is_within_lifetime(&l) || __builtin_is_within_lifetime(&u) || __builtin_is_within_lifetime(&u.i) || __builtin_is_within_lifetime(&u.c))
+    return false;
+  std::construct_at(&u);
+  std::construct_at(&u.i);
+  if (__builtin_is_within_lifetime(&l) || !__builtin_is_within_lifetime(&u) || !__builtin_is_within_lifetime(&u.i) || __builtin_is_within_lifetime(&u.c))
+    return false;
+  std::construct_at(&u.c);
+  if (__builtin_is_within_lifetime(&l) || !__builtin_is_within_lifetime(&u) || __builtin_is_within_lifetime(&u.i) || !__builtin_is_within_lifetime(&u.c))
+    return false;
+  return true;
+}
+static_assert(test_nested());
+
+consteval bool test_dynamic(bool read_after_deallocate) {
+  std::allocator<int> a;
+  int* p = a.allocate(1);
+  // a.allocate starts the lifetime of an array, so the complete object
+  // containing *p is alive, but the element *p is not yet within its lifetime
+  if (__builtin_is_within_lifetime(p))
+    return false;
+  std::construct_at(p);
+  if (!__builtin_is_within_lifetime(p))
+    return false;
+  std::destroy_at(p);
+  if (__builtin_is_within_lifetime(p))
+    return false;
+  a.deallocate(p, 1);
+  if (read_after_deallocate)
+    __builtin_is_within_lifetime(p); // expected-note {{read of heap allocated object that has been deleted}}
+  return true;
+}
+static_assert(test_dynamic(false));
+static_assert(test_dynamic(true));
+// expected-error@-1 {{static assertion expression is not an integral constant expression}}
+//   expected-note@-2 {{in call to 'test_dynamic(true)'}}
+
+consteval bool test_automatic(int read_dangling) {
+  int* p;
+  {
+    int x = 0;
+    p = &x;
+    if (!__builtin_is_within_lifetime(p))
+      return false;
+  }
+  {
+    int x = 0;
+    if (read_dangling == 1)
+      __builtin_is_within_lifetime(p); // expected-note {{read of object outside its lifetime is not allowed in a constant expression}}
+  }
+  if (read_dangling == 2)
+    __builtin_is_within_lifetime(p); // expected-note {{read of object outside its lifetime is not allowed in a constant expression}}
+  {
+    int x[4];
+    p = &x[2];
+    if (!__builtin_is_within_lifetime(p))
+      return false;
+  }
+  if (read_dangling == 3)
+    __builtin_is_within_lifetime(p); // expected-note {{read of object outside its lifetime is not allowed in a constant expression}}
+  std::nullptr_t* q;
+  {
+    std::nullptr_t np = nullptr;
+    q = &np;
+    if (!__builtin_is_within_lifetime(q))
+      return false;
+  }
+  if (read_dangling == 4)
+    __builtin_is_within_lifetime(q); // expected-note {{read of object outside its lifetime is not allowed in a constant expression}}
+  return true;
+}
+static_assert(test_automatic(0));
+static_assert(test_automatic(1));
+// expected-error@-1 {{static assertion expression is not an integral constant expression}}
+//   expected-note@-2 {{in call to 'test_automatic(1)'}}
+static_assert(test_automatic(2));
+// expected-error@-1 {{static assertion expression is not an integral constant expression}}
+//   expected-note@-2 {{in call to 'test_automatic(2)'}}
+static_assert(test_automatic(3));
+// expected-error@-1 {{static assertion expression is not an integral constant expression}}
+//   expected-note@-2 {{in call to 'test_automatic(3)'}}
+static_assert(test_automatic(4));
+// expected-error@-1 {{static assertion expression is not an integral constant expression}}
+//   expected-note@-2 {{in call to 'test_automatic(4)'}}
+
+
+consteval bool test_indeterminate() {
+  int x;
+  if (!__builtin_is_within_lifetime(&x))
+    return false;
+  bool b = true;
+  unsigned char c = __builtin_bit_cast(unsigned char, b);
+  if (!__builtin_is_within_lifetime(&c))
+    return false;
+  struct {} padding;
+  unsigned char y = __builtin_bit_cast(unsigned char, padding);
+  if (!__builtin_is_within_lifetime(&y))
+    return false;
+  return true;
+}
+static_assert(test_indeterminate());
+
+consteval bool test_volatile() {
+  int x;
+  if (!__builtin_is_within_lifetime(static_cast<volatile int*>(&x)) || !__builtin_is_within_lifetime(static_cast<volatile void*>(&x)))
+    return false;
+  volatile int y;
+  if (!__builtin_is_within_lifetime(const_cast<int*>(&y)) || !__builtin_is_within_lifetime(const_cast<void*>(static_cast<volatile void*>(&y))))
+    return false;
+  return true;
+}
+static_assert(test_volatile());
+
+constexpr bool self = __builtin_is_within_lifetime(&self);
+// expected-error@-1 {{constexpr variable 'self' must be initialized by a constant expression}}
+//   expected-note@-2 {{'__builtin_is_within_lifetime' cannot be called with a pointer to an object whose lifetime has not yet begun}}
+// expected-error@-3 {{call to consteval function '__builtin_is_within_lifetime' is not a constant expression}}
+//   expected-note@-4 {{initializer of 'self' is not a constant expression}}
+//   expected-note@-5 {{declared here}}
+constexpr int external{};
+static_assert(__builtin_is_within_lifetime(&external));
+void not_constexpr() {
+  __builtin_is_within_lifetime(&external);
+}
+void invalid_args() {
+  __builtin_is_within_lifetime(static_cast<int*>(nullptr));
+  // expected-error@-1 {{call to consteval function '__builtin_is_within_lifetime' is not a constant expression}}
+  //   expected-note@-2 {{'__builtin_is_within_lifetime' cannot be called with a null pointer}}
+
+  // FIXME: avoid function to pointer conversion on all consteval builtins
+  __builtin_is_within_lifetime(0);
+  // expected-error@-1 {{non-pointer argument to '__builtin_is_within_lifetime' is not allowed}}
+  // expected-error@-2 {{cannot take address of consteval function '__builtin_is_within_lifetime' outside of an immediate invocation}}
+  __builtin_is_within_lifetime();
+  // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
+  // expected-error@-2 {{cannot take address of consteval function '__builtin_is_within_lifetime' outside of an immediate invocation}}
+  __builtin_is_within_lifetime(1, 2);
+  // expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
+  // expected-error@-2 {{cannot take address of consteval function '__builtin_is_within_lifetime' outside of an immediate invocation}}
+  __builtin_is_within_lifetime(&external, &external);
+  // expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
+  // expected-error@-2 {{cannot take address of consteval function '__builtin_is_within_lifetime' outside of an immediate invocation}}
+}
+
+constexpr struct {
+  union {
+    int i;
+    char c;
+  };
+  mutable int mi;  // #x-mi
+} x1{ .c = 2 };
+static_assert(!__builtin_is_within_lifetime(&x1.i));
+static_assert(__builtin_is_within_lifetime(&x1.c));
+static_assert(__builtin_is_within_lifetime(&x1.mi));
+// expected-error@-1 {{static assertion expression is not an integral constant expression}}
+//   expected-note@-2 {{read of mutable member 'mi' is not allowed in a constant expression}}
+//   expected-note@#x-mi {{declared here}}
+
+constexpr struct NSDMI { // #NSDMI
+  bool a = true;
+  bool b = __builtin_is_within_lifetime(&a); // #NSDMI-read
+} x2;
+// expected-error@-1 {{constexpr variable 'x2' must be initialized by a constant expression}}
+//   expected-note@#NSDMI-read {{'__builtin_is_within_lifetime' cannot be called with a pointer to an object whose lifetime has not yet begun}}
+//   expected-note@-3 {{in call to 'NSDMI()'}}
+// expected-error@-4 {{call to immediate function 'NSDMI::NSDMI' is not a constant expression}}
+//   expected-note@#NSDMI {{'NSDMI' is an immediate constructor because the default initializer of 'b' contains a call to a consteval function '__builtin_is_within_lifetime' and that call is not a constant expression}}
+//   expected-note@#NSDMI-read {{'__builtin_is_within_lifetime' cannot be called with a pointer to an object whose lifetime has not yet begun}}
+//   expected-note@-7 {{in call to 'NSDMI()'}}
+
+struct X3 {
+  consteval X3() {
+    __builtin_is_within_lifetime(this); // #X3-read
+  }
+} x3;
+// expected-error@-1 {{call to consteval function 'X3::X3' is not a constant expression}}
+//   expected-note@#X3-read {{'__builtin_is_within_lifetime' cannot be called with a pointer to an object whose lifetime has not yet begun}}
+//   expected-note@-3 {{in call to 'X3()'}}
+
+constexpr int i = 2;
+static_assert(__builtin_is_within_lifetime(const_cast<int*>(&i)));
+static_assert(__builtin_is_within_lifetime(const_cast<volatile int*>(&i)));
+static_assert(__builtin_is_within_lifetime(static_cast<const void*>(&i)));
+
+constexpr int arr[2]{};
+static_assert(__builtin_is_within_lifetime(arr));
+static_assert(__builtin_is_within_lifetime(arr + 0));
+static_assert(__builtin_is_within_lifetime(arr + 1));
+void f() {
+  __builtin_is_within_lifetime(&i + 1);
+  // expected-error@-1 {{call to consteval function '__builtin_is_within_lifetime' is not a constant expression}}
+  //   expected-note@-2 {{'__builtin_is_within_lifetime' cannot be called with a one-past-the-end pointer}}
+  __builtin_is_within_lifetime(arr + 2);
+  // expected-error@-1 {{call to consteval function '__builtin_is_within_lifetime' is not a constant expression}}
+  //   expected-note@-2 {{'__builtin_is_within_lifetime' cannot be called with a one-past-the-end pointer}}
+}
+
+template<typename T>
+consteval void disallow_function_types(bool b, const T* p) {
+  if (b) {
+    __builtin_is_within_lifetime(p); // expected-error {{function pointer argument to '__builtin_is_within_lifetime' is not allowed}}
+  }
+}
+void g() {
+  disallow_function_types<void ()>(false, &f);
+  // expected-note@-1 {{in instantiation of function template specialization 'disallow_function_types<void ()>' requested here}}
+}
+
+struct OptBool {
+  union { bool b; char c; };
+
+  // note: this assumes common implementation properties for bool and char:
+  // * sizeof(bool) == sizeof(char), and
+  // * the value representations for true and false are distinct
+  //   from the value representation for 2
+  constexpr OptBool() : c(2) { }
+  constexpr OptBool(bool b) : b(b) { }
+
+  constexpr auto has_value() const -> bool {
+    if consteval {  // cxx20-warning {{consteval if}}
+      return __builtin_is_within_lifetime(&b);   // during constant evaluation, cannot read from c
+    } else {
+      return c != 2;                        // during runtime, must read from c
+    }
+  }
+
+  constexpr auto operator*() const -> const bool& {
+    return b;
+  }
+};
+
+constexpr OptBool disengaged;
+constexpr OptBool engaged(true);
+static_assert(!disengaged.has_value());
+static_assert(engaged.has_value());
+static_assert(*engaged);
+
+namespace vlas {
+
+consteval bool f(int n) {
+  int vla[n]; // cxx20-error {{variable of non-literal type}}
+  return __builtin_is_within_lifetime(static_cast<void*>(&vla));
+}
+static_assert(f(1));
+
+consteval bool fail(int n) {
+  int vla[n]; // cxx20-error {{variable of non-literal type}}
+  return __builtin_is_within_lifetime(&vla); // expected-error {{variable length arrays are not supported in '__builtin_is_within_lifetime'}}
+}
+static_assert(fail(1)); // sincecxx23-error {{static assertion expression is not an integral constant expression}}
+
+consteval bool variably_modified(int n) {
+  int(* p)[n];
+  return __builtin_is_within_lifetime(&p);
+}
+static_assert(variably_modified(1));
+
+} // namespace vlas
+
+consteval bool partial_arrays() {
+  int arr[2];
+  if (!__builtin_is_within_lifetime(&arr) || !__builtin_is_within_lifetime(&arr[0]) || !__builtin_is_within_lifetime(&arr[1]))
+    return false;
+  std::destroy_at(&arr[0]);
+  if (!__builtin_is_within_lifetime(&arr) ||  __builtin_is_within_lifetime(&arr[0]) || !__builtin_is_within_lifetime(&arr[1]))
+    return false;
+  std::construct_at(&arr[0]);
+  if (!__builtin_is_within_lifetime(&arr) || !__builtin_is_within_lifetime(&arr[0]) || !__builtin_is_within_lifetime(&arr[1]))
+    return false;
+  return true;
+}
+static_assert(partial_arrays());
+
+consteval bool partial_members() {
+  struct S {
+    int x;
+    int y;
+  } s;
+  if (!__builtin_is_within_lifetime(&s) || !__builtin_is_within_lifetime(&s.x) || !__builtin_is_within_lifetime(&s.y))
+    return false;
+  std::destroy_at(&s.x);
+  if (!__builtin_is_within_lifetime(&s) ||  __builtin_is_within_lifetime(&s.x) || !__builtin_is_within_lifetime(&s.y))
+    return false;
+  std::construct_at(&s.x);
+  if (!__builtin_is_within_lifetime(&s) || !__builtin_is_within_lifetime(&s.x) || !__builtin_is_within_lifetime(&s.y))
+    return false;
+  return true;
+}
+
+struct NonTrivial {
+  constexpr NonTrivial() {}
+  constexpr NonTrivial(const NonTrivial&) {}
+  constexpr ~NonTrivial() {}
+};
+
+template<typename T>
+constexpr T& unmove(T&& temp) { return static_cast<T&>(temp); }
+
+consteval bool test_temporaries() {
+  static_assert(__builtin_is_within_lifetime(&unmove(0)));
+  static_assert(__builtin_is_within_lifetime(&unmove(NonTrivial{})));
+  if (!__builtin_is_within_lifetime(&unmove(0)))
+    return false;
+  if (!__builtin_is_within_lifetime(&unmove(NonTrivial{})))
+    return false;
+  return true;
+}
+static_assert(test_temporaries());
+
+constexpr const int& temp = 0;
+static_assert(__builtin_is_within_lifetime(&temp));
+
+template<typename T>
+constexpr T* test_dangling() {
+  T i; // expected-note 2 {{declared here}}
+  return &i; // expected-warning 2 {{address of stack memory associated with local variable 'i' returned}}
+}
+static_assert(__builtin_is_within_lifetime(test_dangling<int>())); // expected-note {{in instantiation of function template specialization}}
+// expected-error@-1 {{static assertion expression is not an integral constant expression}}
+//   expected-note@-2 {{read of variable whose lifetime has ended}}
+static_assert(__builtin_is_within_lifetime(test_dangling<int[1]>())); // expected-note {{in instantiation of function template specialization}}
+// expected-error@-1 {{static assertion expression is not an integral constant expression}}
+//   expected-note@-2 {{read of variable whose lifetime has ended}}
+
+template<auto F>
+concept CanCallAndPassToIsWithinLifetime = std::bool_constant<__builtin_is_within_lifetime(F())>::value;
+static_assert(CanCallAndPassToIsWithinLifetime<[]{ return &i; }>);
+static_assert(!CanCallAndPassToIsWithinLifetime<[]{ return static_cast<int*>(nullptr); }>);
+static_assert(!CanCallAndPassToIsWithinLifetime<[]{ return static_cast<void(*)()>(&f); }>);
+template<auto F> constexpr std::true_type sfinae() requires CanCallAndPassToIsWithinLifetime<F> { return {}; }
+template<auto F> std::false_type sfinae() { return {}; }
+static_assert(decltype(sfinae<[]{ return &i; }>())::value);
+static_assert(!decltype(sfinae<[]{ return static_cast<int*>(nullptr); }>())::value);
+std::true_type(* not_immediate)() = &sfinae<[]{ return &i; }>;
+
+void test_std_error_message() {
+  std::is_within_lifetime(static_cast<int*>(nullptr));
+  // expected-error@-1 {{call to consteval function 'std::is_within_lifetime<int>' is not a constant expression}}
+  //   expected-note@-2 {{'std::is_within_lifetime' cannot be called with a null pointer}}
+  //   expected-note@-3 {{in call to 'is_within_lifetime<int>(nullptr)'}}
+  std::is_within_lifetime<void()>(&test_std_error_message);
+  // expected-error@-1 {{no matching function for call to 'is_within_lifetime'}}
+  //   expected-note@#std-definition {{candidate template ignored: constraints not satisfied [with T = void ()]}}
+  //   expected-note@#std-constraint {{because '!is_function_v<void ()>' evaluated to false}}
+  std::is_within_lifetime(arr + 2);
+  // expected-error@-1 {{call to consteval function 'std::is_within_lifetime<int>' is not a constant expression}}
+  //   expected-note@-2 {{'std::is_within_lifetime' cannot be called with a one-past-the-end pointer}}
+  //   expected-note@-3 {{in call to 'is_within_lifetime<int>(&arr[2])'}}
+}
+struct XStd {
+  consteval XStd() {
+    std::is_within_lifetime(this); // #XStd-read
+  }
+} xstd;
+// expected-error@-1 {{call to consteval function 'XStd::XStd' is not a constant expression}}
+//   expected-note@#XStd-read {{'std::is_within_lifetime' cannot be called with a pointer to an object whose lifetime has not yet begun}}
+//   expected-note@#XStd-read {{in call to 'is_within_lifetime<XStd>(&)'}}
+//   expected-note@-4 {{in call to 'XStd()'}}
diff --git clang/test/SemaCXX/consteval-builtin.cpp clang/test/SemaCXX/consteval-builtin.cpp
new file mode 100644
index 000000000000..3ba95b4dbd9b
--- /dev/null
+++ clang/test/SemaCXX/consteval-builtin.cpp
@@ -0,0 +1,93 @@
+// RUN: %clang_cc1 -std=c++23 -fsyntax-only -Wno-unused %s -verify=cxx20-cxx26
+// RUN: %clang_cc1 -std=c++20 -fsyntax-only -Wno-unused %s -verify=cxx20,cxx20-cxx26
+// RUN: %clang_cc1 -std=c++17 -fsyntax-only -Wno-unused %s -verify=precxx20,cxx11-cxx17
+// RUN: %clang_cc1 -std=c++14 -fsyntax-only -Wno-unused %s -verify=precxx20,cxx11-cxx17
+// RUN: %clang_cc1 -std=c++11 -fsyntax-only -Wno-unused %s -verify=precxx20,cxx11-cxx17
+// RUN: %clang_cc1 -std=c++03 -fsyntax-only -Wno-unused %s -verify=precxx20
+// RUN: %clang_cc1 -std=c++98 -fsyntax-only -Wno-unused %s -verify=precxx20
+// RUN: %clang_cc1 -x c -std=c23 -fsyntax-only -Wno-unused %s -verify=c
+
+#if __has_builtin(__builtin_is_within_lifetime)
+#error has the builtin
+#else
+#error does not have the builtin
+#endif
+// cxx20-cxx26-error@-4 {{has the builtin}}
+// precxx20-error@-3 {{does not have the builtin}}
+// c-error@-4 {{does not have the builtin}}
+
+#if __has_constexpr_builtin(__builtin_is_within_lifetime)
+#error has the constexpr builtin
+#else
+#error does not have the constexpr builtin
+#endif
+// cxx20-cxx26-error@-4 {{has the constexpr builtin}}
+// precxx20-error@-3 {{does not have the constexpr builtin}}
+// c-error@-4 {{does not have the constexpr builtin}}
+
+#if __cplusplus < 201103L
+#define static_assert __extension__ _Static_assert
+#define CONSTEXPR11
+#else
+#define CONSTEXPR11 constexpr
+#endif
+
+static const int i1 = 0;
+static_assert(__builtin_is_within_lifetime(&i1), "");
+// precxx20-error@-1 {{use of undeclared identifier '__builtin_is_within_lifetime'}}
+// c-error@-2 {{use of undeclared identifier '__builtin_is_within_lifetime'}}
+
+#if !defined(__cplusplus) || __cplusplus >= 201103L
+constexpr int i2 = 0;
+static_assert(__builtin_is_within_lifetime(&i2), "");
+// cxx11-cxx17-error@-1 {{use of undeclared identifier '__builtin_is_within_lifetime'}}
+// c-error@-2 {{use of undeclared identifier '__builtin_is_within_lifetime'}}
+#endif
+
+#ifdef __cplusplus
+template<typename T>
+CONSTEXPR11 bool f1(T i) {  // #f1
+  return __builtin_is_within_lifetime(&i);  // #f1-consteval-call
+}
+
+bool(&fp1)(int) = f1<int>;
+// cxx20-cxx26-error@-1 {{cannot take address of immediate function 'f1<int>' outside of an immediate invocation}}
+//   cxx20-cxx26-note@#f1 {{declared here}}
+//   cxx20-cxx26-note@#f1-consteval-call {{'f1<int>' is an immediate function because its body contains a call to a consteval function '__builtin_is_within_lifetime' and that call is not a constant expression}}
+// precxx20-error@#f1-consteval-call {{use of undeclared identifier '__builtin_is_within_lifetime'}}
+//   precxx20-note@-5 {{in instantiation of function template specialization 'f1<int>' requested here}}
+#else
+void f1(int i) {
+  __builtin_is_within_lifetime(&i);
+  // c-error@-1 {{use of undeclared identifier '__builtin_is_within_lifetime'}}
+}
+#endif
+
+#if __cplusplus >= 202002L
+constexpr void f2() {
+  int i = 0;
+  if consteval {  // cxx20-warning {{consteval if}}
+    __builtin_is_within_lifetime(&i);
+  }
+}
+void(&fp2)() = f2;
+
+constexpr void f3() {
+  __builtin_is_within_lifetime(&i1);
+}
+void(&fp3)() = f3;
+
+constexpr void f4() {
+  &__builtin_is_within_lifetime;
+  // cxx20-cxx26-error@-1 {{builtin functions must be directly called}}
+  // cxx20-cxx26-error@-2 {{cannot take address of consteval function '__builtin_is_within_lifetime' outside of an immediate invocation}}
+  __builtin_is_within_lifetime();
+  // cxx20-cxx26-error@-1 {{too few arguments to function call, expected 1, have 0}}
+  // cxx20-cxx26-error@-2 {{cannot take address of consteval function '__builtin_is_within_lifetime' outside of an immediate invocation}}
+  int* not_constexpr;
+  __builtin_is_within_lifetime(not_constexpr);
+  // cxx20-cxx26-error@-1 {{call to consteval function '__builtin_is_within_lifetime' is not a constant expression}}
+  //   cxx20-cxx26-note@-2 {{read of non-constexpr variable 'not_constexpr' is not allowed in a constant expression}}
+  //   cxx20-cxx26-note@-4 {{declared here}}
+}
+#endif
diff --git clang/test/SemaCXX/constexpr-default-arg.cpp clang/test/SemaCXX/constexpr-default-arg.cpp
index ec9b2927880b..901123bfb359 100644
--- clang/test/SemaCXX/constexpr-default-arg.cpp
+++ clang/test/SemaCXX/constexpr-default-arg.cpp
@@ -32,8 +32,8 @@ void test_default_arg2() {
 }
 
 // Check that multiple CXXDefaultInitExprs don't cause an assertion failure.
-struct A { int &&r = 0; }; // expected-note 2{{default member initializer}}
+struct A { int &&r = 0; };
 struct B { A x, y; };
-B b = {}; // expected-warning 2{{lifetime extension of temporary created by aggregate initialization using a default member initializer is not yet supported}}
+B b = {}; // expected-no-diagnostics
 
 }
diff --git clang/test/SemaCXX/cxx11-default-member-initializers.cpp clang/test/SemaCXX/cxx11-default-member-initializers.cpp
index dd8e9c6b7fc1..5e26c3a3b82c 100644
--- clang/test/SemaCXX/cxx11-default-member-initializers.cpp
+++ clang/test/SemaCXX/cxx11-default-member-initializers.cpp
@@ -27,6 +27,103 @@ class MemInit {
   C m = s;
 };
 
+namespace std {
+typedef decltype(sizeof(int)) size_t;
+
+// libc++'s implementation
+template <class _E> class initializer_list {
+  const _E *__begin_;
+  size_t __size_;
+
+  initializer_list(const _E *__b, size_t __s) : __begin_(__b), __size_(__s) {}
+
+public:
+  typedef _E value_type;
+  typedef const _E &reference;
+  typedef const _E &const_reference;
+  typedef size_t size_type;
+
+  typedef const _E *iterator;
+  typedef const _E *const_iterator;
+
+  initializer_list() : __begin_(nullptr), __size_(0) {}
+
+  size_t size() const { return __size_; }
+  const _E *begin() const { return __begin_; }
+  const _E *end() const { return __begin_ + __size_; }
+};
+} // namespace std
+
+#if __cplusplus >= 201703L
+
+// Test CXXDefaultInitExpr rebuild issue in 
+// https://github.com/llvm/llvm-project/pull/87933
+namespace test_rebuild {
+template <typename T, int> class C {
+public:
+  C(std::initializer_list<T>);
+};
+
+template <typename T> using Ptr = __remove_pointer(T) *;
+template <typename T> C(T) -> C<Ptr<T>, sizeof(T)>;
+
+class A {
+public:
+  template <typename T1, typename T2> T1 *some_func(T2 &&);
+};
+
+struct B : A {
+  int *ar = some_func<int>(C{some_func<int>(0)});
+  B() {}
+};
+
+int TestBody_got;
+template <int> class Vector {
+public:
+  Vector(std::initializer_list<int>);
+};
+template <typename... Ts> Vector(Ts...) -> Vector<sizeof...(Ts)>;
+class ProgramBuilder {
+public:
+  template <typename T, typename ARGS> int *create(ARGS);
+};
+
+struct TypeTest : ProgramBuilder {
+  int *str_f16 = create<int>(Vector{0});
+  TypeTest() {}
+};
+class TypeTest_Element_Test : TypeTest {
+  void TestBody();
+};
+void TypeTest_Element_Test::TestBody() {
+  int *expect = str_f16;
+  &TestBody_got != expect; // expected-warning {{inequality comparison result unused}}
+}
+} //  namespace test_rebuild
+
+// Test CXXDefaultInitExpr rebuild issue in 
+// https://github.com/llvm/llvm-project/pull/92527
+namespace test_rebuild2 {
+struct F {
+  int g;
+};
+struct H {};
+struct I {
+  I(const F &);
+  I(H);
+};
+struct L {
+  I i = I({.g = 0});
+};
+struct N : L {};
+
+void f() {
+  delete new L; // Ok
+  delete new N; // Ok
+}
+} // namespace test_rebuild2
+#endif // __cplusplus >= 201703L
+
 #if __cplusplus >= 202002L
 // This test ensures cleanup expressions are correctly produced
 // in the presence of default member initializers.
diff --git clang/test/SemaCXX/cxx20-default-compare.cpp clang/test/SemaCXX/cxx20-default-compare.cpp
index 7074ee885ac4..3e4673c31e48 100644
--- clang/test/SemaCXX/cxx20-default-compare.cpp
+++ clang/test/SemaCXX/cxx20-default-compare.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 %s -std=c++23 -verify -Wfloat-equal
 
+#include "Inputs/std-compare.h"
+
 struct Foo {
   float val;
   bool operator==(const Foo &) const;
@@ -15,3 +17,51 @@ bool operator==(const Foo &, const Foo &) = default;  // expected-warning {{comp
 
 // Declare the defaulted comparison function as a non-member function. Arguments are passed by value.
 bool operator==(Foo, Foo) = default;  // expected-warning {{comparing floating point with == or != is unsafe}} expected-note {{in defaulted equality comparison operator for 'Foo' first required here}}
+
+namespace GH102588 {
+struct A {
+  int i = 0;
+  constexpr operator int() const { return i; }
+  constexpr operator int&() { return ++i; }
+};
+
+struct B : A {
+  bool operator==(const B &) const = default;
+};
+
+constexpr bool f() {
+  B x;
+  return x == x;
+}
+
+static_assert(f());
+
+struct ConstOnly {
+  std::strong_ordering operator<=>(const ConstOnly&) const;
+  std::strong_ordering operator<=>(ConstOnly&) = delete;
+  friend bool operator==(const ConstOnly&, const ConstOnly&);
+  friend bool operator==(ConstOnly&, ConstOnly&) = delete;
+};
+
+struct MutOnly {
+  std::strong_ordering operator<=>(const MutOnly&) const = delete;
+  std::strong_ordering operator<=>(MutOnly&);
+  friend bool operator==(const MutOnly&, const MutOnly&) = delete;
+  friend bool operator==(MutOnly&, MutOnly&);
+};
+
+struct ConstCheck : ConstOnly {
+  friend std::strong_ordering operator<=>(const ConstCheck&, const ConstCheck&) = default;
+  std::strong_ordering operator<=>(ConstCheck const& __restrict) const __restrict = default;
+  friend bool operator==(const ConstCheck&, const ConstCheck&) = default;
+  bool operator==(this const ConstCheck&, const ConstCheck&) = default;
+};
+
+// FIXME: Non-reference explicit object parameters are rejected
+struct MutCheck : MutOnly {
+  friend bool operator==(MutCheck, MutCheck) = default;
+  // std::strong_ordering operator<=>(this MutCheck, MutCheck) = default;
+  friend std::strong_ordering operator<=>(MutCheck, MutCheck) = default;
+  // bool operator==(this MutCheck, MutCheck) = default;
+};
+}
diff --git clang/test/SemaCXX/cxx2a-template-lambdas.cpp clang/test/SemaCXX/cxx2a-template-lambdas.cpp
index fff524e77d3b..00ba291fbd19 100644
--- clang/test/SemaCXX/cxx2a-template-lambdas.cpp
+++ clang/test/SemaCXX/cxx2a-template-lambdas.cpp
@@ -97,3 +97,37 @@ void foo() {
 
 }
 #endif
+
+#if __cplusplus >= 202002L
+namespace {
+struct S {};
+constexpr S gs;
+void f() {
+  constexpr int x{};
+  const int y{};
+  auto b = []<int=x, int=y>{};
+  using A = decltype([]<int=x>{});
+
+  int z; // expected-note {{'z' declared here}}
+  auto c = []<int t=z>{
+    // expected-error@-1 {{no matching function for call to object of type}} \
+    // expected-error@-1 {{variable 'z' cannot be implicitly captured in a lambda with no capture-default specified}} \
+    // expected-note@-1 {{lambda expression begins here}} \
+    // expected-note@-1 4{{capture}} \
+    // expected-note@-1 {{candidate template ignored: substitution failure: reference to local variable 'z' declared in enclosing function}}
+    return t;
+  }();
+
+  auto class_type_global = []<S=gs>{};
+
+  static constexpr S static_s;
+  auto class_type_static = []<S=static_s>{};
+
+  constexpr S s;  // expected-note {{'s' declared here}}
+  auto class_type = []<S=s>{};
+  // expected-error@-1 {{variable 's' cannot be implicitly captured in a lambda with no capture-default specified}} \
+  // expected-note@-1 {{lambda expression begins here}} \
+  // expected-note@-1 4{{capture}}
+}
+}
+#endif
diff --git clang/test/SemaCXX/cxx2c-pack-indexing.cpp clang/test/SemaCXX/cxx2c-pack-indexing.cpp
index 7d7e80874621..962dbb8137f2 100644
--- clang/test/SemaCXX/cxx2c-pack-indexing.cpp
+++ clang/test/SemaCXX/cxx2c-pack-indexing.cpp
@@ -258,4 +258,16 @@ void f() {
     vars<0>::x<0>();
 }
 
+} // namespace GH105900
+
+namespace GH105903 {
+
+template <typename... opts> struct temp {
+  template <unsigned s> static auto x() -> opts... [s] {} // expected-note {{invalid index 0 for pack 'opts' of size 0}}
+};
+
+void f() {
+  temp<>::x<0>(); // expected-error {{no matching}}
 }
+
+} // namespace GH105903
diff --git clang/test/SemaCXX/eval-crashes.cpp clang/test/SemaCXX/eval-crashes.cpp
index 0865dafe4bf9..21e05f19be0c 100644
--- clang/test/SemaCXX/eval-crashes.cpp
+++ clang/test/SemaCXX/eval-crashes.cpp
@@ -25,11 +25,9 @@ namespace pr33140_0b {
 }
 
 namespace pr33140_2 {
-  // FIXME: The declaration of 'b' below should lifetime-extend two int
-  // temporaries.
-  struct A { int &&r = 0; }; // expected-note 2{{initializing field 'r' with default member initializer}}
+  struct A { int &&r = 0; };
   struct B { A x, y; };
-  B b = {}; // expected-warning 2{{lifetime extension of temporary created by aggregate initialization using a default member initializer is not yet supported}}
+  B b = {};
 }
 
 namespace pr33140_3 {
diff --git clang/test/SemaCXX/lambda-pack-expansion.cpp clang/test/SemaCXX/lambda-pack-expansion.cpp
index 77b2e244753a..0e60ecd87566 100644
--- clang/test/SemaCXX/lambda-pack-expansion.cpp
+++ clang/test/SemaCXX/lambda-pack-expansion.cpp
@@ -68,3 +68,29 @@ void f() {
 }
 
 }
+
+namespace GH61460 {
+
+template<typename... Ts>
+void f1(Ts... ts);
+
+template <typename... Ts> void g(Ts... p1s) {
+  (void)[&](auto... p2s) {
+    (
+        [&] {
+          p1s;
+          f1(p1s);
+          sizeof(p1s);
+          p2s;
+        },
+        ...);
+  };
+}
+
+template <typename... Ts> void g2(Ts... p1s) {
+  (void)[&](auto... p2s) { [&] { p1s; p2s; }; }; // expected-error {{unexpanded parameter pack 'p2s'}}
+}
+
+void f1() { g(); }
+
+} // namespace GH61460
diff --git clang/test/SemaCXX/pr61460.cpp clang/test/SemaCXX/pr61460.cpp
deleted file mode 100644
index 471b1b39d23c..000000000000
--- clang/test/SemaCXX/pr61460.cpp
+++ /dev/null
@@ -1,13 +0,0 @@
-// RUN: %clang_cc1 -std=c++17 %s -fsyntax-only -verify
-
-template <typename... Ts> void g(Ts... p1s) {
-  (void)[&](auto... p2s) { ([&] { p1s; p2s; }, ...); };
-}
-
-void f1() {
-  g();
-}
-
-template <typename... Ts> void g2(Ts... p1s) {
-  (void)[&](auto... p2s) { [&] { p1s; p2s; }; }; // expected-error {{expression contains unexpanded parameter pack 'p2s'}}
-}
diff --git clang/test/SemaCXX/source_location.cpp clang/test/SemaCXX/source_location.cpp
index 34177bfe287f..8b3a5d8dd332 100644
--- clang/test/SemaCXX/source_location.cpp
+++ clang/test/SemaCXX/source_location.cpp
@@ -989,3 +989,26 @@ void Test() {
 }
 
 #endif
+
+
+namespace GH67134 {
+template <int loc = std::source_location::current().line()>
+constexpr auto f(std::source_location loc2 = std::source_location::current()) { return loc; }
+
+int g = []() -> decltype(f()) { return 0; }();
+
+int call() {
+#if __cplusplus >= 202002L
+  return []<decltype(f()) = 0>() -> decltype(f()) { return  0; }();
+#endif
+  return []() -> decltype(f()) { return  0; }();
+}
+
+#if __cplusplus >= 202002L
+template<typename T>
+int Var = requires { []() -> decltype(f()){}; };
+int h = Var<int>;
+#endif
+
+
+}
diff --git clang/test/SemaCXX/static-assert-ext.cpp clang/test/SemaCXX/static-assert-ext.cpp
new file mode 100644
index 000000000000..05f7a0e96974
--- /dev/null
+++ clang/test/SemaCXX/static-assert-ext.cpp
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -std=c++98 -fsyntax-only -pedantic %s -verify=precxx11,precxx17,precxx26
+// RUN: %clang_cc1 -std=c++11 -fsyntax-only -pedantic %s -verify=since-cxx11,precxx17,precxx26 -Wc++98-compat
+// RUN: %clang_cc1 -std=c++17 -fsyntax-only -pedantic %s -verify=since-cxx11,since-cxx17,precxx26 -Wc++98-compat -Wpre-c++17-compat
+// RUN: %clang_cc1 -std=c++26 -fsyntax-only -pedantic %s -verify=since-cxx11,since-cxx17,since-cxx26 -Wc++98-compat -Wpre-c++17-compat -Wpre-c++26-compat
+
+static_assert(false, "a");
+// precxx11-error@-1 {{a type specifier is required for all declarations}}
+// since-cxx11-warning@-2 {{'static_assert' declarations are incompatible with C++98}}
+// since-cxx11-error@-3 {{static assertion failed: a}}
+
+#if __cplusplus >= 201103L
+static_assert(false);
+// since-cxx11-warning@-1 {{'static_assert' declarations are incompatible with C++98}}
+// precxx17-warning@-2 {{'static_assert' with no message is a C++17 extension}}
+// since-cxx17-warning@-3 {{'static_assert' with no message is incompatible with C++ standards before C++17}}
+// since-cxx11-error@-4 {{static assertion failed}}
+
+struct X {
+    static constexpr int size() { return 1; } // since-cxx11-warning {{'constexpr'}}
+    static constexpr const char* data() { return "b"; } // since-cxx11-warning {{'constexpr'}}
+};
+
+static_assert(false, X());
+// since-cxx11-warning@-1 {{'static_assert' declarations are incompatible with C++98}}
+// precxx26-warning@-2 {{'static_assert' with a user-generated message is a C++26 extension}}
+// since-cxx26-warning@-3 {{'static_assert' with a user-generated message is incompatible with C++ standards before C++26}}
+// since-cxx11-error@-4 {{static assertion failed: b}}
+#endif
diff --git clang/test/SemaCXX/sugar-common-types.cpp clang/test/SemaCXX/sugar-common-types.cpp
index e1c7578a66b9..39a762127811 100644
--- clang/test/SemaCXX/sugar-common-types.cpp
+++ clang/test/SemaCXX/sugar-common-types.cpp
@@ -90,13 +90,6 @@ N t19 = 0 ? (__underlying_type(EnumsX::X)){} : (__underlying_type(EnumsY::Y)){};
 N t20 = 0 ? (__underlying_type(EnumsX::X)){} : (__underlying_type(EnumsY::X)){};
 // expected-error@-1 {{rvalue of type '__underlying_type(Enums::X)' (aka 'int')}}
 
-using SBTF1 = SS1 [[clang::btf_type_tag("1")]];
-using SBTF2 = ::SS1 [[clang::btf_type_tag("1")]];
-using SBTF3 = ::SS1 [[clang::btf_type_tag("2")]];
-
-N t21 = 0 ? (SBTF1){} : (SBTF3){}; // expected-error {{from 'SS1'}}
-N t22 = 0 ? (SBTF1){} : (SBTF2){}; // expected-error {{from 'SS1 __attribute__((btf_type_tag("1")))' (aka 'SS1')}}
-
 using QX = const SB1 *;
 using QY = const ::SB1 *;
 N t23 = 0 ? (QX){} : (QY){}; // expected-error {{rvalue of type 'const SB1 *' (aka 'const SS1 *')}}
diff --git clang/test/SemaCXX/type-traits.cpp clang/test/SemaCXX/type-traits.cpp
index bf069d9bc082..91ef7786f11b 100644
--- clang/test/SemaCXX/type-traits.cpp
+++ clang/test/SemaCXX/type-traits.cpp
@@ -2052,7 +2052,6 @@ void is_implicit_lifetime(int n) {
   static_assert(__builtin_is_implicit_lifetime(float4));
   static_assert(__builtin_is_implicit_lifetime(align_value_int));
   static_assert(__builtin_is_implicit_lifetime(int[[clang::annotate_type("category2")]] *));
-  static_assert(__builtin_is_implicit_lifetime(int __attribute__((btf_type_tag("user"))) *));
   static_assert(__builtin_is_implicit_lifetime(EnforceReadOnlyPlacement));
   static_assert(__builtin_is_implicit_lifetime(int __attribute__((noderef)) *));
   static_assert(__builtin_is_implicit_lifetime(TypeVisibility));
@@ -4148,6 +4147,24 @@ class Template {};
 // Make sure we don't crash when instantiating a type
 static_assert(!__is_trivially_equality_comparable(Template<Template<int>>));
 
+
+struct S operator==(S, S);
+
+template <class> struct basic_string_view {};
+
+struct basic_string {
+  operator basic_string_view<int>() const;
+};
+
+template <class T>
+const bool is_trivially_equality_comparable = __is_trivially_equality_comparable(T);
+
+template <int = is_trivially_equality_comparable<basic_string> >
+void find();
+
+void func() { find(); }
+
+
 namespace hidden_friend {
 
 struct TriviallyEqualityComparable {
diff --git clang/test/SemaCXX/warn-thread-safety-analysis.cpp clang/test/SemaCXX/warn-thread-safety-analysis.cpp
index af9254508d80..8477200456d9 100644
--- clang/test/SemaCXX/warn-thread-safety-analysis.cpp
+++ clang/test/SemaCXX/warn-thread-safety-analysis.cpp
@@ -6077,24 +6077,20 @@ namespace ReturnScopedLockable {
 class Object {
 public:
   MutexLock lock() EXCLUSIVE_LOCK_FUNCTION(mutex) {
-    // TODO: False positive because scoped lock isn't destructed.
-    return MutexLock(&mutex); // expected-note {{mutex acquired here}}
-  }                           // expected-warning {{mutex 'mutex' is still held at the end of function}}
+    return MutexLock(&mutex);
+  }
 
   ReaderMutexLock lockShared() SHARED_LOCK_FUNCTION(mutex) {
-    // TODO: False positive because scoped lock isn't destructed.
-    return ReaderMutexLock(&mutex); // expected-note {{mutex acquired here}}
-  }                                 // expected-warning {{mutex 'mutex' is still held at the end of function}}
+    return ReaderMutexLock(&mutex);
+  }
 
   MutexLock adopt() EXCLUSIVE_LOCKS_REQUIRED(mutex) {
-    // TODO: False positive because scoped lock isn't destructed.
-    return MutexLock(&mutex, true); // expected-note {{mutex acquired here}}
-  }                                 // expected-warning {{mutex 'mutex' is still held at the end of function}}
+    return MutexLock(&mutex, true);
+  }
 
   ReaderMutexLock adoptShared() SHARED_LOCKS_REQUIRED(mutex) {
-    // TODO: False positive because scoped lock isn't destructed.
-    return ReaderMutexLock(&mutex, true); // expected-note {{mutex acquired here}}
-  }                                       // expected-warning {{mutex 'mutex' is still held at the end of function}}
+    return ReaderMutexLock(&mutex, true);
+  }
 
   int x GUARDED_BY(mutex);
   void needsLock() EXCLUSIVE_LOCKS_REQUIRED(mutex);
diff --git clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp
new file mode 100644
index 000000000000..2bd12db93fd5
--- /dev/null
+++ clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp
@@ -0,0 +1,60 @@
+// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage \
+// RUN:            -verify %s
+
+namespace std {
+  inline namespace __1 {
+  template< class InputIt, class OutputIt >
+  OutputIt copy( InputIt first, InputIt last,
+		 OutputIt d_first );
+
+  struct iterator{};
+  template<typename T>
+  struct span {
+    T * ptr;
+    T * data();
+    unsigned size_bytes();
+    unsigned size();
+    iterator begin() const noexcept;
+    iterator end() const noexcept;
+  };
+
+  template<typename T>
+  struct basic_string {
+    T* p;
+    T *c_str();
+    T *data();
+    unsigned size_bytes();
+  };
+
+  typedef basic_string<char> string;
+  typedef basic_string<wchar_t> wstring;
+
+  // C functions under std:
+  void memcpy();
+  void strcpy();
+  int snprintf( char* buffer, unsigned buf_size, const char* format, ... );
+  }
+}
+
+void f(char * p, char * q, std::span<char> s) {
+  std::memcpy();              // expected-warning{{function 'memcpy' is unsafe}}
+  std::strcpy();              // expected-warning{{function 'strcpy' is unsafe}}
+  std::__1::memcpy();              // expected-warning{{function 'memcpy' is unsafe}}
+  std::__1::strcpy();              // expected-warning{{function 'strcpy' is unsafe}}
+
+  /* Test printfs */
+  std::snprintf(s.data(), 10, "%s%d", "hello", *p); // expected-warning{{function 'snprintf' is unsafe}} expected-note{{buffer pointer and size may not match}}
+  std::__1::snprintf(s.data(), 10, "%s%d", "hello", *p); // expected-warning{{function 'snprintf' is unsafe}} expected-note{{buffer pointer and size may not match}}
+  std::snprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // no warn
+  std::__1::snprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // no warn
+}
+
+void v(std::string s1) {
+  std::snprintf(s1.data(), s1.size_bytes(), "%s%d", s1.c_str(), 0); // no warn
+  std::__1::snprintf(s1.data(), s1.size_bytes(), "%s%d", s1.c_str(), 0); // no warn
+}
+
+void g(char *begin, char *end, char *p, std::span<char> s) {
+  std::copy(begin, end, p); // no warn
+  std::copy(s.begin(), s.end(), s.begin()); // no warn
+}
diff --git clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
new file mode 100644
index 000000000000..a3716073609f
--- /dev/null
+++ clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
@@ -0,0 +1,125 @@
+// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage \
+// RUN:            -verify %s
+// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage-in-libc-call \
+// RUN:            -verify %s
+
+typedef struct {} FILE;
+void memcpy();
+void __asan_memcpy();
+void strcpy();
+void strcpy_s();
+void wcscpy_s();
+unsigned strlen( const char* str );
+int fprintf( FILE* stream, const char* format, ... );
+int printf( const char* format, ... );
+int sprintf( char* buffer, const char* format, ... );
+int swprintf( char* buffer, const char* format, ... );
+int snprintf( char* buffer, unsigned buf_size, const char* format, ... );
+int snwprintf( char* buffer, unsigned buf_size, const char* format, ... );
+int snwprintf_s( char* buffer, unsigned buf_size, const char* format, ... );
+int vsnprintf( char* buffer, unsigned buf_size, const char* format, ... );
+int sscanf_s(const char * buffer, const char * format, ...);
+int sscanf(const char * buffer, const char * format, ... );
+int __asan_printf();
+
+namespace std {
+  template< class InputIt, class OutputIt >
+  OutputIt copy( InputIt first, InputIt last,
+		 OutputIt d_first );
+
+  struct iterator{};
+  template<typename T>
+  struct span {
+    T * ptr;
+    T * data();
+    unsigned size_bytes();
+    unsigned size();
+    iterator begin() const noexcept;
+    iterator end() const noexcept;
+  };
+
+  template<typename T>
+  struct basic_string {
+    T* p;
+    T *c_str();
+    T *data();
+    unsigned size_bytes();
+  };
+
+  typedef basic_string<char> string;
+  typedef basic_string<wchar_t> wstring;
+
+  // C functions under std:
+  void memcpy();
+  void strcpy();
+}
+
+void f(char * p, char * q, std::span<char> s, std::span<char> s2) {
+  memcpy();                   // expected-warning{{function 'memcpy' is unsafe}}
+  std::memcpy();              // expected-warning{{function 'memcpy' is unsafe}}
+  __builtin_memcpy(p, q, 64); // expected-warning{{function '__builtin_memcpy' is unsafe}}
+  __builtin___memcpy_chk(p, q, 8, 64);  // expected-warning{{function '__builtin___memcpy_chk' is unsafe}}
+  __asan_memcpy();                      // expected-warning{{function '__asan_memcpy' is unsafe}}
+  strcpy();                   // expected-warning{{function 'strcpy' is unsafe}}
+  std::strcpy();              // expected-warning{{function 'strcpy' is unsafe}}
+  strcpy_s();                 // expected-warning{{function 'strcpy_s' is unsafe}}
+  wcscpy_s();                 // expected-warning{{function 'wcscpy_s' is unsafe}}
+
+  /* Test printfs */
+  fprintf((FILE*)p, "%s%d", p, *p);  // expected-warning{{function 'fprintf' is unsafe}} expected-note{{string argument is not guaranteed to be null-terminated}}
+  printf("%s%d", // expected-warning{{function 'printf' is unsafe}}
+	 p,    // expected-note{{string argument is not guaranteed to be null-terminated}} note attached to the unsafe argument
+	 *p);
+  sprintf(q, "%s%d", "hello", *p); // expected-warning{{function 'sprintf' is unsafe}} expected-note{{change to 'snprintf' for explicit bounds checking}}
+  swprintf(q, "%s%d", "hello", *p); // expected-warning{{function 'swprintf' is unsafe}} expected-note{{change to 'snprintf' for explicit bounds checking}}
+  snprintf(q, 10, "%s%d", "hello", *p); // expected-warning{{function 'snprintf' is unsafe}} expected-note{{buffer pointer and size may not match}}
+  snprintf(s.data(), s2.size(), "%s%d", "hello", *p); // expected-warning{{function 'snprintf' is unsafe}} expected-note{{buffer pointer and size may not match}}
+  snwprintf(s.data(), s2.size(), "%s%d", "hello", *p); // expected-warning{{function 'snwprintf' is unsafe}} expected-note{{buffer pointer and size may not match}}
+  snwprintf_s(                      // expected-warning{{function 'snwprintf_s' is unsafe}}
+	      s.data(),             // expected-note{{buffer pointer and size may not match}} // note attached to the buffer
+	      s2.size(),
+	      "%s%d", "hello", *p);
+  vsnprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // expected-warning{{function 'vsnprintf' is unsafe}} expected-note{{'va_list' is unsafe}}
+  sscanf(p, "%s%d", "hello", *p);    // expected-warning{{function 'sscanf' is unsafe}}
+  sscanf_s(p, "%s%d", "hello", *p);  // expected-warning{{function 'sscanf_s' is unsafe}}
+  fprintf((FILE*)p, "%P%d%p%i hello world %32s", *p, *p, p, *p, p); // expected-warning{{function 'fprintf' is unsafe}} expected-note{{string argument is not guaranteed to be null-terminated}}
+  fprintf((FILE*)p, "%P%d%p%i hello world %32s", *p, *p, p, *p, "hello"); // no warn
+  printf("%s%d", "hello", *p); // no warn
+  snprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // no warn
+  snprintf(s.data(), s.size_bytes(), "%s%d", __PRETTY_FUNCTION__, *p); // no warn
+  snwprintf(s.data(), s.size_bytes(), "%s%d", __PRETTY_FUNCTION__, *p); // no warn
+  snwprintf_s(s.data(), s.size_bytes(), "%s%d", __PRETTY_FUNCTION__, *p); // no warn
+  strlen("hello");// no warn
+  __asan_printf();// a printf but no argument, so no warn
+}
+
+void v(std::string s1, int *p) {
+  snprintf(s1.data(), s1.size_bytes(), "%s%d%s%p%s", __PRETTY_FUNCTION__, *p, "hello", p, s1.c_str()); // no warn
+  snprintf(s1.data(), s1.size_bytes(), s1.c_str(), __PRETTY_FUNCTION__, *p, "hello", s1.c_str());      // no warn
+  printf("%s%d%s%p%s", __PRETTY_FUNCTION__, *p, "hello", p, s1.c_str());              // no warn
+  printf(s1.c_str(), __PRETTY_FUNCTION__, *p, "hello", s1.c_str());                   // no warn
+  fprintf((FILE*)0, "%s%d%s%p%s", __PRETTY_FUNCTION__, *p, "hello", p, s1.c_str());   // no warn
+  fprintf((FILE*)0, s1.c_str(), __PRETTY_FUNCTION__, *p, "hello", s1.c_str());        // no warn
+}
+
+
+void g(char *begin, char *end, char *p, std::span<char> s) {
+  std::copy(begin, end, p); // no warn
+  std::copy(s.begin(), s.end(), s.begin()); // no warn
+}
+
+// warning gets turned off
+void ff(char * p, char * q, std::span<char> s, std::span<char> s2) {
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunsafe-buffer-usage-in-libc-call"
+  memcpy();
+  std::memcpy();
+  __builtin_memcpy(p, q, 64);
+  __builtin___memcpy_chk(p, q, 8, 64);
+  __asan_memcpy();
+  strcpy();
+  std::strcpy();
+  strcpy_s();
+  wcscpy_s();
+#pragma clang diagnostic pop
+}
diff --git clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp
index 844311c3a51a..989931e41c0c 100644
--- clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp
+++ clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp
@@ -1,8 +1,6 @@
 // RUN: %clang_cc1 -std=c++20 -Wunsafe-buffer-usage -fsafe-buffer-usage-suggestions -verify %s
 
-// expected-no-diagnostics
-
 typedef unsigned __darwin_size_t;
 typedef __darwin_size_t size_t;
  #define bzero(s, n) __builtin_bzero(s, n)
-void __nosan_bzero(void *dst, size_t sz) { bzero(dst, sz); }
+void __nosan_bzero(void *dst, size_t sz) { bzero(dst, sz); } // expected-warning{{function '__builtin_bzero' is unsafe}}
diff --git clang/test/SemaHLSL/BuiltIns/StructuredBuffers.hlsl clang/test/SemaHLSL/BuiltIns/StructuredBuffers.hlsl
new file mode 100644
index 000000000000..2450941f5d9b
--- /dev/null
+++ clang/test/SemaHLSL/BuiltIns/StructuredBuffers.hlsl
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -fsyntax-only -verify %s
+
+typedef vector<float, 3> float3;
+
+StructuredBuffer<float3> Buffer;
+
+// expected-error@+2 {{class template 'StructuredBuffer' requires template arguments}}
+// expected-note@*:* {{template declaration from hidden source: template <class element_type> class StructuredBuffer}}
+StructuredBuffer BufferErr1;
+
+// expected-error@+2 {{too few template arguments for class template 'StructuredBuffer'}}
+// expected-note@*:* {{template declaration from hidden source: template <class element_type> class StructuredBuffer}}
+StructuredBuffer<> BufferErr2;
+
+[numthreads(1,1,1)]
+void main() {
+  (void)Buffer.h; // expected-error {{'h' is a private member of 'hlsl::StructuredBuffer<vector<float, 3> >'}}
+  // expected-note@* {{implicitly declared private here}}
+}
diff --git clang/test/SemaHLSL/BuiltIns/select-errors.hlsl clang/test/SemaHLSL/BuiltIns/select-errors.hlsl
new file mode 100644
index 000000000000..34b5fb6d54cd
--- /dev/null
+++ clang/test/SemaHLSL/BuiltIns/select-errors.hlsl
@@ -0,0 +1,119 @@
+// RUN: %clang_cc1 -finclude-default-header \
+// RUN:   -triple dxil-pc-shadermodel6.6-library %s -emit-llvm-only \
+// RUN:   -disable-llvm-passes -verify -verify-ignore-unexpected
+
+int test_no_arg() {
+  return select();
+  // expected-error@-1 {{no matching function for call to 'select'}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template
+  // not viable: requires 3 arguments, but 0 were provided}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not
+  // viable: requires 3 arguments, but 0 were provided}}
+}
+
+int test_too_few_args(bool p0) {
+  return select(p0);
+  // expected-error@-1 {{no matching function for call to 'select'}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not
+  // viable: requires 3 arguments, but 1 was provided}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not
+  // viable: requires 3 arguments, but 1 was provided}}
+}
+
+int test_too_many_args(bool p0, int t0, int f0, int g0) {
+  return select<int>(p0, t0, f0, g0);
+  // expected-error@-1 {{no matching function for call to 'select'}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not
+  // viable: requires 3 arguments, but 4 were provided}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not
+  // viable: requires 3 arguments, but 4 were provided}}
+}
+
+int test_select_first_arg_wrong_type(int1 p0, int t0, int f0) {
+  return select(p0, t0, f0);
+  // expected-error@-1 {{no matching function for call to 'select'}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not
+  // viable: no known conversion from 'vector<int, 1>' (vector of 1 'int' value)
+  // to 'bool' for 1st argument}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate template ignored: could
+  // not match 'vector<T, Sz>' against 'int'}}
+}
+
+int1 test_select_bool_vals_diff_vecs(bool p0, int1 t0, int1 f0) {
+  return select<int1>(p0, t0, f0);
+  // expected-warning@-1 {{implicit conversion truncates vector:
+  // 'vector<int, 2>' (vector of 2 'int' values) to 'vector<int, 1>'
+  // (vector of 1 'int' value)}}
+}
+
+int2 test_select_vector_vals_not_vecs(bool2 p0, int t0,
+                                               int f0) {
+  return select(p0, t0, f0);
+  // expected-error@-1 {{no matching function for call to 'select'}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate template ignored:
+  // could not match 'vector<T, Sz>' against 'int'}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not
+  // viable: no known conversion from 'vector<bool, 2>'
+  // (vector of 2 'bool' values) to 'bool' for 1st argument}}
+}
+
+int1 test_select_vector_vals_wrong_size(bool2 p0, int1 t0, int1 f0) {
+  return select<int,1>(p0, t0, f0); // produce warnings
+  // expected-warning@-1 {{implicit conversion truncates vector:
+  // 'vector<bool, 2>' (vector of 2 'bool' values) to 'vector<bool, 1>'
+  // (vector of 1 'bool' value)}}
+  // expected-warning@-2 {{implicit conversion truncates vector:
+  // 'vector<int, 2>' (vector of 2 'int' values) to 'vector<int, 1>'
+  // (vector of 1 'int' value)}}
+}
+
+// __builtin_hlsl_select tests
+int test_select_builtin_wrong_arg_count(bool p0, int t0) {
+  return __builtin_hlsl_select(p0, t0);
+  // expected-error@-1 {{too few arguments to function call, expected 3,
+  // have 2}}
+}
+
+// Not a bool or a vector of bool; should produce 2 errors.
+int test_select_builtin_first_arg_wrong_type1(int p0, int t0, int f0) {
+  return __builtin_hlsl_select(p0, t0, f0);
+  // expected-error@-1 {{passing 'int' to parameter of incompatible type
+  // 'bool'}}
+  // expected-error@-2 {{First argument to __builtin_hlsl_select must be of
+  // vector type}}
+}
+
+int test_select_builtin_first_arg_wrong_type2(int1 p0, int t0, int f0) {
+  return __builtin_hlsl_select(p0, t0, f0);
+  // expected-error@-1 {{passing 'vector<int, 1>' (vector of 1 'int' value) to
+  // parameter of incompatible type 'bool'}}
+  // expected-error@-2 {{First argument to __builtin_hlsl_select must be of
+  // vector type}}
+}
+
+// If the condition is a bool, the last 2 args must be of the same type.
+int test_select_builtin_bool_incompatible_args(bool p0, int t0, double f0) {
+  return __builtin_hlsl_select(p0, t0, f0);
+  // expected-error@-1 {{arguments are of different types ('int' vs 'double')}}
+}
+
+// If the condition is a vector, the second arg must also be a vector.
+int2 test_select_builtin_second_arg_not_vector(bool2 p0, int t0, int2 f0) {
+  return __builtin_hlsl_select(p0, t0, f0);
+  // expected-error@-1 {{Second argument to __builtin_hlsl_select must be of
+  // vector type}}
+}
+
+// If the condition is a vector, the third arg must also be a vector.
+int2 test_select_builtin_third_arg_not_vector(bool2 p0, int2 t0, int f0) {
+  return __builtin_hlsl_select(p0, t0, f0);
+  // expected-error@-1 {{Third argument to __builtin_hlsl_select must be of
+  // vector type}}
+}
+
+// If the condition is a vector, the last 2 args must be of the same type (here both are vectors but of different element types).
+int2 test_select_builtin_diff_types(bool1 p0, int1 t0, float1 f0) {
+  return __builtin_hlsl_select(p0, t0, f0);
+  // expected-error@-1 {{arguments are of different types ('vector<int, [...]>'
+  // vs 'vector<float, [...]>')}}
+}
diff --git clang/test/SemaHLSL/BuiltIns/sign-errors.hlsl clang/test/SemaHLSL/BuiltIns/sign-errors.hlsl
new file mode 100644
index 000000000000..b67725fc77e5
--- /dev/null
+++ clang/test/SemaHLSL/BuiltIns/sign-errors.hlsl
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -verify-ignore-unexpected
+
+bool test_too_few_arg() {
+  return __builtin_hlsl_elementwise_sign();
+  // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
+}
+
+bool2 test_too_many_arg(float2 p0) {
+  return __builtin_hlsl_elementwise_sign(p0, p0);
+  // expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
+}
+
+bool builtin_bool_to_float_type_promotion(bool p1) {
+  return __builtin_hlsl_elementwise_sign(p1);
+  // expected-error@-1 {{passing 'bool' to parameter of incompatible type 'float'}}
+}
diff --git clang/test/SemaHLSL/TruncationOverloadResolution.hlsl clang/test/SemaHLSL/TruncationOverloadResolution.hlsl
index f8cfe22372e8..0192c27860f1 100644
--- clang/test/SemaHLSL/TruncationOverloadResolution.hlsl
+++ clang/test/SemaHLSL/TruncationOverloadResolution.hlsl
@@ -1,24 +1,16 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -fnative-half-type -finclude-default-header -fsyntax-only %s -DERROR=1 -verify
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -fnative-half-type -finclude-default-header -fsyntax-only -Wconversion %s -DERROR=1 -verify
 // RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -fnative-half-type -finclude-default-header -ast-dump %s | FileCheck %s
 
-// Case 1: Prefer exact-match truncation over conversion.
-void Half4Float4Double2(double2 D);
-void Half4Float4Double2(float4 D);
-void Half4Float4Double2(half4 D);
+// Case 1: Prefer conversion over exact match truncation.
 
 void Half4Float2(float2 D);
 void Half4Float2(half4 D);
 
 void Case1(float4 F, double4 D) {
   // CHECK: CallExpr {{.*}} 'void'
-  // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(double2)' <FunctionToPointerDecay>
-  // CHECK-NEXT: DeclRefExpr {{.*}} 'void (double2)' lvalue Function {{.*}} 'Half4Float4Double2' 'void (double2)'
-  Half4Float4Double2(D); // expected-warning{{implicit conversion truncates vector: 'double4' (aka 'vector<double, 4>') to 'vector<double, 2>' (vector of 2 'double' values)}}
-
-  // CHECK: CallExpr {{.*}} 'void'
-  // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(float2)' <FunctionToPointerDecay>
-  // CHECK-NEXT: DeclRefExpr {{.*}} 'void (float2)' lvalue Function {{.*}} 'Half4Float2' 'void (float2)'
-  Half4Float2(F); // expected-warning{{implicit conversion truncates vector: 'float4' (aka 'vector<float, 4>') to 'vector<float, 2>' (vector of 2 'float' values)}}
+  // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(half4)' <FunctionToPointerDecay>
+  // CHECK-NEXT: DeclRefExpr {{.*}} 'void (half4)' lvalue Function {{.*}} 'Half4Float2' 'void (half4)'
+  Half4Float2(F); // expected-warning{{implicit conversion loses floating-point precision: 'float4' (aka 'vector<float, 4>') to 'vector<half, 4>' (vector of 4 'half' values)}}
 }
 
 // Case 2: Prefer promotions over conversions when truncating.
@@ -32,6 +24,42 @@ void Case2(float4 F) {
   Half2Double2(F); // expected-warning{{implicit conversion truncates vector: 'float4' (aka 'vector<float, 4>') to 'vector<double, 2>' (vector of 2 'double' values)}}
 }
 
+// Case 3: Allow truncation down to vector<T,1> or T.
+void Half(half H);
+void Float(float F);
+void Double(double D);
+
+void Half1(half1 H);
+void Float1(float1 F);
+void Double1(double1 D);
+
+void Case3(half3 H, float3 F, double3 D) {
+  Half(H); // expected-warning{{implicit conversion turns vector to scalar: 'half3' (aka 'vector<half, 3>') to 'half'}}
+  Half(F); // expected-warning{{implicit conversion turns vector to scalar: 'float3' (aka 'vector<float, 3>') to 'half'}}
+  Half(D); // expected-warning{{implicit conversion turns vector to scalar: 'double3' (aka 'vector<double, 3>') to 'half'}}
+
+  Float(H); // expected-warning{{implicit conversion turns vector to scalar: 'half3' (aka 'vector<half, 3>') to 'float'}}
+  Float(F); // expected-warning{{implicit conversion turns vector to scalar: 'float3' (aka 'vector<float, 3>') to 'float'}}
+  Float(D); // expected-warning{{implicit conversion turns vector to scalar: 'double3' (aka 'vector<double, 3>') to 'float'}}
+
+  Double(H); // expected-warning{{implicit conversion turns vector to scalar: 'half3' (aka 'vector<half, 3>') to 'double'}}
+  Double(F); // expected-warning{{implicit conversion turns vector to scalar: 'float3' (aka 'vector<float, 3>') to 'double'}}
+  Double(D); // expected-warning{{implicit conversion turns vector to scalar: 'double3' (aka 'vector<double, 3>') to 'double'}}
+
+  Half1(H); // expected-warning{{implicit conversion truncates vector: 'half3' (aka 'vector<half, 3>') to 'vector<half, 1>' (vector of 1 'half' value)}}
+  Half1(F); // expected-warning{{implicit conversion truncates vector: 'float3' (aka 'vector<float, 3>') to 'vector<half, 1>' (vector of 1 'half' value)}} expected-warning{{implicit conversion loses floating-point precision: 'float3' (aka 'vector<float, 3>') to 'vector<half, 1>' (vector of 1 'half' value)}}
+  Half1(D); // expected-warning{{implicit conversion truncates vector: 'double3' (aka 'vector<double, 3>') to 'vector<half, 1>' (vector of 1 'half' value)}} expected-warning{{implicit conversion loses floating-point precision: 'double3' (aka 'vector<double, 3>') to 'vector<half, 1>' (vector of 1 'half' value)}}
+
+  Float1(H); // expected-warning{{implicit conversion truncates vector: 'half3' (aka 'vector<half, 3>') to 'vector<float, 1>' (vector of 1 'float' value)}}
+  Float1(F); // expected-warning{{implicit conversion truncates vector: 'float3' (aka 'vector<float, 3>') to 'vector<float, 1>' (vector of 1 'float' value)}}
+  Float1(D); // expected-warning{{implicit conversion truncates vector: 'double3' (aka 'vector<double, 3>') to 'vector<float, 1>' (vector of 1 'float' value)}} expected-warning{{implicit conversion loses floating-point precision: 'double3' (aka 'vector<double, 3>') to 'vector<float, 1>' (vector of 1 'float' value)}}
+
+  Double1(H); // expected-warning{{implicit conversion truncates vector: 'half3' (aka 'vector<half, 3>') to 'vector<double, 1>' (vector of 1 'double' value)}}
+  Double1(F); // expected-warning{{implicit conversion truncates vector: 'float3' (aka 'vector<float, 3>') to 'vector<double, 1>' (vector of 1 'double' value)}}
+  Double1(D); // expected-warning{{implicit conversion truncates vector: 'double3' (aka 'vector<double, 3>') to 'vector<double, 1>' (vector of 1 'double' value)}}
+}
+
+
 #if ERROR
 // Case 3: Two promotions or two conversions are ambiguous.
 void Float2Double2(double2 D); // expected-note{{candidate function}}
@@ -46,7 +74,13 @@ void Half2Half3(half2 D); // expected-note{{candidate function}} expected-note{{
 void Double2Double3(double3 D); // expected-note{{candidate function}} expected-note{{candidate function}} expected-note{{candidate function}}
 void Double2Double3(double2 D); // expected-note{{candidate function}} expected-note{{candidate function}} expected-note{{candidate function}}
 
+void Half4Float4Double2(double2 D);
+void Half4Float4Double2(float4 D); // expected-note{{candidate function}}
+void Half4Float4Double2(half4 D); // expected-note{{candidate function}}
+
 void Case1(half4 H, float4 F, double4 D) {
+  Half4Float4Double2(D); // expected-error {{call to 'Half4Float4Double2' is ambiguous}}
+
   Float2Double2(H); // expected-error {{call to 'Float2Double2' is ambiguous}}
 
   Half2Float2(D); // expected-error {{call to 'Half2Float2' is ambiguous}}
@@ -55,8 +89,8 @@ void Case1(half4 H, float4 F, double4 D) {
   Half2Half3(F); // expected-error {{call to 'Half2Half3' is ambiguous}}
   Half2Half3(D); // expected-error {{call to 'Half2Half3' is ambiguous}}
   Half2Half3(H.xyz);
-  Half2Half3(F.xyz);
-  Half2Half3(D.xyz);
+  Half2Half3(F.xyz); // expected-warning {{implicit conversion loses floating-point precision: 'vector<float, 3>' (vector of 3 'float' values) to 'vector<half, 3>' (vector of 3 'half' values)}}
+  Half2Half3(D.xyz); // expected-warning {{implicit conversion loses floating-point precision: 'vector<double, 3>' (vector of 3 'double' values) to 'vector<half, 3>' (vector of 3 'half' values)}}
 
   Double2Double3(H); // expected-error {{call to 'Double2Double3' is ambiguous}}
   Double2Double3(F); // expected-error {{call to 'Double2Double3' is ambiguous}}
diff --git clang/test/SemaHLSL/Types/BuiltinVector/ScalarSwizzleErrors.hlsl clang/test/SemaHLSL/Types/BuiltinVector/ScalarSwizzleErrors.hlsl
index 5088991f2e28..b1c75acbc16c 100644
--- clang/test/SemaHLSL/Types/BuiltinVector/ScalarSwizzleErrors.hlsl
+++ clang/test/SemaHLSL/Types/BuiltinVector/ScalarSwizzleErrors.hlsl
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library  -x hlsl -finclude-default-header -verify %s
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library -finclude-default-header -verify %s
 
 int2 ToTwoInts(int V) {
   return V.xy; // expected-error{{vector component access exceeds type 'vector<int, 1>' (vector of 1 'int' value)}}
@@ -16,6 +16,10 @@ float2 WhatIsHappening(float V) {
   return V.; // expected-error{{expected unqualified-id}}
 }
 
+float ScalarLValue(float2 V) {
+  (float)V = 4.0; // expected-error{{assignment to cast is illegal, lvalue casts are not supported}}
+}
+
 // These cases produce no error.
 
 float2 HowManyFloats(float V) {
diff --git clang/test/SemaHLSL/Types/BuiltinVector/TruncationConstantExpr.hlsl clang/test/SemaHLSL/Types/BuiltinVector/TruncationConstantExpr.hlsl
new file mode 100644
index 000000000000..918daa03d803
--- /dev/null
+++ clang/test/SemaHLSL/Types/BuiltinVector/TruncationConstantExpr.hlsl
@@ -0,0 +1,20 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library -finclude-default-header -std=hlsl202x -verify %s
+
+// expected-no-diagnostics
+
+// Note: these tests are a bit awkward because, at the time of writing, we don't
+// have a good way to evaluate `any` over bool vector conditions in a constant
+// expression, and the condition for _Static_assert must be an integral constant.
+export void fn() {
+  // This compiling successfully verifies that the vector constant expression
+  // gets truncated to an integer at compile time for instantiation.
+  _Static_assert(((int)1.xxxx) + 0 == 1, "Woo!");
+
+  // This compiling successfully verifies that the vector constant expression
+  // gets truncated to a float at compile time for instantiation.
+  _Static_assert(((float)1.0.xxxx) + 0.0 == 1.0, "Woo!");
+
+  // This compiling successfully verifies that a vector can be truncated to a
+  // smaller vector, then truncated to a float as a constant expression.
+  _Static_assert(((float2)float4(6, 5, 4, 3)).x == 6, "Woo!");
+}
diff --git clang/test/SemaHLSL/Types/Traits/IsIntangibleType.hlsl clang/test/SemaHLSL/Types/Traits/IsIntangibleType.hlsl
new file mode 100644
index 000000000000..92cba1dcd4bd
--- /dev/null
+++ clang/test/SemaHLSL/Types/Traits/IsIntangibleType.hlsl
@@ -0,0 +1,78 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library -finclude-default-header -verify %s
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library -finclude-default-header -fnative-half-type -verify %s
+// expected-no-diagnostics
+
+_Static_assert(__builtin_hlsl_is_intangible(__hlsl_resource_t), "");
+// no need to check array of __hlsl_resource_t; arrays of sizeless types are not supported
+
+_Static_assert(!__builtin_hlsl_is_intangible(int), "");
+_Static_assert(!__builtin_hlsl_is_intangible(float3), "");
+_Static_assert(!__builtin_hlsl_is_intangible(half[4]), "");
+
+typedef __hlsl_resource_t Res;
+_Static_assert(__builtin_hlsl_is_intangible(const Res), "");
+// no need to check array of Res; arrays of sizeless types are not supported
+
+struct ABuffer {
+    const int i[10];
+    __hlsl_resource_t h;
+};
+_Static_assert(__builtin_hlsl_is_intangible(ABuffer), "");
+_Static_assert(__builtin_hlsl_is_intangible(ABuffer[10]), "");
+
+struct MyStruct {
+    half2 h2;
+    int3 i3;
+};
+_Static_assert(!__builtin_hlsl_is_intangible(MyStruct), "");
+_Static_assert(!__builtin_hlsl_is_intangible(MyStruct[10]), "");
+
+class MyClass {
+    int3 ivec;
+    float farray[12];
+    MyStruct ms;
+    ABuffer buf;
+};
+_Static_assert(__builtin_hlsl_is_intangible(MyClass), "");
+_Static_assert(__builtin_hlsl_is_intangible(MyClass[2]), "");
+
+union U {
+    double d[4];
+    Res buf;
+};
+_Static_assert(__builtin_hlsl_is_intangible(U), "");
+_Static_assert(__builtin_hlsl_is_intangible(U[100]), "");
+
+class MyClass2 {
+    int3 ivec;
+    float farray[12];
+    U u;
+};
+_Static_assert(__builtin_hlsl_is_intangible(MyClass2), "");
+_Static_assert(__builtin_hlsl_is_intangible(MyClass2[5]), "");
+
+class Simple {
+    int a;
+};
+
+template<typename T> struct TemplatedBuffer {
+    T a;
+    __hlsl_resource_t h;
+};
+_Static_assert(__builtin_hlsl_is_intangible(TemplatedBuffer<int>), "");
+
+struct MyStruct2 : TemplatedBuffer<float> {
+    float x;
+};
+_Static_assert(__builtin_hlsl_is_intangible(MyStruct2), "");
+
+struct MyStruct3 {
+    const TemplatedBuffer<float> TB[10];
+};
+_Static_assert(__builtin_hlsl_is_intangible(MyStruct3), "");
+
+template<typename T> struct SimpleTemplate {
+    T a;
+};
+_Static_assert(__builtin_hlsl_is_intangible(SimpleTemplate<__hlsl_resource_t>), "");
+_Static_assert(!__builtin_hlsl_is_intangible(SimpleTemplate<float>), "");
diff --git clang/test/SemaHLSL/Types/Traits/IsIntangibleTypeErrors.hlsl clang/test/SemaHLSL/Types/Traits/IsIntangibleTypeErrors.hlsl
new file mode 100644
index 000000000000..0803086749bd
--- /dev/null
+++ clang/test/SemaHLSL/Types/Traits/IsIntangibleTypeErrors.hlsl
@@ -0,0 +1,11 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library -finclude-default-header -verify %s
+
+struct Undefined; // expected-note {{forward declaration of 'Undefined'}}
+_Static_assert(!__builtin_hlsl_is_intangible(Undefined), ""); // expected-error{{incomplete type 'Undefined' used in type trait expression}}
+
+void fn(int X) {
+  // expected-error@#vla {{variable length arrays are not supported for the current target}}
+  // expected-error@#vla {{variable length arrays are not supported in '__builtin_hlsl_is_intangible'}}
+  // expected-warning@#vla {{variable length arrays in C++ are a Clang extension}}
+  _Static_assert(!__builtin_hlsl_is_intangible(int[X]), ""); // #vla
+}
diff --git clang/test/SemaHLSL/resource_binding_attr_error.hlsl clang/test/SemaHLSL/resource_binding_attr_error.hlsl
index 6a0b5956545d..cb728dca838c 100644
--- clang/test/SemaHLSL/resource_binding_attr_error.hlsl
+++ clang/test/SemaHLSL/resource_binding_attr_error.hlsl
@@ -2,7 +2,7 @@
 
 template<typename T>
 struct MyTemplatedSRV {
-  [[hlsl::resource_class(SRV)]] T x;
+  __hlsl_resource_t [[hlsl::resource_class(SRV)]] x;
 };
 
 // valid, The register keyword in this statement isn't binding a resource, rather it is
diff --git clang/test/SemaHLSL/resource_binding_attr_error_resource.hlsl clang/test/SemaHLSL/resource_binding_attr_error_resource.hlsl
index c40d1d7f60b3..4b6af47c0ab7 100644
--- clang/test/SemaHLSL/resource_binding_attr_error_resource.hlsl
+++ clang/test/SemaHLSL/resource_binding_attr_error_resource.hlsl
@@ -6,23 +6,23 @@
 
 template<typename T>
 struct MyTemplatedSRV {
-  [[hlsl::resource_class(SRV)]] T x;
+  __hlsl_resource_t [[hlsl::resource_class(SRV)]] x;
 };
 
 struct MySRV {
-  [[hlsl::resource_class(SRV)]] int x;
+  __hlsl_resource_t [[hlsl::resource_class(SRV)]] x;
 };
 
 struct MySampler {
-  [[hlsl::resource_class(Sampler)]] int x;
+  __hlsl_resource_t [[hlsl::resource_class(Sampler)]] x;
 };
 
 struct MyUAV {
-  [[hlsl::resource_class(UAV)]] int x;
+  __hlsl_resource_t [[hlsl::resource_class(UAV)]] x;
 };
 
 struct MyCBuffer {
-  [[hlsl::resource_class(CBuffer)]] int x;
+  __hlsl_resource_t [[hlsl::resource_class(CBuffer)]] x;
 };
 
 
diff --git clang/test/SemaHLSL/resource_binding_attr_error_udt.hlsl clang/test/SemaHLSL/resource_binding_attr_error_udt.hlsl
index edb3f30739cd..ea2d576e4cca 100644
--- clang/test/SemaHLSL/resource_binding_attr_error_udt.hlsl
+++ clang/test/SemaHLSL/resource_binding_attr_error_udt.hlsl
@@ -2,23 +2,23 @@
 
 template<typename T>
 struct MyTemplatedUAV {
-  [[hlsl::resource_class(UAV)]] T x;
+  __hlsl_resource_t [[hlsl::resource_class(UAV)]] x;
 };
 
 struct MySRV {
-  [[hlsl::resource_class(SRV)]] int x;
+  __hlsl_resource_t [[hlsl::resource_class(SRV)]] x;
 };
 
 struct MySampler {
-  [[hlsl::resource_class(Sampler)]] int x;
+  __hlsl_resource_t [[hlsl::resource_class(Sampler)]] x;
 };
 
 struct MyUAV {
-  [[hlsl::resource_class(UAV)]] int x;
+  __hlsl_resource_t [[hlsl::resource_class(UAV)]] x;
 };
 
 struct MyCBuffer {
-  [[hlsl::resource_class(CBuffer)]] int x;
+  __hlsl_resource_t [[hlsl::resource_class(CBuffer)]] x;
 };
 
 // Valid: f is skipped, SRVBuf is bound to t0, UAVBuf is bound to u0
diff --git clang/test/SemaObjC/ivar-access-tests.m clang/test/SemaObjC/ivar-access-tests.m
index cd7e09d406ad..6060dea5ab0f 100644
--- clang/test/SemaObjC/ivar-access-tests.m
+++ clang/test/SemaObjC/ivar-access-tests.m
@@ -2,6 +2,8 @@
 
 @interface MySuperClass
 {
+  int unmarked;
+
 @private
   int private;
 
@@ -17,6 +19,7 @@
 - (void) test {
     int access;
     MySuperClass *s = 0;
+    access = s->unmarked;
     access = s->private;   
     access = s->protected;
 }
@@ -30,9 +33,11 @@
 - (void) test {
     int access;
     MySuperClass *s = 0;
+    access = s->unmarked;
     access = s->private; // expected-error {{instance variable 'private' is private}}
     access = s->protected;
     MyClass *m=0;
+    access = m->unmarked;
     access = m->private; // expected-error {{instance variable 'private' is private}}
     access = m->protected;
 }
@@ -46,9 +51,11 @@
 - (void) test {
     int access;
     MySuperClass *s = 0;
+    access = s->unmarked;
     access = s->private; // expected-error {{instance variable 'private' is private}}
     access = s->protected;
     MyClass *m=0;
+    access = m->unmarked;
     access = m->private; // expected-error {{instance variable 'private' is private}}
     access = m->protected;
 }
@@ -61,9 +68,11 @@
 - (void) test {
     int access;
     MySuperClass *s = 0;
+    access = s->unmarked; // expected-error {{instance variable 'unmarked' is protected}}
     access = s->private; // expected-error {{instance variable 'private' is private}}
     access = s->protected; // expected-error {{instance variable 'protected' is protected}}
     MyClass *m=0;
+    access = m->unmarked; // expected-error {{instance variable 'unmarked' is protected}}
     access = m->private; // expected-error {{instance variable 'private' is private}}
     access = m->protected; // expected-error {{instance variable 'protected' is protected}}
 }
@@ -73,6 +82,7 @@ int main (void)
 {
   MySuperClass *s = 0;
   int access;
+  access = s->unmarked; // expected-error {{instance variable 'unmarked' is protected}}
   access = s->private;   // expected-error {{instance variable 'private' is private}}
   access = s->protected; // expected-error {{instance variable 'protected' is protected}}
   return 0;
diff --git clang/test/SemaOpenACC/compute-construct-ast.cpp clang/test/SemaOpenACC/compute-construct-ast.cpp
index e632522f877b..7a33aeb80570 100644
--- clang/test/SemaOpenACC/compute-construct-ast.cpp
+++ clang/test/SemaOpenACC/compute-construct-ast.cpp
@@ -117,5 +117,26 @@ struct S {
 void use() {
   TemplFunc<S>();
 }
-#endif
 
+struct HasCtor { HasCtor(); operator int(); ~HasCtor(); };
+
+void useCtorType() {
+  // CHECK-LABEL: useCtorType
+  // CHECK-NEXT: CompoundStmt
+
+#pragma acc kernels num_workers(HasCtor{})
+  // CHECK-NEXT: OpenACCComputeConstruct{{.*}} kernels
+  // CHECK-NEXT: num_workers clause
+  // CHECK-NEXT: ImplicitCastExpr{{.*}}'int' <UserDefinedConversion>
+  // CHECK-NEXT: CXXMemberCallExpr{{.*}}'int'
+  // CHECK-NEXT: MemberExpr{{.*}}.operator int
+  // CHECK-NEXT: MaterializeTemporaryExpr{{.*}}'HasCtor'
+  // CHECK-NEXT: CXXBindTemporaryExpr{{.*}}'HasCtor'
+  // CHECK-NEXT: CXXTemporaryObjectExpr{{.*}}'HasCtor'
+
+  while(true);
+  // CHECK-NEXT: WhileStmt
+  // CHECK-NEXT: CXXBoolLiteralExpr
+  // CHECK-NEXT: NullStmt
+}
+#endif
diff --git clang/test/SemaTemplate/GH18291.cpp clang/test/SemaTemplate/GH18291.cpp
index ca1e69e4ca3f..820564ffa6f1 100644
--- clang/test/SemaTemplate/GH18291.cpp
+++ clang/test/SemaTemplate/GH18291.cpp
@@ -86,4 +86,29 @@ namespace func_pointer {
     template <class _Tp> void pow(_Tp, complex<typename __promote<_Tp>::type>) = delete;
     void (*ptr)(const complex<float> &, complex<float>){pow};
   } // namespace param
-} // namespace t3
+} // namespace func_pointer
+
+namespace static_vs_nonstatic {
+  namespace implicit_obj_param {
+    struct A {
+      template <class... Args>
+        static void f(int a, Args... args) {}
+      template <class... Args>
+        void f(Args... args) = delete;
+    };
+    void g(){
+      A::f(0);
+    }
+  } // namespace implicit_obj_param
+  namespace explicit_obj_param {
+    struct A {
+      template <class... Args>
+        static void f(int, Args... args) {}
+      template <class... Args>
+        void f(this A *, Args... args) = delete;
+    };
+    void g(){
+      A::f(0);
+    }
+  } // namespace explicit_obj_param
+} // namespace static_vs_nonstatic
diff --git clang/test/SemaTemplate/cwg2398.cpp clang/test/SemaTemplate/cwg2398.cpp
index 6fe1bd3d4f16..1d9747276fbe 100644
--- clang/test/SemaTemplate/cwg2398.cpp
+++ clang/test/SemaTemplate/cwg2398.cpp
@@ -65,26 +65,20 @@ namespace class_template {
   template <class T3> struct B;
 
   template <template <class T4> class TT1, class T5> struct B<TT1<T5>>;
-  // new-note@-1 {{partial specialization matches}}
 
   template <class T6, class T7> struct B<A<T6, T7>> {};
-  // new-note@-1 {{partial specialization matches}}
 
   template struct B<A<int>>;
-  // new-error@-1 {{ambiguous partial specialization}}
 } // namespace class_template
 
 namespace class_template_func {
   template <class T1, class T2 = float> struct A {};
 
   template <template <class T4> class TT1, class T5> void f(TT1<T5>);
-  // new-note@-1 {{candidate function}}
-
   template <class T6, class T7>                      void f(A<T6, T7>) {};
-  // new-note@-1 {{candidate function}}
 
   void g() {
-    f(A<int>()); // new-error {{call to 'f' is ambiguous}}
+    f(A<int>());
   }
 } // namespace class_template_func
 
@@ -326,6 +320,48 @@ namespace classes {
       // expected-error@-1 {{no matching function for call}}
     }
   } // namespace packs
+  namespace nested {
+    template <class T1, int V1, int V2> struct A {
+      using type = T1;
+      static constexpr int v1 = V1, v2 = V2;
+    };
+
+    template <template <class T1> class TT1> auto f(TT1<int>) {
+      return TT1<float>();
+    }
+
+    template <template <class T2, int V3> class TT2> auto g(TT2<double, 1>) {
+      // new-note@-1 {{too few template arguments for class template 'A'}}
+      // old-note@-2 {{template template argument has different template parameters}}
+      return f(TT2<int, 2>());
+    }
+
+    using B = decltype(g(A<double, 1, 3>()));
+    // expected-error@-1 {{no matching function for call}}
+
+    using X = B::type; // expected-error {{undeclared identifier 'B'}}
+    using X = float;
+    static_assert(B::v1 == 2); // expected-error {{undeclared identifier 'B'}}
+    static_assert(B::v2 == 3); // expected-error {{undeclared identifier 'B'}}
+  }
+  namespace defaulted {
+    template <class T1, class T2 = T1*> struct A {
+      using type = T2;
+    };
+
+    template <template <class> class TT> TT<float> f(TT<int>);
+    // new-note@-1  {{deduced type 'A<[...], (default) int *>' of 1st parameter does not match adjusted type 'A<[...], double *>' of argument [with TT = A]}}
+    // old-note@-2 2{{template template argument has different template parameters}}
+
+    using X = int*; // new-note {{previous definition is here}}
+    using X = decltype(f(A<int>()))::type;
+    // new-error@-1 {{different types ('decltype(f(A<int>()))::type' (aka 'float *') vs 'int *')}}
+    // old-error@-2 {{no matching function for call}}
+
+    using Y = double*;
+    using Y = decltype(f(A<int, double*>()))::type;
+    // expected-error@-1 {{no matching function for call}}
+  } // namespace defaulted
 } // namespace classes
 
 namespace regression1 {
diff --git clang/test/SemaTemplate/pack-deduction.cpp clang/test/SemaTemplate/pack-deduction.cpp
index e42709820e9c..28fb127a3864 100644
--- clang/test/SemaTemplate/pack-deduction.cpp
+++ clang/test/SemaTemplate/pack-deduction.cpp
@@ -185,3 +185,17 @@ void Run() {
   Outer<void>::Inner<0>().Test(1,1);
 }
 }
+
+namespace GH107560 {
+int bar(...);
+
+template <int> struct Int {};
+
+template <class ...T>
+constexpr auto foo(T... x) -> decltype(bar(T(x)...)) { return 10; }
+
+template <class ...T>
+constexpr auto baz(Int<foo<T>(T())>... x) -> int { return 1; }
+
+static_assert(baz<Int<1>, Int<2>, Int<3>>(Int<10>(), Int<10>(), Int<10>()) == 1, "");
+}
diff --git clang/tools/CMakeLists.txt clang/tools/CMakeLists.txt
index 4885afc1584d..f588a3634ee6 100644
--- clang/tools/CMakeLists.txt
+++ clang/tools/CMakeLists.txt
@@ -5,7 +5,6 @@ add_clang_subdirectory(driver)
 add_clang_subdirectory(apinotes-test)
 add_clang_subdirectory(clang-diff)
 add_clang_subdirectory(clang-format)
-add_clang_subdirectory(clang-format-vs)
 add_clang_subdirectory(clang-fuzzer)
 add_clang_subdirectory(clang-import-test)
 add_clang_subdirectory(clang-linker-wrapper)
diff --git clang/tools/c-arcmt-test/c-arcmt-test.c clang/tools/c-arcmt-test/c-arcmt-test.c
index 00999f188c7d..4d0c418714b9 100644
--- clang/tools/c-arcmt-test/c-arcmt-test.c
+++ clang/tools/c-arcmt-test/c-arcmt-test.c
@@ -109,10 +109,10 @@ static void flush_atexit(void) {
 
 int main(int argc, const char **argv) {
 #ifdef __MVS__
-  if (enableAutoConversion(fileno(stdout)) == -1)
+  if (enablezOSAutoConversion(fileno(stdout)) == -1)
     fprintf(stderr, "Setting conversion on stdout failed\n");
 
-  if (enableAutoConversion(fileno(stderr)) == -1)
+  if (enablezOSAutoConversion(fileno(stderr)) == -1)
     fprintf(stderr, "Setting conversion on stderr failed\n");
 #endif
 
diff --git clang/tools/c-index-test/c-index-test.c clang/tools/c-index-test/c-index-test.c
index f472a67f3bc5..b48f44950ab7 100644
--- clang/tools/c-index-test/c-index-test.c
+++ clang/tools/c-index-test/c-index-test.c
@@ -5180,10 +5180,10 @@ int main(int argc, const char **argv) {
   thread_info client_data;
 
 #ifdef __MVS__
-  if (enableAutoConversion(fileno(stdout)) == -1)
+  if (enablezOSAutoConversion(fileno(stdout)) == -1)
     fprintf(stderr, "Setting conversion on stdout failed\n");
 
-  if (enableAutoConversion(fileno(stderr)) == -1)
+  if (enablezOSAutoConversion(fileno(stderr)) == -1)
     fprintf(stderr, "Setting conversion on stderr failed\n");
 #endif
 
diff --git clang/tools/clang-format-vs/.gitignore clang/tools/clang-format-vs/.gitignore
deleted file mode 100644
index 270d840cb6d1..000000000000
--- clang/tools/clang-format-vs/.gitignore
+++ /dev/null
@@ -1,11 +0,0 @@
-# Visual Studio files
-.vs/
-*.user
-/packages/
-/ClangFormat/obj/
-/ClangFormat/bin/
-
-# Generated and copied files
-/ClangFormat/Key.snk
-/ClangFormat/clang-format.exe
-/ClangFormat/source.extension.vsixmanifest
diff --git clang/tools/clang-format-vs/CMakeLists.txt clang/tools/clang-format-vs/CMakeLists.txt
deleted file mode 100644
index 1d44a47a3137..000000000000
--- clang/tools/clang-format-vs/CMakeLists.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-option(BUILD_CLANG_FORMAT_VS_PLUGIN "Build clang-format VS plugin" OFF)
-if (BUILD_CLANG_FORMAT_VS_PLUGIN)
-  add_custom_target(clang_format_exe_for_vsix
-      ${CMAKE_COMMAND} -E copy_if_different
-      "${LLVM_TOOLS_BINARY_DIR}/clang-format.exe"
-      "${CMAKE_CURRENT_SOURCE_DIR}/ClangFormat/clang-format.exe"
-      DEPENDS clang-format)
-
-  # Build number added to Clang version to ensure that new VSIX can be upgraded
-  string(TIMESTAMP CLANG_FORMAT_VSIX_BUILD %y%m%d%H%M UTC)
-
-  if (NOT CLANG_FORMAT_VS_VERSION)
-    set(CLANG_FORMAT_VS_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}.${CLANG_FORMAT_VSIX_BUILD}")
-  endif()
-
-  configure_file("source.extension.vsixmanifest.in"
-      "${CMAKE_CURRENT_SOURCE_DIR}/ClangFormat/source.extension.vsixmanifest")
-
-  find_program(NUGET_EXE nuget PATHS ${NUGET_EXE_DIR})
-  if (NOT NUGET_EXE)
-    message(FATAL_ERROR "Could not find nuget.exe. Download from https://www.nuget.org/nuget.exe"
-                        " and add parent directory to PATH or pass it via NUGET_EXE_DIR var.")
-  endif()
-
-  add_custom_target(clang_format_vsix ALL
-      COMMAND ${NUGET_EXE} restore "${CMAKE_CURRENT_SOURCE_DIR}/ClangFormat.sln"
-      COMMAND devenv "${CMAKE_CURRENT_SOURCE_DIR}/ClangFormat.sln" /Build Release
-      DEPENDS clang_format_exe_for_vsix "${CMAKE_CURRENT_SOURCE_DIR}/ClangFormat/source.extension.vsixmanifest"
-      COMMAND ${CMAKE_COMMAND} -E copy_if_different
-      "${CMAKE_CURRENT_SOURCE_DIR}/ClangFormat/bin/Release/ClangFormat.vsix"
-      "${LLVM_TOOLS_BINARY_DIR}/ClangFormat.vsix"
-      DEPENDS clang_format_exe_for_vsix)
-endif()
diff --git clang/tools/clang-format-vs/ClangFormat.sln clang/tools/clang-format-vs/ClangFormat.sln
deleted file mode 100644
index 46d742bce3f0..000000000000
--- clang/tools/clang-format-vs/ClangFormat.sln
+++ /dev/null
@@ -1,22 +0,0 @@
-
-Microsoft Visual Studio Solution File, Format Version 12.00
-# Visual Studio 15
-VisualStudioVersion = 15.0.26228.12
-MinimumVisualStudioVersion = 10.0.40219.1
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ClangFormat", "ClangFormat\ClangFormat.csproj", "{7FD1783E-2D31-4D05-BF23-6EBE1B42B608}"
-EndProject
-Global
-	GlobalSection(SolutionConfigurationPlatforms) = preSolution
-		Debug|Any CPU = Debug|Any CPU
-		Release|Any CPU = Release|Any CPU
-	EndGlobalSection
-	GlobalSection(ProjectConfigurationPlatforms) = postSolution
-		{7FD1783E-2D31-4D05-BF23-6EBE1B42B608}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
-		{7FD1783E-2D31-4D05-BF23-6EBE1B42B608}.Debug|Any CPU.Build.0 = Debug|Any CPU
-		{7FD1783E-2D31-4D05-BF23-6EBE1B42B608}.Release|Any CPU.ActiveCfg = Release|Any CPU
-		{7FD1783E-2D31-4D05-BF23-6EBE1B42B608}.Release|Any CPU.Build.0 = Release|Any CPU
-	EndGlobalSection
-	GlobalSection(SolutionProperties) = preSolution
-		HideSolutionNode = FALSE
-	EndGlobalSection
-EndGlobal
diff --git clang/tools/clang-format-vs/ClangFormat/ClangFormat.csproj clang/tools/clang-format-vs/ClangFormat/ClangFormat.csproj
deleted file mode 100644
index e5b7ec008a1a..000000000000
--- clang/tools/clang-format-vs/ClangFormat/ClangFormat.csproj
+++ /dev/null
@@ -1,261 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003" ToolsVersion="4.0">
-  <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
-  <PropertyGroup>
-    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
-    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
-    <SchemaVersion>2.0</SchemaVersion>
-    <ProjectGuid>{7FD1783E-2D31-4D05-BF23-6EBE1B42B608}</ProjectGuid>
-    <ProjectTypeGuids>{82b43b9b-a64c-4715-b499-d71e9ca2bd60};{60dc8134-eba5-43b8-bcc9-bb4bc16c2548};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
-    <OutputType>Library</OutputType>
-    <AppDesignerFolder>Properties</AppDesignerFolder>
-    <RootNamespace>LLVM.ClangFormat</RootNamespace>
-    <AssemblyName>ClangFormat</AssemblyName>
-    <SignAssembly>true</SignAssembly>
-    <AssemblyOriginatorKeyFile>Key.snk</AssemblyOriginatorKeyFile>
-    <TargetFrameworkVersion>v4.0</TargetFrameworkVersion>
-    <MinimumVisualStudioVersion>15.0</MinimumVisualStudioVersion>
-    <FileUpgradeFlags>
-    </FileUpgradeFlags>
-    <UpgradeBackupLocation>
-    </UpgradeBackupLocation>
-    <OldToolsVersion>4.0</OldToolsVersion>
-    <PublishUrl>publish\</PublishUrl>
-    <Install>true</Install>
-    <InstallFrom>Disk</InstallFrom>
-    <UpdateEnabled>false</UpdateEnabled>
-    <UpdateMode>Foreground</UpdateMode>
-    <UpdateInterval>7</UpdateInterval>
-    <UpdateIntervalUnits>Days</UpdateIntervalUnits>
-    <UpdatePeriodically>false</UpdatePeriodically>
-    <UpdateRequired>false</UpdateRequired>
-    <MapFileExtensions>true</MapFileExtensions>
-    <ApplicationRevision>0</ApplicationRevision>
-    <ApplicationVersion>1.0.0.%2a</ApplicationVersion>
-    <IsWebBootstrapper>false</IsWebBootstrapper>
-    <UseApplicationTrust>false</UseApplicationTrust>
-    <BootstrapperEnabled>true</BootstrapperEnabled>
-    <TargetFrameworkProfile />
-  </PropertyGroup>
-  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
-    <DebugSymbols>true</DebugSymbols>
-    <DebugType>full</DebugType>
-    <Optimize>false</Optimize>
-    <OutputPath>bin\Debug\</OutputPath>
-    <DefineConstants>DEBUG;TRACE</DefineConstants>
-    <ErrorReport>prompt</ErrorReport>
-    <WarningLevel>4</WarningLevel>
-    <Prefer32Bit>false</Prefer32Bit>
-  </PropertyGroup>
-  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
-    <DebugType>pdbonly</DebugType>
-    <Optimize>true</Optimize>
-    <OutputPath>bin\Release\</OutputPath>
-    <DefineConstants>TRACE</DefineConstants>
-    <ErrorReport>prompt</ErrorReport>
-    <WarningLevel>4</WarningLevel>
-    <RunCodeAnalysis>true</RunCodeAnalysis>
-    <Prefer32Bit>false</Prefer32Bit>
-  </PropertyGroup>
-  <ItemGroup>
-    <Reference Include="envdte, Version=8.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a">
-      <EmbedInteropTypes>True</EmbedInteropTypes>
-    </Reference>
-    <Reference Include="envdte80, Version=8.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a">
-      <EmbedInteropTypes>True</EmbedInteropTypes>
-    </Reference>
-    <Reference Include="Microsoft.CSharp" />
-    <Reference Include="Microsoft.VisualStudio.CoreUtility, Version=10.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL">
-      <HintPath>..\packages\VSSDK.CoreUtility.10.0.4\lib\net40\Microsoft.VisualStudio.CoreUtility.dll</HintPath>
-      <Private>False</Private>
-    </Reference>
-    <Reference Include="Microsoft.VisualStudio.Editor, Version=10.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL">
-      <HintPath>..\packages\VSSDK.Editor.10.0.4\lib\net40\Microsoft.VisualStudio.Editor.dll</HintPath>
-      <Private>False</Private>
-    </Reference>
-    <Reference Include="Microsoft.VisualStudio.OLE.Interop, Version=7.1.40304.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a">
-      <HintPath>..\packages\VSSDK.OLE.Interop.7.0.4\lib\net20\Microsoft.VisualStudio.OLE.Interop.dll</HintPath>
-      <Private>True</Private>
-      <Private>False</Private>
-    </Reference>
-    <Reference Include="Microsoft.VisualStudio.Shell.10.0, Version=10.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL">
-      <HintPath>..\packages\VSSDK.Shell.10.10.0.3\lib\net40\Microsoft.VisualStudio.Shell.10.0.dll</HintPath>
-      <Private>False</Private>
-    </Reference>
-    <Reference Include="Microsoft.VisualStudio.Shell.Immutable.10.0, Version=10.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL">
-      <HintPath>..\packages\VSSDK.Shell.Immutable.10.10.0.3\lib\net40\Microsoft.VisualStudio.Shell.Immutable.10.0.dll</HintPath>
-      <Private>True</Private>
-    </Reference>
-    <Reference Include="Microsoft.VisualStudio.Shell.Interop, Version=7.1.40304.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a">
-      <HintPath>..\packages\VSSDK.Shell.Interop.7.0.4\lib\net20\Microsoft.VisualStudio.Shell.Interop.dll</HintPath>
-      <Private>True</Private>
-      <Private>False</Private>
-    </Reference>
-    <Reference Include="Microsoft.VisualStudio.Shell.Interop.8.0, Version=8.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a">
-      <HintPath>..\packages\VSSDK.Shell.Interop.8.8.0.3\lib\net20\Microsoft.VisualStudio.Shell.Interop.8.0.dll</HintPath>
-      <Private>True</Private>
-      <Private>False</Private>
-    </Reference>
-    <Reference Include="Microsoft.VisualStudio.Shell.Interop.10.0" />
-    <Reference Include="Microsoft.VisualStudio.Shell.Interop.9.0, Version=9.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a">
-      <HintPath>..\packages\VSSDK.Shell.Interop.9.9.0.3\lib\net20\Microsoft.VisualStudio.Shell.Interop.9.0.dll</HintPath>
-      <Private>True</Private>
-      <Private>False</Private>
-    </Reference>
-    <Reference Include="Microsoft.VisualStudio.Text.Data, Version=10.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL">
-      <HintPath>..\packages\VSSDK.Text.10.0.4\lib\net40\Microsoft.VisualStudio.Text.Data.dll</HintPath>
-      <Private>False</Private>
-    </Reference>
-    <Reference Include="Microsoft.VisualStudio.Text.Logic, Version=10.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL">
-      <HintPath>..\packages\VSSDK.Text.10.0.4\lib\net40\Microsoft.VisualStudio.Text.Logic.dll</HintPath>
-      <Private>False</Private>
-    </Reference>
-    <Reference Include="Microsoft.VisualStudio.Text.UI, Version=10.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL">
-      <HintPath>..\packages\VSSDK.Text.10.0.4\lib\net40\Microsoft.VisualStudio.Text.UI.dll</HintPath>
-      <Private>False</Private>
-    </Reference>
-    <Reference Include="Microsoft.VisualStudio.Text.UI.Wpf, Version=10.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL">
-      <HintPath>..\packages\VSSDK.Text.10.0.4\lib\net40\Microsoft.VisualStudio.Text.UI.Wpf.dll</HintPath>
-      <Private>False</Private>
-    </Reference>
-    <Reference Include="Microsoft.VisualStudio.TextManager.Interop, Version=7.1.40304.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a">
-      <Private>False</Private>
-    </Reference>
-    <Reference Include="Microsoft.VisualStudio.TextManager.Interop.8.0, Version=8.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a">
-      <HintPath>..\packages\VSSDK.TextManager.Interop.8.8.0.4\lib\net20\Microsoft.VisualStudio.TextManager.Interop.8.0.dll</HintPath>
-      <Private>True</Private>
-      <Private>False</Private>
-    </Reference>
-    <Reference Include="PresentationCore" />
-    <Reference Include="PresentationFramework" />
-    <Reference Include="stdole, Version=7.0.3300.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a">
-      <HintPath>..\packages\VSSDK.DTE.7.0.3\lib\net20\stdole.dll</HintPath>
-      <EmbedInteropTypes>False</EmbedInteropTypes>
-    </Reference>
-    <Reference Include="System" />
-    <Reference Include="System.ComponentModel.Composition" />
-    <Reference Include="System.Core" />
-    <Reference Include="System.Data" />
-    <Reference Include="System.Design" />
-    <Reference Include="System.Drawing" />
-    <Reference Include="System.Windows.Forms" />
-    <Reference Include="System.Xml" />
-    <Reference Include="System.Xml.Linq" />
-    <Reference Include="WindowsBase" />
-  </ItemGroup>
-  <ItemGroup>
-    <COMReference Include="Microsoft.VisualStudio.CommandBars">
-      <Guid>{1CBA492E-7263-47BB-87FE-639000619B15}</Guid>
-      <VersionMajor>8</VersionMajor>
-      <VersionMinor>0</VersionMinor>
-      <Lcid>0</Lcid>
-      <WrapperTool>primary</WrapperTool>
-      <Isolated>False</Isolated>
-      <EmbedInteropTypes>False</EmbedInteropTypes>
-    </COMReference>
-    <COMReference Include="stdole">
-      <Guid>{00020430-0000-0000-C000-000000000046}</Guid>
-      <VersionMajor>2</VersionMajor>
-      <VersionMinor>0</VersionMinor>
-      <Lcid>0</Lcid>
-      <WrapperTool>primary</WrapperTool>
-      <Isolated>False</Isolated>
-      <EmbedInteropTypes>False</EmbedInteropTypes>
-    </COMReference>
-  </ItemGroup>
-  <ItemGroup>
-    <Compile Include="Guids.cs" />
-    <Compile Include="Resources.Designer.cs">
-      <AutoGen>True</AutoGen>
-      <DesignTime>True</DesignTime>
-      <DependentUpon>Resources.resx</DependentUpon>
-    </Compile>
-    <Compile Include="GlobalSuppressions.cs" />
-    <Compile Include="ClangFormatPackage.cs">
-      <SubType>Component</SubType>
-    </Compile>
-    <Compile Include="Properties\AssemblyInfo.cs" />
-    <Compile Include="PkgCmdID.cs" />
-    <Compile Include="RunningDocTableEventsDispatcher.cs" />
-    <Compile Include="Vsix.cs" />
-  </ItemGroup>
-  <ItemGroup>
-    <EmbeddedResource Include="Resources.resx">
-      <Generator>ResXFileCodeGenerator</Generator>
-      <LastGenOutput>Resources.Designer.cs</LastGenOutput>
-      <SubType>Designer</SubType>
-    </EmbeddedResource>
-    <EmbeddedResource Include="VSPackage.resx">
-      <MergeWithCTO>true</MergeWithCTO>
-      <ManifestResourceName>VSPackage</ManifestResourceName>
-    </EmbeddedResource>
-  </ItemGroup>
-  <ItemGroup>
-    <None Include="Key.snk" />
-    <None Include="packages.config">
-      <SubType>Designer</SubType>
-    </None>
-    <None Include="source.extension.vsixmanifest">
-      <SubType>Designer</SubType>
-    </None>
-  </ItemGroup>
-  <ItemGroup>
-    <VSCTCompile Include="ClangFormat.vsct">
-      <ResourceName>Menus.ctmenu</ResourceName>
-    </VSCTCompile>
-  </ItemGroup>
-  <ItemGroup>
-    <None Include="Resources\Images_32bit.bmp" />
-  </ItemGroup>
-  <ItemGroup>
-    <Content Include="clang-format.exe">
-      <IncludeInVSIX>true</IncludeInVSIX>
-    </Content>
-    <Content Include="license.txt">
-      <IncludeInVSIX>true</IncludeInVSIX>
-    </Content>
-    <Content Include="Resources\Package.ico" />
-  </ItemGroup>
-  <ItemGroup>
-    <BootstrapperPackage Include=".NETFramework,Version=v4.0">
-      <Visible>False</Visible>
-      <ProductName>Microsoft .NET Framework 4 %28x86 and x64%29</ProductName>
-      <Install>true</Install>
-    </BootstrapperPackage>
-    <BootstrapperPackage Include="Microsoft.Net.Client.3.5">
-      <Visible>False</Visible>
-      <ProductName>.NET Framework 3.5 SP1 Client Profile</ProductName>
-      <Install>false</Install>
-    </BootstrapperPackage>
-    <BootstrapperPackage Include="Microsoft.Net.Framework.3.5.SP1">
-      <Visible>False</Visible>
-      <ProductName>.NET Framework 3.5 SP1</ProductName>
-      <Install>false</Install>
-    </BootstrapperPackage>
-    <BootstrapperPackage Include="Microsoft.Windows.Installer.4.5">
-      <Visible>False</Visible>
-      <ProductName>Windows Installer 4.5</ProductName>
-      <Install>true</Install>
-    </BootstrapperPackage>
-  </ItemGroup>
-  <PropertyGroup>
-    <UseCodebase>true</UseCodebase>
-  </PropertyGroup>
-  <PropertyGroup>
-    <VisualStudioVersion Condition="'$(VisualStudioVersion)' == ''">10.0</VisualStudioVersion>
-    <VSToolsPath Condition="'$(VSToolsPath)' == ''">$(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion)</VSToolsPath>
-  </PropertyGroup>
-  <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
-  <Import Project="$(VSToolsPath)\VSSDK\Microsoft.VsSDK.targets" Condition="'$(VSToolsPath)' != ''" />
-  <Import Project="$(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v10.0\VSSDK\Microsoft.VsSDK.targets" Condition="false" />
-  <PropertyGroup>
-    <PreBuildEvent>if not exist $(ProjectDir)Key.snk ("$(FrameworkSDKDir)Bin\NETFX 4.6 Tools\sn.exe" -k $(ProjectDir)Key.snk)</PreBuildEvent>
-  </PropertyGroup>
-  <!-- To modify your build process, add your task inside one of the targets below and uncomment it. 
-       Other similar extension points exist, see Microsoft.Common.targets.
-  <Target Name="BeforeBuild">
-  </Target>
-  <Target Name="AfterBuild">
-  </Target>
-  -->
-</Project>
diff --git clang/tools/clang-format-vs/ClangFormat/ClangFormat.vsct clang/tools/clang-format-vs/ClangFormat/ClangFormat.vsct
deleted file mode 100644
index 798957740d54..000000000000
--- clang/tools/clang-format-vs/ClangFormat/ClangFormat.vsct
+++ /dev/null
@@ -1,127 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<CommandTable xmlns="http://schemas.microsoft.com/VisualStudio/2005-10-18/CommandTable" xmlns:xs="http://www.w3.org/2001/XMLSchema">
-
-  <!--  This is the file that defines the actual layout and type of the commands.
-        It is divided in different sections (e.g. command definition, command
-        placement, ...), with each defining a specific set of properties.
-        See the comment before each section for more details about how to
-        use it. -->
-
-  <!--  The VSCT compiler (the tool that translates this file into the binary 
-        format that VisualStudio will consume) has the ability to run a preprocessor 
-        on the vsct file; this preprocessor is (usually) the C++ preprocessor, so 
-        it is possible to define includes and macros with the same syntax used 
-        in C++ files. Using this ability of the compiler here, we include some files 
-        defining some of the constants that we will use inside the file. -->
-
-  <!--This is the file that defines the IDs for all the commands exposed by VisualStudio. -->
-  <Extern href="stdidcmd.h"/>
-
-  <!--This header contains the command ids for the menus provided by the shell. -->
-  <Extern href="vsshlids.h"/>
-
-
-
-
-  <!--The Commands section is where we the commands, menus and menu groups are defined.
-      This section uses a Guid to identify the package that provides the command defined inside it. -->
-  <Commands package="guidClangFormatPkg">
-    <!-- Inside this section we have different sub-sections: one for the menus, another  
-    for the menu groups, one for the buttons (the actual commands), one for the combos 
-    and the last one for the bitmaps used. Each element is identified by a command id that  
-    is a unique pair of guid and numeric identifier; the guid part of the identifier is usually  
-    called "command set" and is used to group different command inside a logically related  
-    group; your package should define its own command set in order to avoid collisions  
-    with command ids defined by other packages. -->
-
-    
-    <!-- In this section you can define new menu groups. A menu group is a container for 
-         other menus or buttons (commands); from a visual point of view you can see the 
-         group as the part of a menu contained between two lines. The parent of a group 
-         must be a menu. -->
-    <Groups>
-
-      <Group guid="guidClangFormatCmdSet" id="MyMenuGroup" priority="0x0600">
-        <Parent guid="guidSHLMainMenu" id="IDM_VS_MENU_TOOLS"/>
-      </Group>
-      
-
-
-    </Groups>
-    
-    <!--Buttons section. -->
-    <!--This section defines the elements the user can interact with, like a menu command or a button 
-        or combo box in a toolbar. -->
-    <Buttons>
-      <!--To define a menu group you have to specify its ID, the parent menu and its display priority. 
-          The command is visible and enabled by default. If you need to change the visibility, status, etc, you can use
-          the CommandFlag node.
-          You can add more than one CommandFlag node e.g.:
-              <CommandFlag>DefaultInvisible</CommandFlag>
-              <CommandFlag>DynamicVisibility</CommandFlag>
-          If you do not want an image next to your command, remove the Icon node /> -->
-
-      <Button guid="guidClangFormatCmdSet" id="cmdidClangFormatSelection" priority="0x0100" type="Button">
-        <Parent guid="guidClangFormatCmdSet" id="MyMenuGroup" />
-        <Icon guid="guidImages" id="bmpPic1" />
-        <Strings>
-          <ButtonText>Clang Format Selection</ButtonText>
-        </Strings>
-      </Button>
-
-      <Button guid="guidClangFormatCmdSet" id="cmdidClangFormatDocument" priority="0x0101" type="Button">
-        <Parent guid="guidClangFormatCmdSet" id="MyMenuGroup" />
-        <Icon guid="guidImages" id="bmpPic2" />
-        <Strings>
-          <ButtonText>Clang Format Document</ButtonText>
-        </Strings>
-      </Button>
-
-    </Buttons>
-   
-    <!--The bitmaps section is used to define the bitmaps that are used for the commands.-->
-    <Bitmaps>
-      <!--  The bitmap id is defined in a way that is a little bit different from the others: 
-            the declaration starts with a guid for the bitmap strip, then there is the resource id of the 
-            bitmap strip containing the bitmaps and then there are the numeric ids of the elements used 
-            inside a button definition. An important aspect of this declaration is that the element id 
-            must be the actual index (1-based) of the bitmap inside the bitmap strip. -->
-      <Bitmap guid="guidImages" href="Resources\Images_32bit.bmp" usedList="bmpPic1, bmpPic2, bmpPicSearch, bmpPicX, bmpPicArrows"/>
-      
-    </Bitmaps>
- 
-  </Commands>
-
-
-  <KeyBindings>
-    <KeyBinding guid="guidClangFormatCmdSet" id="cmdidClangFormatSelection" editor="guidTextEditor" key1="R" mod1="Control" key2="F" mod2="Control"/>
-    <KeyBinding guid="guidClangFormatCmdSet" id="cmdidClangFormatDocument" editor="guidTextEditor" key1="R" mod1="Control" key2="D" mod2="Control"/>
-  </KeyBindings>
-
-
-
-  <Symbols>
-    <!-- This is the package guid. -->
-    <GuidSymbol name="guidClangFormatPkg" value="{c5286038-25d3-4f65-83a8-51fa2df4a146}" />
-    
-    <!-- This is the guid used to group the menu commands together -->
-    <GuidSymbol name="guidClangFormatCmdSet" value="{e39cbab1-0f96-4022-a2bc-da5a9db7eb78}">
-
-      <IDSymbol name="MyMenuGroup" value="0x1020" />
-      <IDSymbol name="cmdidClangFormatSelection" value="0x0100" />
-      <IDSymbol name="cmdidClangFormatDocument" value="0x0101" />
-    </GuidSymbol>
-
-    <GuidSymbol name="guidTextEditor" value="{8B382828-6202-11d1-8870-0000F87579D2}" />
-
-
-    <GuidSymbol name="guidImages" value="{6d53937b-9ae1-42e1-8849-d876dcdbad7b}" >
-      <IDSymbol name="bmpPic1" value="1" />
-      <IDSymbol name="bmpPic2" value="2" />
-      <IDSymbol name="bmpPicSearch" value="3" />
-      <IDSymbol name="bmpPicX" value="4" />
-      <IDSymbol name="bmpPicArrows" value="5" />
-    </GuidSymbol>
-  </Symbols>
-
-</CommandTable>
diff --git clang/tools/clang-format-vs/ClangFormat/ClangFormatPackage.cs clang/tools/clang-format-vs/ClangFormat/ClangFormatPackage.cs
deleted file mode 100644
index 26a0af3b55b5..000000000000
--- clang/tools/clang-format-vs/ClangFormat/ClangFormatPackage.cs
+++ /dev/null
@@ -1,464 +0,0 @@
-//===-- ClangFormatPackages.cs - VSPackage for clang-format ------*- C# -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This class contains a VS extension package that runs clang-format over a
-// selection in a VS text editor.
-//
-//===----------------------------------------------------------------------===//
-
-using EnvDTE;
-using Microsoft.VisualStudio.Shell;
-using Microsoft.VisualStudio.Shell.Interop;
-using Microsoft.VisualStudio.Text;
-using Microsoft.VisualStudio.Text.Editor;
-using System;
-using System.Collections;
-using System.ComponentModel;
-using System.ComponentModel.Design;
-using System.IO;
-using System.Runtime.InteropServices;
-using System.Xml.Linq;
-using System.Linq;
-using System.Text;
-
-namespace LLVM.ClangFormat
-{
-    [ClassInterface(ClassInterfaceType.AutoDual)]
-    [CLSCompliant(false), ComVisible(true)]
-    public class OptionPageGrid : DialogPage
-    {
-        private string assumeFilename = "";
-        private string fallbackStyle = "LLVM";
-        private bool sortIncludes = false;
-        private string style = "file";
-        private bool formatOnSave = false;
-        private string formatOnSaveFileExtensions =
-            ".c;.cpp;.cxx;.cc;.tli;.tlh;.h;.hh;.hpp;.hxx;.hh;.inl;" +
-            ".java;.js;.ts;.m;.mm;.proto;.protodevel;.td";
-
-        public OptionPageGrid Clone()
-        {
-            // Use MemberwiseClone to copy value types.
-            var clone = (OptionPageGrid)MemberwiseClone();
-            return clone;
-        }
-
-        public class StyleConverter : TypeConverter
-        {
-            protected ArrayList values;
-            public StyleConverter()
-            {
-                // Initializes the standard values list with defaults.
-                values = new ArrayList(new string[] { "file", "Chromium", "Google", "LLVM", "Mozilla", "WebKit" });
-            }
-
-            public override bool GetStandardValuesSupported(ITypeDescriptorContext context)
-            {
-                return true;
-            }
-
-            public override StandardValuesCollection GetStandardValues(ITypeDescriptorContext context)
-            {
-                return new StandardValuesCollection(values);
-            }
-
-            public override bool CanConvertFrom(ITypeDescriptorContext context, Type sourceType)
-            {
-                if (sourceType == typeof(string))
-                    return true;
-
-                return base.CanConvertFrom(context, sourceType);
-            }
-
-            public override object ConvertFrom(ITypeDescriptorContext context, System.Globalization.CultureInfo culture, object value)
-            {
-                string s = value as string;
-                if (s == null)
-                    return base.ConvertFrom(context, culture, value);
-
-                return value;
-            }
-        }
-
-        [Category("Format Options")]
-        [DisplayName("Style")]
-        [Description("Coding style, currently supports:\n" +
-                     "  - Predefined styles ('LLVM', 'Google', 'Chromium', 'Mozilla', 'WebKit').\n" +
-                     "  - 'file' to search for a YAML .clang-format or _clang-format\n" +
-                     "    configuration file.\n" +
-                     "  - A YAML configuration snippet.\n\n" +
-                     "'File':\n" +
-                     "  Searches for a .clang-format or _clang-format configuration file\n" +
-                     "  in the source file's directory and its parents.\n\n" +
-                     "YAML configuration snippet:\n" +
-                     "  The content of a .clang-format configuration file, as string.\n" +
-                     "  Example: '{BasedOnStyle: \"LLVM\", IndentWidth: 8}'\n\n" +
-                     "See also: http://clang.llvm.org/docs/ClangFormatStyleOptions.html.")]
-        [TypeConverter(typeof(StyleConverter))]
-        public string Style
-        {
-            get { return style; }
-            set { style = value; }
-        }
-
-        public sealed class FilenameConverter : TypeConverter
-        {
-            public override bool CanConvertFrom(ITypeDescriptorContext context, Type sourceType)
-            {
-                if (sourceType == typeof(string))
-                    return true;
-
-                return base.CanConvertFrom(context, sourceType);
-            }
-
-            public override object ConvertFrom(ITypeDescriptorContext context, System.Globalization.CultureInfo culture, object value)
-            {
-                string s = value as string;
-                if (s == null)
-                    return base.ConvertFrom(context, culture, value);
-
-                // Reject quotes in the value: Windows file names cannot contain quotes anyway,
-                // and a quote in user input would terminate the quoted command-line parameter
-                // and break the clang-format invocation, which is hard to debug.
-                if (s.IndexOf('\"') != -1)
-                    throw new NotSupportedException("Filename cannot contain quotes");
-
-                return value;
-            }
-        }
-
-        [Category("Format Options")]
-        [DisplayName("Assume Filename")]
-        [Description("When reading from stdin, clang-format assumes this " +
-                     "filename to look for a style config file (with 'file' style) " +
-                     "and to determine the language.")]
-        [TypeConverter(typeof(FilenameConverter))]
-        public string AssumeFilename
-        {
-            get { return assumeFilename; }
-            set { assumeFilename = value; }
-        }
-
-        public sealed class FallbackStyleConverter : StyleConverter
-        {
-            public FallbackStyleConverter()
-            {
-                // Add "none" to the list of styles.
-                values.Insert(0, "none");
-            }
-        }
-
-        [Category("Format Options")]
-        [DisplayName("Fallback Style")]
-        [Description("The name of the predefined style used as a fallback in case clang-format " +
-                     "is invoked with 'file' style, but can not find the configuration file.\n" +
-                     "Use 'none' fallback style to skip formatting.")]
-        [TypeConverter(typeof(FallbackStyleConverter))]
-        public string FallbackStyle
-        {
-            get { return fallbackStyle; }
-            set { fallbackStyle = value; }
-        }
-
-        [Category("Format Options")]
-        [DisplayName("Sort includes")]
-        [Description("Sort touched include lines.\n\n" +
-                     "See also: http://clang.llvm.org/docs/ClangFormat.html.")]
-        public bool SortIncludes
-        {
-            get { return sortIncludes; }
-            set { sortIncludes = value; }
-        }
-
-        [Category("Format On Save")]
-        [DisplayName("Enable")]
-        [Description("Enable running clang-format when modified files are saved. " +
-                     "Will only format if Style is found (ignores Fallback Style)."
-            )]
-        public bool FormatOnSave
-        {
-            get { return formatOnSave; }
-            set { formatOnSave = value; }
-        }
-
-        [Category("Format On Save")]
-        [DisplayName("File extensions")]
-        [Description("When formatting on save, clang-format will be applied only to " +
-                     "files with these extensions.")]
-        public string FormatOnSaveFileExtensions
-        {
-            get { return formatOnSaveFileExtensions; }
-            set { formatOnSaveFileExtensions = value; }
-        }
-    }
-
-    [PackageRegistration(UseManagedResourcesOnly = true)]
-    [InstalledProductRegistration("#110", "#112", "1.0", IconResourceID = 400)]
-    [ProvideMenuResource("Menus.ctmenu", 1)]
-    [ProvideAutoLoad(UIContextGuids80.SolutionExists)] // Load package on solution load
-    [Guid(GuidList.guidClangFormatPkgString)]
-    [ProvideOptionPage(typeof(OptionPageGrid), "LLVM/Clang", "ClangFormat", 0, 0, true)]
-    public sealed class ClangFormatPackage : Package
-    {
-        #region Package Members
-
-        RunningDocTableEventsDispatcher _runningDocTableEventsDispatcher;
-
-        protected override void Initialize()
-        {
-            base.Initialize();
-
-            _runningDocTableEventsDispatcher = new RunningDocTableEventsDispatcher(this);
-            _runningDocTableEventsDispatcher.BeforeSave += OnBeforeSave;
-
-            var commandService = GetService(typeof(IMenuCommandService)) as OleMenuCommandService;
-            if (commandService != null)
-            {
-                {
-                    var menuCommandID = new CommandID(GuidList.guidClangFormatCmdSet, (int)PkgCmdIDList.cmdidClangFormatSelection);
-                    var menuItem = new MenuCommand(MenuItemCallback, menuCommandID);
-                    commandService.AddCommand(menuItem);
-                }
-
-                {
-                    var menuCommandID = new CommandID(GuidList.guidClangFormatCmdSet, (int)PkgCmdIDList.cmdidClangFormatDocument);
-                    var menuItem = new MenuCommand(MenuItemCallback, menuCommandID);
-                    commandService.AddCommand(menuItem);
-                }
-            }
-        }
-        #endregion
-
-        OptionPageGrid GetUserOptions()
-        {
-            return (OptionPageGrid)GetDialogPage(typeof(OptionPageGrid));
-        }
-
-        private void MenuItemCallback(object sender, EventArgs args)
-        {
-            var mc = sender as System.ComponentModel.Design.MenuCommand;
-            if (mc == null)
-                return;
-
-            switch (mc.CommandID.ID)
-            {
-                case (int)PkgCmdIDList.cmdidClangFormatSelection:
-                    FormatSelection(GetUserOptions());
-                    break;
-
-                case (int)PkgCmdIDList.cmdidClangFormatDocument:
-                    FormatDocument(GetUserOptions());
-                    break;
-            }
-        }
-
-        private static bool FileHasExtension(string filePath, string fileExtensions)
-        {
-            var extensions = fileExtensions.ToLower().Split(new char[] { ';' }, StringSplitOptions.RemoveEmptyEntries);
-            return extensions.Contains(Path.GetExtension(filePath).ToLower());
-        }
-
-        private void OnBeforeSave(object sender, Document document)
-        {
-            var options = GetUserOptions();
-
-            if (!options.FormatOnSave)
-                return;
-
-            if (!FileHasExtension(document.FullName, options.FormatOnSaveFileExtensions))
-                return;
-
-            if (!Vsix.IsDocumentDirty(document))
-                return;
-
-            var optionsWithNoFallbackStyle = GetUserOptions().Clone();
-            optionsWithNoFallbackStyle.FallbackStyle = "none";
-            FormatDocument(document, optionsWithNoFallbackStyle);
-        }
-
-        /// <summary>
-        /// Runs clang-format on the current selection
-        /// </summary>
-        private void FormatSelection(OptionPageGrid options)
-        {
-            IWpfTextView view = Vsix.GetCurrentView();
-            if (view == null)
-                // We're not in a text view.
-                return;
-            string text = view.TextBuffer.CurrentSnapshot.GetText();
-            int start = view.Selection.Start.Position.GetContainingLine().Start.Position;
-            int end = view.Selection.End.Position.GetContainingLine().End.Position;
-
-            // clang-format doesn't support formatting a range that starts at the end
-            // of the file.
-            if (start >= text.Length && text.Length > 0)
-                start = text.Length - 1;
-            string path = Vsix.GetDocumentParent(view);
-            string filePath = Vsix.GetDocumentPath(view);
-
-            RunClangFormatAndApplyReplacements(text, start, end, path, filePath, options, view);
-        }
-
-        /// <summary>
-        /// Runs clang-format on the current document
-        /// </summary>
-        private void FormatDocument(OptionPageGrid options)
-        {
-            FormatView(Vsix.GetCurrentView(), options);
-        }
-
-        private void FormatDocument(Document document, OptionPageGrid options)
-        {
-            FormatView(Vsix.GetDocumentView(document), options);
-        }
-
-        private void FormatView(IWpfTextView view, OptionPageGrid options)
-        {
-            if (view == null)
-                // We're not in a text view.
-                return;
-
-            string filePath = Vsix.GetDocumentPath(view);
-            var path = Path.GetDirectoryName(filePath);
-
-            string text = view.TextBuffer.CurrentSnapshot.GetText();
-            if (!text.EndsWith(Environment.NewLine))
-            {
-                view.TextBuffer.Insert(view.TextBuffer.CurrentSnapshot.Length, Environment.NewLine);
-                text += Environment.NewLine;
-            }
-
-            RunClangFormatAndApplyReplacements(text, 0, text.Length, path, filePath, options, view);
-        }
-
-        private void RunClangFormatAndApplyReplacements(string text, int start, int end, string path, string filePath, OptionPageGrid options, IWpfTextView view)
-        {
-            try
-            {
-                string replacements = RunClangFormat(text, start, end, path, filePath, options);
-                ApplyClangFormatReplacements(replacements, view);
-            }
-            catch (Exception e)
-            {
-                var uiShell = (IVsUIShell)GetService(typeof(SVsUIShell));
-                var id = Guid.Empty;
-                int result;
-                uiShell.ShowMessageBox(
-                        0, ref id,
-                        "Error while running clang-format:",
-                        e.Message,
-                        string.Empty, 0,
-                        OLEMSGBUTTON.OLEMSGBUTTON_OK,
-                        OLEMSGDEFBUTTON.OLEMSGDEFBUTTON_FIRST,
-                        OLEMSGICON.OLEMSGICON_INFO,
-                        0, out result);
-            }
-        }
-
-        /// <summary>
-        /// Runs the given text through clang-format and returns the replacements as XML.
-        /// 
-        /// Formats the text in range start and end.
-        /// </summary>
-        private static string RunClangFormat(string text, int start, int end, string path, string filePath, OptionPageGrid options)
-        {
-            string vsixPath = Path.GetDirectoryName(
-                typeof(ClangFormatPackage).Assembly.Location);
-
-            System.Diagnostics.Process process = new System.Diagnostics.Process();
-            process.StartInfo.UseShellExecute = false;
-            process.StartInfo.FileName = vsixPath + "\\clang-format.exe";
-            char[] chars = text.ToCharArray();
-            int offset = Encoding.UTF8.GetByteCount(chars, 0, start);
-            int length = Encoding.UTF8.GetByteCount(chars, 0, end) - offset;
-            // Poor man's escaping - this will not work when quotes are already escaped
-            // in the input (but we don't need more).
-            string style = options.Style.Replace("\"", "\\\"");
-            string fallbackStyle = options.FallbackStyle.Replace("\"", "\\\"");
-            process.StartInfo.Arguments = " -offset " + offset +
-                                          " -length " + length +
-                                          " -output-replacements-xml " +
-                                          " -style \"" + style + "\"" +
-                                          " -fallback-style \"" + fallbackStyle + "\"";
-            if (options.SortIncludes)
-              process.StartInfo.Arguments += " -sort-includes ";
-            string assumeFilename = options.AssumeFilename;
-            if (string.IsNullOrEmpty(assumeFilename))
-                assumeFilename = filePath;
-            if (!string.IsNullOrEmpty(assumeFilename))
-              process.StartInfo.Arguments += " -assume-filename \"" + assumeFilename + "\"";
-            process.StartInfo.CreateNoWindow = true;
-            process.StartInfo.RedirectStandardInput = true;
-            process.StartInfo.RedirectStandardOutput = true;
-            process.StartInfo.RedirectStandardError = true;
-            if (path != null)
-                process.StartInfo.WorkingDirectory = path;
-            // We have to be careful when communicating via standard input / output,
-            // as writes to the buffers will block until they are read from the other side.
-            // Thus, we:
-            // 1. Start the process - clang-format.exe will start to read the input from the
-            //    standard input.
-            try
-            {
-                process.Start();
-            }
-            catch (Exception e)
-            {
-                throw new Exception(
-                    "Cannot execute " + process.StartInfo.FileName + ".\n\"" + 
-                    e.Message + "\".\nPlease make sure it is on the PATH.");
-            }
-            // 2. We write everything to the standard output - this cannot block, as clang-format
-            //    reads the full standard input before analyzing it without writing anything to the
-            //    standard output.
-            StreamWriter utf8Writer = new StreamWriter(process.StandardInput.BaseStream, new UTF8Encoding(false));
-            utf8Writer.Write(text);
-            // 3. We notify clang-format that the input is done - after this point clang-format
-            //    will start analyzing the input and eventually write the output.
-            utf8Writer.Close();
-            // 4. We must read clang-format's output before waiting for it to exit; clang-format
-            //    will close the channel by exiting.
-            string output = process.StandardOutput.ReadToEnd();
-            // 5. clang-format is done, wait until it is fully shut down.
-            process.WaitForExit();
-            if (process.ExitCode != 0)
-            {
-                // FIXME: If clang-format writes enough to the standard error stream to block,
-                // we will never reach this point; instead, read the standard error asynchronously.
-                throw new Exception(process.StandardError.ReadToEnd());
-            }
-            return output;
-        }
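For readers reconstructing this behaviour elsewhere, the deleted RunClangFormat above hinges on two details that are easy to get wrong: clang-format's -offset and -length arguments count UTF-8 bytes of the piped input, not .NET char indices, and the pipe traffic has to be ordered write stdin, close stdin, read stdout, then WaitForExit so neither process blocks on a full buffer. The following stand-alone sketch illustrates just that flow; it is not the extension's code, it assumes a clang-format binary reachable through PATH, hard-codes "-style file", and the names ClangFormatSketch and Run are made up for illustration.

using System.Diagnostics;
using System.IO;
using System.Text;

static class ClangFormatSketch
{
    // Hypothetical helper, not part of the deleted extension; assumes a
    // clang-format binary is reachable through PATH.
    public static string Run(string text, int charStart, int charEnd)
    {
        char[] chars = text.ToCharArray();
        // clang-format counts -offset/-length in UTF-8 bytes, not .NET chars.
        int byteOffset = Encoding.UTF8.GetByteCount(chars, 0, charStart);
        int byteLength = Encoding.UTF8.GetByteCount(chars, 0, charEnd) - byteOffset;

        var psi = new ProcessStartInfo
        {
            FileName = "clang-format",
            Arguments = "-offset " + byteOffset + " -length " + byteLength +
                        " -output-replacements-xml -style file",
            UseShellExecute = false,
            RedirectStandardInput = true,
            RedirectStandardOutput = true,
            CreateNoWindow = true,
        };

        using (Process process = Process.Start(psi))
        {
            // 1. Write the whole input and close stdin. clang-format reads all of
            //    stdin before producing output, so this write cannot deadlock.
            using (var stdin = new StreamWriter(process.StandardInput.BaseStream,
                                                new UTF8Encoding(false)))
                stdin.Write(text);
            // 2. Drain stdout before waiting; a full stdout pipe would otherwise
            //    keep clang-format from exiting.
            string xml = process.StandardOutput.ReadToEnd();
            process.WaitForExit();
            return xml;
        }
    }
}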
-
-        /// <summary>
-        /// Applies the clang-format replacements (xml) to the current view
-        /// </summary>
-        private static void ApplyClangFormatReplacements(string replacements, IWpfTextView view)
-        {
-            // clang-format returns no replacements if input text is empty
-            if (replacements.Length == 0)
-                return;
-
-            string text = view.TextBuffer.CurrentSnapshot.GetText();
-            byte[] bytes = Encoding.UTF8.GetBytes(text);
-
-            var root = XElement.Parse(replacements);
-            var edit = view.TextBuffer.CreateEdit();
-            foreach (XElement replacement in root.Descendants("replacement"))
-            {
-                int offset = int.Parse(replacement.Attribute("offset").Value);
-                int length = int.Parse(replacement.Attribute("length").Value);
-                var span = new Span(
-                    Encoding.UTF8.GetCharCount(bytes, 0, offset),
-                    Encoding.UTF8.GetCharCount(bytes, offset, length));
-                edit.Replace(span, replacement.Value);
-            }
-            edit.Apply();
-        }
-    }
-}
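The other half of the encoding round trip is mapping the byte-based offsets in clang-format's <replacements> XML back to char positions, as ApplyClangFormatReplacements above does. Below is a hedged, plain-string sketch of that step; ReplacementSketch and Apply are illustrative names, not part of the deleted extension.

using System.Linq;
using System.Text;
using System.Xml.Linq;

static class ReplacementSketch
{
    // Hypothetical helper, not part of the deleted extension.
    public static string Apply(string text, string replacementsXml)
    {
        // clang-format emits no XML at all for empty input.
        if (replacementsXml.Length == 0)
            return text;

        byte[] bytes = Encoding.UTF8.GetBytes(text);
        var root = XElement.Parse(replacementsXml);
        var sb = new StringBuilder(text);
        // Apply from the highest byte offset down so the char positions of
        // earlier replacements are not shifted by later edits.
        foreach (var r in root.Descendants("replacement")
                              .OrderByDescending(x => (int)x.Attribute("offset")))
        {
            int byteOffset = (int)r.Attribute("offset");
            int byteLength = (int)r.Attribute("length");
            // Translate byte offsets in the UTF-8 stream back to char indices.
            int charStart = Encoding.UTF8.GetCharCount(bytes, 0, byteOffset);
            int charLength = Encoding.UTF8.GetCharCount(bytes, byteOffset, byteLength);
            sb.Remove(charStart, charLength);
            sb.Insert(charStart, r.Value);
        }
        return sb.ToString();
    }
}

The deleted code could apply the spans in document order because it pushed them through an ITextEdit, whose spans are interpreted against the snapshot the edit was created on; a plain-string version has no snapshot, so it applies replacements back to front instead.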
diff --git clang/tools/clang-format-vs/ClangFormat/GlobalSuppressions.cs clang/tools/clang-format-vs/ClangFormat/GlobalSuppressions.cs
deleted file mode 100644
index 175a74e291df..000000000000
--- clang/tools/clang-format-vs/ClangFormat/GlobalSuppressions.cs
+++ /dev/null
@@ -1,11 +0,0 @@
-// This file is used by Code Analysis to maintain SuppressMessage
-// attributes that are applied to this project. Project-level
-// suppressions either have no target or are given a specific target
-// and scoped to a namespace, type, member, etc.
-//
-// To add a suppression to this file, right-click the message in the
-// Error List, point to "Suppress Message(s)", and click "In Project
-// Suppression File". You do not need to add suppressions to this
-// file manually.
-
-[assembly: System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1017:MarkAssembliesWithComVisible")]
diff --git clang/tools/clang-format-vs/ClangFormat/Guids.cs clang/tools/clang-format-vs/ClangFormat/Guids.cs
deleted file mode 100644
index ed1c12d61e4e..000000000000
--- clang/tools/clang-format-vs/ClangFormat/Guids.cs
+++ /dev/null
@@ -1,12 +0,0 @@
-using System;
-
-namespace LLVM.ClangFormat
-{
-    static class GuidList
-    {
-        public const string guidClangFormatPkgString = "c5286038-25d3-4f65-83a8-51fa2df4a146";
-        public const string guidClangFormatCmdSetString = "e39cbab1-0f96-4022-a2bc-da5a9db7eb78";
-
-        public static readonly Guid guidClangFormatCmdSet = new Guid(guidClangFormatCmdSetString);
-    };
-}
diff --git clang/tools/clang-format-vs/ClangFormat/PkgCmdID.cs clang/tools/clang-format-vs/ClangFormat/PkgCmdID.cs
deleted file mode 100644
index c274d1ca1b4b..000000000000
--- clang/tools/clang-format-vs/ClangFormat/PkgCmdID.cs
+++ /dev/null
@@ -1,8 +0,0 @@
-namespace LLVM.ClangFormat
-{
-    static class PkgCmdIDList
-    {
-        public const uint cmdidClangFormatSelection = 0x100;
-        public const uint cmdidClangFormatDocument = 0x101;
-    };
-}
diff --git clang/tools/clang-format-vs/ClangFormat/Properties/AssemblyInfo.cs clang/tools/clang-format-vs/ClangFormat/Properties/AssemblyInfo.cs
deleted file mode 100644
index b1cef49414b5..000000000000
--- clang/tools/clang-format-vs/ClangFormat/Properties/AssemblyInfo.cs
+++ /dev/null
@@ -1,33 +0,0 @@
-using System;
-using System.Reflection;
-using System.Resources;
-using System.Runtime.CompilerServices;
-using System.Runtime.InteropServices;
-
-// General Information about an assembly is controlled through the following 
-// set of attributes. Change these attribute values to modify the information
-// associated with an assembly.
-[assembly: AssemblyTitle("ClangFormat")]
-[assembly: AssemblyDescription("")]
-[assembly: AssemblyConfiguration("")]
-[assembly: AssemblyCompany("LLVM")]
-[assembly: AssemblyProduct("ClangFormat")]
-[assembly: AssemblyCopyright("")]
-[assembly: AssemblyTrademark("")]
-[assembly: AssemblyCulture("")]
-[assembly: ComVisible(false)]
-[assembly: CLSCompliant(false)]
-[assembly: NeutralResourcesLanguage("en-US")]
-
-// Version information for an assembly consists of the following four values:
-//
-//      Major Version
-//      Minor Version 
-//      Build Number
-//      Revision
-//
-// You can specify all the values or you can default the Revision and Build Numbers 
-// by using the '*' as shown below:
-
-[assembly: AssemblyVersion("1.1.0.0")]
-[assembly: AssemblyFileVersion("1.1.0.0")]
diff --git clang/tools/clang-format-vs/ClangFormat/Resources.Designer.cs clang/tools/clang-format-vs/ClangFormat/Resources.Designer.cs
deleted file mode 100644
index e3129b3db83a..000000000000
--- clang/tools/clang-format-vs/ClangFormat/Resources.Designer.cs
+++ /dev/null
@@ -1,63 +0,0 @@
-//------------------------------------------------------------------------------
-// <auto-generated>
-//     This code was generated by a tool.
-//     Runtime Version:4.0.30319.42000
-//
-//     Changes to this file may cause incorrect behavior and will be lost if
-//     the code is regenerated.
-// </auto-generated>
-//------------------------------------------------------------------------------
-
-namespace LLVM.ClangFormat {
-    using System;
-    
-    
-    /// <summary>
-    ///   A strongly-typed resource class, for looking up localized strings, etc.
-    /// </summary>
-    // This class was auto-generated by the StronglyTypedResourceBuilder
-    // class via a tool like ResGen or Visual Studio.
-    // To add or remove a member, edit your .ResX file then rerun ResGen
-    // with the /str option, or rebuild your VS project.
-    [global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Resources.Tools.StronglyTypedResourceBuilder", "4.0.0.0")]
-    [global::System.Diagnostics.DebuggerNonUserCodeAttribute()]
-    [global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()]
-    internal class Resources {
-        
-        private static global::System.Resources.ResourceManager resourceMan;
-        
-        private static global::System.Globalization.CultureInfo resourceCulture;
-        
-        [global::System.Diagnostics.CodeAnalysis.SuppressMessageAttribute("Microsoft.Performance", "CA1811:AvoidUncalledPrivateCode")]
-        internal Resources() {
-        }
-        
-        /// <summary>
-        ///   Returns the cached ResourceManager instance used by this class.
-        /// </summary>
-        [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)]
-        internal static global::System.Resources.ResourceManager ResourceManager {
-            get {
-                if (object.ReferenceEquals(resourceMan, null)) {
-                    global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("LLVM.ClangFormat.Resources", typeof(Resources).Assembly);
-                    resourceMan = temp;
-                }
-                return resourceMan;
-            }
-        }
-        
-        /// <summary>
-        ///   Overrides the current thread's CurrentUICulture property for all
-        ///   resource lookups using this strongly typed resource class.
-        /// </summary>
-        [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)]
-        internal static global::System.Globalization.CultureInfo Culture {
-            get {
-                return resourceCulture;
-            }
-            set {
-                resourceCulture = value;
-            }
-        }
-    }
-}
diff --git clang/tools/clang-format-vs/ClangFormat/Resources.resx clang/tools/clang-format-vs/ClangFormat/Resources.resx
deleted file mode 100644
index 352987aa07bc..000000000000
--- clang/tools/clang-format-vs/ClangFormat/Resources.resx
+++ /dev/null
@@ -1,129 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<!--
-    VS SDK Notes: This resx file contains the resources that will be consumed directly by your package.
-    For example, if you chose to create a tool window, there is a resource with ID 'CanNotCreateWindow'. This
-    is used in VsPkg.cs to determine the string to show the user if there is an error when attempting to create
-    the tool window.
-
-    Resources that are accessed directly from your package *by Visual Studio* are stored in the VSPackage.resx 
-    file.
--->
-<root>
-  <!-- 
-    Microsoft ResX Schema 
-    
-    Version 2.0
-    
-    The primary goals of this format is to allow a simple XML format 
-    that is mostly human readable. The generation and parsing of the 
-    various data types are done through the TypeConverter classes 
-    associated with the data types.
-    
-    Example:
-    
-    ... ado.net/XML headers & schema ...
-    <resheader name="resmimetype">text/microsoft-resx</resheader>
-    <resheader name="version">2.0</resheader>
-    <resheader name="reader">System.Resources.ResXResourceReader, System.Windows.Forms, ...</resheader>
-    <resheader name="writer">System.Resources.ResXResourceWriter, System.Windows.Forms, ...</resheader>
-    <data name="Name1"><value>this is my long string</value><comment>this is a comment</comment></data>
-    <data name="Color1" type="System.Drawing.Color, System.Drawing">Blue</data>
-    <data name="Bitmap1" mimetype="application/x-microsoft.net.object.binary.base64">
-        <value>[base64 mime encoded serialized .NET Framework object]</value>
-    </data>
-    <data name="Icon1" type="System.Drawing.Icon, System.Drawing" mimetype="application/x-microsoft.net.object.bytearray.base64">
-        <value>[base64 mime encoded string representing a byte array form of the .NET Framework object]</value>
-        <comment>This is a comment</comment>
-    </data>
-                
-    There are any number of "resheader" rows that contain simple 
-    name/value pairs.
-    
-    Each data row contains a name, and value. The row also contains a 
-    type or mimetype. Type corresponds to a .NET class that support 
-    text/value conversion through the TypeConverter architecture. 
-    Classes that don't support this are serialized and stored with the 
-    mimetype set.
-    
-    The mimetype is used for serialized objects, and tells the 
-    ResXResourceReader how to depersist the object. This is currently not 
-    extensible. For a given mimetype the value must be set accordingly:
-    
-    Note - application/x-microsoft.net.object.binary.base64 is the format 
-    that the ResXResourceWriter will generate, however the reader can 
-    read any of the formats listed below.
-    
-    mimetype: application/x-microsoft.net.object.binary.base64
-    value   : The object must be serialized with 
-            : System.Runtime.Serialization.Formatters.Binary.BinaryFormatter
-            : and then encoded with base64 encoding.
-    
-    mimetype: application/x-microsoft.net.object.soap.base64
-    value   : The object must be serialized with 
-            : System.Runtime.Serialization.Formatters.Soap.SoapFormatter
-            : and then encoded with base64 encoding.
-
-    mimetype: application/x-microsoft.net.object.bytearray.base64
-    value   : The object must be serialized into a byte array 
-            : using a System.ComponentModel.TypeConverter
-            : and then encoded with base64 encoding.
-    -->
-  <xsd:schema id="root" xmlns="" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:msdata="urn:schemas-microsoft-com:xml-msdata">
-    <xsd:import namespace="http://www.w3.org/XML/1998/namespace" />
-    <xsd:element name="root" msdata:IsDataSet="true">
-      <xsd:complexType>
-        <xsd:choice maxOccurs="unbounded">
-          <xsd:element name="metadata">
-            <xsd:complexType>
-              <xsd:sequence>
-                <xsd:element name="value" type="xsd:string" minOccurs="0" />
-              </xsd:sequence>
-              <xsd:attribute name="name" use="required" type="xsd:string" />
-              <xsd:attribute name="type" type="xsd:string" />
-              <xsd:attribute name="mimetype" type="xsd:string" />
-              <xsd:attribute ref="xml:space" />
-            </xsd:complexType>
-          </xsd:element>
-          <xsd:element name="assembly">
-            <xsd:complexType>
-              <xsd:attribute name="alias" type="xsd:string" />
-              <xsd:attribute name="name" type="xsd:string" />
-            </xsd:complexType>
-          </xsd:element>
-          <xsd:element name="data">
-            <xsd:complexType>
-              <xsd:sequence>
-                <xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
-                <xsd:element name="comment" type="xsd:string" minOccurs="0" msdata:Ordinal="2" />
-              </xsd:sequence>
-              <xsd:attribute name="name" type="xsd:string" use="required" msdata:Ordinal="1" />
-              <xsd:attribute name="type" type="xsd:string" msdata:Ordinal="3" />
-              <xsd:attribute name="mimetype" type="xsd:string" msdata:Ordinal="4" />
-              <xsd:attribute ref="xml:space" />
-            </xsd:complexType>
-          </xsd:element>
-          <xsd:element name="resheader">
-            <xsd:complexType>
-              <xsd:sequence>
-                <xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
-              </xsd:sequence>
-              <xsd:attribute name="name" type="xsd:string" use="required" />
-            </xsd:complexType>
-          </xsd:element>
-        </xsd:choice>
-      </xsd:complexType>
-    </xsd:element>
-  </xsd:schema>
-  <resheader name="resmimetype">
-    <value>text/microsoft-resx</value>
-  </resheader>
-  <resheader name="version">
-    <value>2.0</value>
-  </resheader>
-  <resheader name="reader">
-    <value>System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
-  </resheader>
-  <resheader name="writer">
-    <value>System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
-  </resheader>
-</root>
\ No newline at end of file
diff --git clang/tools/clang-format-vs/ClangFormat/Resources/Images_32bit.bmp clang/tools/clang-format-vs/ClangFormat/Resources/Images_32bit.bmp
deleted file mode 100644
index 2fa7ab009985..000000000000
Binary files clang/tools/clang-format-vs/ClangFormat/Resources/Images_32bit.bmp and /dev/null differ
diff --git clang/tools/clang-format-vs/ClangFormat/Resources/Package.ico clang/tools/clang-format-vs/ClangFormat/Resources/Package.ico
deleted file mode 100644
index ea3b23fe8d4b..000000000000
Binary files clang/tools/clang-format-vs/ClangFormat/Resources/Package.ico and /dev/null differ
diff --git clang/tools/clang-format-vs/ClangFormat/RunningDocTableEventsDispatcher.cs clang/tools/clang-format-vs/ClangFormat/RunningDocTableEventsDispatcher.cs
deleted file mode 100644
index 163f68dbda3c..000000000000
--- clang/tools/clang-format-vs/ClangFormat/RunningDocTableEventsDispatcher.cs
+++ /dev/null
@@ -1,79 +0,0 @@
-using EnvDTE;
-using Microsoft.VisualStudio;
-using Microsoft.VisualStudio.Shell;
-using Microsoft.VisualStudio.Shell.Interop;
-using System.Linq;
-    
-namespace LLVM.ClangFormat
-{
-    // Exposes event sources for IVsRunningDocTableEvents3 events.
-    internal sealed class RunningDocTableEventsDispatcher : IVsRunningDocTableEvents3
-    {
-        private RunningDocumentTable _runningDocumentTable;
-        private DTE _dte;
-
-        public delegate void OnBeforeSaveHander(object sender, Document document);
-        public event OnBeforeSaveHander BeforeSave;
-
-        public RunningDocTableEventsDispatcher(Package package)
-        {
-            _runningDocumentTable = new RunningDocumentTable(package);
-            _runningDocumentTable.Advise(this);
-            _dte = (DTE)Package.GetGlobalService(typeof(DTE));
-        }
-
-        public int OnAfterAttributeChange(uint docCookie, uint grfAttribs)
-        {
-            return VSConstants.S_OK;
-        }
-
-        public int OnAfterAttributeChangeEx(uint docCookie, uint grfAttribs, IVsHierarchy pHierOld, uint itemidOld, string pszMkDocumentOld, IVsHierarchy pHierNew, uint itemidNew, string pszMkDocumentNew)
-        {
-            return VSConstants.S_OK;
-        }
-
-        public int OnAfterDocumentWindowHide(uint docCookie, IVsWindowFrame pFrame)
-        {
-            return VSConstants.S_OK;
-        }
-
-        public int OnAfterFirstDocumentLock(uint docCookie, uint dwRDTLockType, uint dwReadLocksRemaining, uint dwEditLocksRemaining)
-        {
-            return VSConstants.S_OK;
-        }
-
-        public int OnAfterSave(uint docCookie)
-        {
-            return VSConstants.S_OK;
-        }
-
-        public int OnBeforeDocumentWindowShow(uint docCookie, int fFirstShow, IVsWindowFrame pFrame)
-        {
-            return VSConstants.S_OK;
-        }
-
-        public int OnBeforeLastDocumentUnlock(uint docCookie, uint dwRDTLockType, uint dwReadLocksRemaining, uint dwEditLocksRemaining)
-        {
-            return VSConstants.S_OK;
-        }
-
-        public int OnBeforeSave(uint docCookie)
-        {
-            if (BeforeSave != null)
-            {
-                var document = FindDocumentByCookie(docCookie);
-                if (document != null) // Not sure why this happens sometimes
-                {
-                    BeforeSave(this, FindDocumentByCookie(docCookie));
-                }
-            }
-            return VSConstants.S_OK;
-        }
-
-        private Document FindDocumentByCookie(uint docCookie)
-        {
-            var documentInfo = _runningDocumentTable.GetDocumentInfo(docCookie);
-            return _dte.Documents.Cast<Document>().FirstOrDefault(doc => doc.FullName == documentInfo.Moniker);
-        }
-    }
-}
diff --git clang/tools/clang-format-vs/ClangFormat/VSPackage.resx clang/tools/clang-format-vs/ClangFormat/VSPackage.resx
deleted file mode 100644
index 81102d38a07a..000000000000
--- clang/tools/clang-format-vs/ClangFormat/VSPackage.resx
+++ /dev/null
@@ -1,140 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<!--
-    VS SDK Notes: This resx file contains the resources that will be consumed from your package by Visual Studio.
-    For example, Visual Studio will attempt to load resource '400' from this resource stream when it needs to
-    load your package's icon. Because Visual Studio will always look in the VSPackage.resources stream first for
-    resources it needs, you should put additional resources that Visual Studio will load directly into this resx 
-    file. 
-
-    Resources that you would like to access directly from your package in a strong-typed fashion should be stored 
-    in Resources.resx or another resx file.
--->
-<root>
-  <!-- 
-    Microsoft ResX Schema 
-    
-    Version 2.0
-    
-    The primary goals of this format is to allow a simple XML format 
-    that is mostly human readable. The generation and parsing of the 
-    various data types are done through the TypeConverter classes 
-    associated with the data types.
-    
-    Example:
-    
-    ... ado.net/XML headers & schema ...
-    <resheader name="resmimetype">text/microsoft-resx</resheader>
-    <resheader name="version">2.0</resheader>
-    <resheader name="reader">System.Resources.ResXResourceReader, System.Windows.Forms, ...</resheader>
-    <resheader name="writer">System.Resources.ResXResourceWriter, System.Windows.Forms, ...</resheader>
-    <data name="Name1"><value>this is my long string</value><comment>this is a comment</comment></data>
-    <data name="Color1" type="System.Drawing.Color, System.Drawing">Blue</data>
-    <data name="Bitmap1" mimetype="application/x-microsoft.net.object.binary.base64">
-        <value>[base64 mime encoded serialized .NET Framework object]</value>
-    </data>
-    <data name="Icon1" type="System.Drawing.Icon, System.Drawing" mimetype="application/x-microsoft.net.object.bytearray.base64">
-        <value>[base64 mime encoded string representing a byte array form of the .NET Framework object]</value>
-        <comment>This is a comment</comment>
-    </data>
-                
-    There are any number of "resheader" rows that contain simple 
-    name/value pairs.
-    
-    Each data row contains a name, and value. The row also contains a 
-    type or mimetype. Type corresponds to a .NET class that support 
-    text/value conversion through the TypeConverter architecture. 
-    Classes that don't support this are serialized and stored with the 
-    mimetype set.
-    
-    The mimetype is used for serialized objects, and tells the 
-    ResXResourceReader how to depersist the object. This is currently not 
-    extensible. For a given mimetype the value must be set accordingly:
-    
-    Note - application/x-microsoft.net.object.binary.base64 is the format 
-    that the ResXResourceWriter will generate, however the reader can 
-    read any of the formats listed below.
-    
-    mimetype: application/x-microsoft.net.object.binary.base64
-    value   : The object must be serialized with 
-            : System.Runtime.Serialization.Formatters.Binary.BinaryFormatter
-            : and then encoded with base64 encoding.
-    
-    mimetype: application/x-microsoft.net.object.soap.base64
-    value   : The object must be serialized with 
-            : System.Runtime.Serialization.Formatters.Soap.SoapFormatter
-            : and then encoded with base64 encoding.
-
-    mimetype: application/x-microsoft.net.object.bytearray.base64
-    value   : The object must be serialized into a byte array 
-            : using a System.ComponentModel.TypeConverter
-            : and then encoded with base64 encoding.
-    -->
-  <xsd:schema id="root" xmlns="" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:msdata="urn:schemas-microsoft-com:xml-msdata">
-    <xsd:import namespace="http://www.w3.org/XML/1998/namespace" />
-    <xsd:element name="root" msdata:IsDataSet="true">
-      <xsd:complexType>
-        <xsd:choice maxOccurs="unbounded">
-          <xsd:element name="metadata">
-            <xsd:complexType>
-              <xsd:sequence>
-                <xsd:element name="value" type="xsd:string" minOccurs="0" />
-              </xsd:sequence>
-              <xsd:attribute name="name" use="required" type="xsd:string" />
-              <xsd:attribute name="type" type="xsd:string" />
-              <xsd:attribute name="mimetype" type="xsd:string" />
-              <xsd:attribute ref="xml:space" />
-            </xsd:complexType>
-          </xsd:element>
-          <xsd:element name="assembly">
-            <xsd:complexType>
-              <xsd:attribute name="alias" type="xsd:string" />
-              <xsd:attribute name="name" type="xsd:string" />
-            </xsd:complexType>
-          </xsd:element>
-          <xsd:element name="data">
-            <xsd:complexType>
-              <xsd:sequence>
-                <xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
-                <xsd:element name="comment" type="xsd:string" minOccurs="0" msdata:Ordinal="2" />
-              </xsd:sequence>
-              <xsd:attribute name="name" type="xsd:string" use="required" msdata:Ordinal="1" />
-              <xsd:attribute name="type" type="xsd:string" msdata:Ordinal="3" />
-              <xsd:attribute name="mimetype" type="xsd:string" msdata:Ordinal="4" />
-              <xsd:attribute ref="xml:space" />
-            </xsd:complexType>
-          </xsd:element>
-          <xsd:element name="resheader">
-            <xsd:complexType>
-              <xsd:sequence>
-                <xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
-              </xsd:sequence>
-              <xsd:attribute name="name" type="xsd:string" use="required" />
-            </xsd:complexType>
-          </xsd:element>
-        </xsd:choice>
-      </xsd:complexType>
-    </xsd:element>
-  </xsd:schema>
-  <resheader name="resmimetype">
-    <value>text/microsoft-resx</value>
-  </resheader>
-  <resheader name="version">
-    <value>2.0</value>
-  </resheader>
-  <resheader name="reader">
-    <value>System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
-  </resheader>
-  <resheader name="writer">
-    <value>System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
-  </resheader>
-  <assembly alias="System.Windows.Forms" name="System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089" />
-  <data name="110" xml:space="preserve">
-    <value>ClangFormat</value>
-  </data>
-  <data name="112" xml:space="preserve">
-    <value>Formats code by calling the clang-format executable.</value>
-  </data>
-  <data name="400" type="System.Resources.ResXFileRef, System.Windows.Forms">
-    <value>Resources\Package.ico;System.Drawing.Icon, System.Drawing, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a</value>
-  </data>
-</root>
\ No newline at end of file
diff --git clang/tools/clang-format-vs/ClangFormat/Vsix.cs clang/tools/clang-format-vs/ClangFormat/Vsix.cs
deleted file mode 100644
index 0d86cb59828a..000000000000
--- clang/tools/clang-format-vs/ClangFormat/Vsix.cs
+++ /dev/null
@@ -1,96 +0,0 @@
-using EnvDTE;
-using Microsoft.VisualStudio.Editor;
-using Microsoft.VisualStudio.Shell;
-using Microsoft.VisualStudio.Shell.Interop;
-using Microsoft.VisualStudio.Text;
-using Microsoft.VisualStudio.Text.Editor;
-using Microsoft.VisualStudio.TextManager.Interop;
-using System;
-using System.IO;
-
-namespace LLVM.ClangFormat
-{
-    internal sealed class Vsix
-    {
-        /// <summary>
-        /// Returns the currently active view if it is a IWpfTextView.
-        /// </summary>
-        public static IWpfTextView GetCurrentView()
-        {
-            // The SVsTextManager is a service through which we can get the active view.
-            var textManager = (IVsTextManager)Package.GetGlobalService(typeof(SVsTextManager));
-            IVsTextView textView;
-            textManager.GetActiveView(1, null, out textView);
-
-            // Now we have the active view as IVsTextView, but the text interfaces we need
-            // are in the IWpfTextView.
-            return VsToWpfTextView(textView);
-        }
-
-        public static bool IsDocumentDirty(Document document)
-        {
-            var textView = GetDocumentView(document);
-            var textDocument = GetTextDocument(textView);
-            return textDocument?.IsDirty == true;
-        }
-
-        public static IWpfTextView GetDocumentView(Document document)
-        {
-            var textView = GetVsTextViewFrompPath(document.FullName);
-            return VsToWpfTextView(textView);
-        }
-
-        public static IWpfTextView VsToWpfTextView(IVsTextView textView)
-        {
-            var userData = (IVsUserData)textView;
-            if (userData == null)
-                return null;
-            Guid guidWpfViewHost = DefGuidList.guidIWpfTextViewHost;
-            object host;
-            userData.GetData(ref guidWpfViewHost, out host);
-            return ((IWpfTextViewHost)host).TextView;
-        }
-
-        public static IVsTextView GetVsTextViewFrompPath(string filePath)
-        {
-            // From http://stackoverflow.com/a/2427368/4039972
-            var dte2 = (EnvDTE80.DTE2)Package.GetGlobalService(typeof(SDTE));
-            var sp = (Microsoft.VisualStudio.OLE.Interop.IServiceProvider)dte2;
-            var serviceProvider = new Microsoft.VisualStudio.Shell.ServiceProvider(sp);
-
-            IVsUIHierarchy uiHierarchy;
-            uint itemID;
-            IVsWindowFrame windowFrame;
-            if (VsShellUtilities.IsDocumentOpen(serviceProvider, filePath, Guid.Empty,
-                out uiHierarchy, out itemID, out windowFrame))
-            {
-                // Get the IVsTextView from the windowFrame.
-                return VsShellUtilities.GetTextView(windowFrame);
-            }
-            return null;
-        }
-
-        public static ITextDocument GetTextDocument(IWpfTextView view)
-        {
-            ITextDocument document;
-            if (view != null && view.TextBuffer.Properties.TryGetProperty(typeof(ITextDocument), out document))
-                return document;
-            return null;
-        }
-
-        public static string GetDocumentParent(IWpfTextView view)
-        {
-            ITextDocument document = GetTextDocument(view);
-            if (document != null)
-            {
-                return Directory.GetParent(document.FilePath).ToString();
-            }
-            return null;
-        }
-
-        public static string GetDocumentPath(IWpfTextView view)
-        {
-            return GetTextDocument(view)?.FilePath;
-        }
-    }
-}
diff --git clang/tools/clang-format-vs/ClangFormat/license.txt clang/tools/clang-format-vs/ClangFormat/license.txt
deleted file mode 100644
index 63c17f148e7a..000000000000
--- clang/tools/clang-format-vs/ClangFormat/license.txt
+++ /dev/null
@@ -1,261 +0,0 @@
-==============================================================================
-The LLVM Project is under the Apache License v2.0 with LLVM Exceptions:
-==============================================================================
-
-                                 Apache License
-                           Version 2.0, January 2004
-                        http://www.apache.org/licenses/
-
-    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
-    1. Definitions.
-
-      "License" shall mean the terms and conditions for use, reproduction,
-      and distribution as defined by Sections 1 through 9 of this document.
-
-      "Licensor" shall mean the copyright owner or entity authorized by
-      the copyright owner that is granting the License.
-
-      "Legal Entity" shall mean the union of the acting entity and all
-      other entities that control, are controlled by, or are under common
-      control with that entity. For the purposes of this definition,
-      "control" means (i) the power, direct or indirect, to cause the
-      direction or management of such entity, whether by contract or
-      otherwise, or (ii) ownership of fifty percent (50%) or more of the
-      outstanding shares, or (iii) beneficial ownership of such entity.
-
-      "You" (or "Your") shall mean an individual or Legal Entity
-      exercising permissions granted by this License.
-
-      "Source" form shall mean the preferred form for making modifications,
-      including but not limited to software source code, documentation
-      source, and configuration files.
-
-      "Object" form shall mean any form resulting from mechanical
-      transformation or translation of a Source form, including but
-      not limited to compiled object code, generated documentation,
-      and conversions to other media types.
-
-      "Work" shall mean the work of authorship, whether in Source or
-      Object form, made available under the License, as indicated by a
-      copyright notice that is included in or attached to the work
-      (an example is provided in the Appendix below).
-
-      "Derivative Works" shall mean any work, whether in Source or Object
-      form, that is based on (or derived from) the Work and for which the
-      editorial revisions, annotations, elaborations, or other modifications
-      represent, as a whole, an original work of authorship. For the purposes
-      of this License, Derivative Works shall not include works that remain
-      separable from, or merely link (or bind by name) to the interfaces of,
-      the Work and Derivative Works thereof.
-
-      "Contribution" shall mean any work of authorship, including
-      the original version of the Work and any modifications or additions
-      to that Work or Derivative Works thereof, that is intentionally
-      submitted to Licensor for inclusion in the Work by the copyright owner
-      or by an individual or Legal Entity authorized to submit on behalf of
-      the copyright owner. For the purposes of this definition, "submitted"
-      means any form of electronic, verbal, or written communication sent
-      to the Licensor or its representatives, including but not limited to
-      communication on electronic mailing lists, source code control systems,
-      and issue tracking systems that are managed by, or on behalf of, the
-      Licensor for the purpose of discussing and improving the Work, but
-      excluding communication that is conspicuously marked or otherwise
-      designated in writing by the copyright owner as "Not a Contribution."
-
-      "Contributor" shall mean Licensor and any individual or Legal Entity
-      on behalf of whom a Contribution has been received by Licensor and
-      subsequently incorporated within the Work.
-
-    2. Grant of Copyright License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      copyright license to reproduce, prepare Derivative Works of,
-      publicly display, publicly perform, sublicense, and distribute the
-      Work and such Derivative Works in Source or Object form.
-
-    3. Grant of Patent License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      (except as stated in this section) patent license to make, have made,
-      use, offer to sell, sell, import, and otherwise transfer the Work,
-      where such license applies only to those patent claims licensable
-      by such Contributor that are necessarily infringed by their
-      Contribution(s) alone or by combination of their Contribution(s)
-      with the Work to which such Contribution(s) was submitted. If You
-      institute patent litigation against any entity (including a
-      cross-claim or counterclaim in a lawsuit) alleging that the Work
-      or a Contribution incorporated within the Work constitutes direct
-      or contributory patent infringement, then any patent licenses
-      granted to You under this License for that Work shall terminate
-      as of the date such litigation is filed.
-
-    4. Redistribution. You may reproduce and distribute copies of the
-      Work or Derivative Works thereof in any medium, with or without
-      modifications, and in Source or Object form, provided that You
-      meet the following conditions:
-
-      (a) You must give any other recipients of the Work or
-          Derivative Works a copy of this License; and
-
-      (b) You must cause any modified files to carry prominent notices
-          stating that You changed the files; and
-
-      (c) You must retain, in the Source form of any Derivative Works
-          that You distribute, all copyright, patent, trademark, and
-          attribution notices from the Source form of the Work,
-          excluding those notices that do not pertain to any part of
-          the Derivative Works; and
-
-      (d) If the Work includes a "NOTICE" text file as part of its
-          distribution, then any Derivative Works that You distribute must
-          include a readable copy of the attribution notices contained
-          within such NOTICE file, excluding those notices that do not
-          pertain to any part of the Derivative Works, in at least one
-          of the following places: within a NOTICE text file distributed
-          as part of the Derivative Works; within the Source form or
-          documentation, if provided along with the Derivative Works; or,
-          within a display generated by the Derivative Works, if and
-          wherever such third-party notices normally appear. The contents
-          of the NOTICE file are for informational purposes only and
-          do not modify the License. You may add Your own attribution
-          notices within Derivative Works that You distribute, alongside
-          or as an addendum to the NOTICE text from the Work, provided
-          that such additional attribution notices cannot be construed
-          as modifying the License.
-
-      You may add Your own copyright statement to Your modifications and
-      may provide additional or different license terms and conditions
-      for use, reproduction, or distribution of Your modifications, or
-      for any such Derivative Works as a whole, provided Your use,
-      reproduction, and distribution of the Work otherwise complies with
-      the conditions stated in this License.
-
-    5. Submission of Contributions. Unless You explicitly state otherwise,
-      any Contribution intentionally submitted for inclusion in the Work
-      by You to the Licensor shall be under the terms and conditions of
-      this License, without any additional terms or conditions.
-      Notwithstanding the above, nothing herein shall supersede or modify
-      the terms of any separate license agreement you may have executed
-      with Licensor regarding such Contributions.
-
-    6. Trademarks. This License does not grant permission to use the trade
-      names, trademarks, service marks, or product names of the Licensor,
-      except as required for reasonable and customary use in describing the
-      origin of the Work and reproducing the content of the NOTICE file.
-
-    7. Disclaimer of Warranty. Unless required by applicable law or
-      agreed to in writing, Licensor provides the Work (and each
-      Contributor provides its Contributions) on an "AS IS" BASIS,
-      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-      implied, including, without limitation, any warranties or conditions
-      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
-      PARTICULAR PURPOSE. You are solely responsible for determining the
-      appropriateness of using or redistributing the Work and assume any
-      risks associated with Your exercise of permissions under this License.
-
-    8. Limitation of Liability. In no event and under no legal theory,
-      whether in tort (including negligence), contract, or otherwise,
-      unless required by applicable law (such as deliberate and grossly
-      negligent acts) or agreed to in writing, shall any Contributor be
-      liable to You for damages, including any direct, indirect, special,
-      incidental, or consequential damages of any character arising as a
-      result of this License or out of the use or inability to use the
-      Work (including but not limited to damages for loss of goodwill,
-      work stoppage, computer failure or malfunction, or any and all
-      other commercial damages or losses), even if such Contributor
-      has been advised of the possibility of such damages.
-
-    9. Accepting Warranty or Additional Liability. While redistributing
-      the Work or Derivative Works thereof, You may choose to offer,
-      and charge a fee for, acceptance of support, warranty, indemnity,
-      or other liability obligations and/or rights consistent with this
-      License. However, in accepting such obligations, You may act only
-      on Your own behalf and on Your sole responsibility, not on behalf
-      of any other Contributor, and only if You agree to indemnify,
-      defend, and hold each Contributor harmless for any liability
-      incurred by, or claims asserted against, such Contributor by reason
-      of your accepting any such warranty or additional liability.
-
-    END OF TERMS AND CONDITIONS
-
-    APPENDIX: How to apply the Apache License to your work.
-
-      To apply the Apache License to your work, attach the following
-      boilerplate notice, with the fields enclosed by brackets "[]"
-      replaced with your own identifying information. (Don't include
-      the brackets!)  The text should be enclosed in the appropriate
-      comment syntax for the file format. We also recommend that a
-      file or class name and description of purpose be included on the
-      same "printed page" as the copyright notice for easier
-      identification within third-party archives.
-
-    Copyright [yyyy] [name of copyright owner]
-
-    Licensed under the Apache License, Version 2.0 (the "License");
-    you may not use this file except in compliance with the License.
-    You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-    Unless required by applicable law or agreed to in writing, software
-    distributed under the License is distributed on an "AS IS" BASIS,
-    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-    See the License for the specific language governing permissions and
-    limitations under the License.
-
-
----- LLVM Exceptions to the Apache 2.0 License ----
-
-As an exception, if, as a result of your compiling your source code, portions
-of this Software are embedded into an Object form of such source code, you
-may redistribute such embedded portions in such Object form without complying
-with the conditions of Sections 4(a), 4(b) and 4(d) of the License.
-
-In addition, if you combine or link compiled forms of this Software with
-software that is licensed under the GPLv2 ("Combined Software") and if a
-court of competent jurisdiction determines that the patent provision (Section
-3), the indemnity provision (Section 9) or other Section of the License
-conflicts with the conditions of the GPLv2, you may retroactively and
-prospectively choose to deem waived or otherwise exclude such Section(s) of
-the License, but only in their entirety and only with respect to the Combined
-Software.
-
-==============================================================================
-Software from third parties included in the LLVM Project:
-==============================================================================
-The LLVM Project contains third party software which is under different license
-terms. All such code will be identified clearly using at least one of two
-mechanisms:
-1) It will be in a separate directory tree with its own `LICENSE.txt` or
-   `LICENSE` file at the top containing the specific license and restrictions
-   which apply to that software, or
-2) It will contain specific license and restriction terms at the top of every
-   file.
-
-==============================================================================
-Legacy LLVM License (https://llvm.org/docs/DeveloperPolicy.html#legacy):
-==============================================================================
-University of Illinois/NCSA
-Open Source License
-
-Copyright (c) 2007-2018 University of Illinois at Urbana-Champaign.
-All rights reserved.
-
-Developed by:
-
-    LLVM Team
-
-    University of Illinois at Urbana-Champaign
-
-    http://llvm.org
-
-Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal with the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
-
-    * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimers.
-
-    * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimers in the documentation and/or other materials provided with the distribution.
-
-    * Neither the names of the LLVM Team, University of Illinois at Urbana-Champaign, nor the names of its contributors may be used to endorse or promote products derived from this Software without specific prior written permission.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE.
diff --git clang/tools/clang-format-vs/ClangFormat/packages.config clang/tools/clang-format-vs/ClangFormat/packages.config
deleted file mode 100644
index 07dc281178f5..000000000000
--- clang/tools/clang-format-vs/ClangFormat/packages.config
+++ /dev/null
@@ -1,21 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<packages>
-  <package id="VSSDK.CoreUtility" version="10.0.4" targetFramework="net40" />
-  <package id="VSSDK.CoreUtility.10" version="10.0.4" targetFramework="net40" />
-  <package id="VSSDK.Editor" version="10.0.4" targetFramework="net40" />
-  <package id="VSSDK.Editor.10" version="10.0.4" targetFramework="net40" />
-  <package id="VSSDK.IDE" version="7.0.4" targetFramework="net40" />
-  <package id="VSSDK.IDE.10" version="10.0.4" targetFramework="net40" />
-  <package id="VSSDK.IDE.8" version="8.0.4" targetFramework="net40" />
-  <package id="VSSDK.IDE.9" version="9.0.3" targetFramework="net40" />
-  <package id="VSSDK.OLE.Interop" version="7.0.4" targetFramework="net40" />
-  <package id="VSSDK.Shell.10" version="10.0.3" targetFramework="net40" />
-  <package id="VSSDK.Shell.Immutable.10" version="10.0.3" targetFramework="net40" />
-  <package id="VSSDK.Shell.Interop" version="7.0.4" targetFramework="net40" />
-  <package id="VSSDK.Shell.Interop.8" version="8.0.3" targetFramework="net40" />
-  <package id="VSSDK.Shell.Interop.9" version="9.0.3" targetFramework="net40" />
-  <package id="VSSDK.Text" version="10.0.4" targetFramework="net40" />
-  <package id="VSSDK.Text.10" version="10.0.4" targetFramework="net40" />
-  <package id="VSSDK.TextManager.Interop" version="7.0.4" targetFramework="net40" />
-  <package id="VSSDK.TextManager.Interop.8" version="8.0.4" targetFramework="net40" />
-</packages>
\ No newline at end of file
diff --git clang/tools/clang-format-vs/README.txt clang/tools/clang-format-vs/README.txt
deleted file mode 100644
index 2cac5b9af9e3..000000000000
--- clang/tools/clang-format-vs/README.txt
+++ /dev/null
@@ -1,51 +0,0 @@
-This directory contains a VSPackage project to generate a Visual Studio extension
-for clang-format.
-
-Build prerequisites are:
-- Visual Studio 2015
-- Extensions SDK (you'll be prompted to install it if you open ClangFormat.sln)
-
-The extension is built using CMake to generate the usual LLVM.sln by setting
-the following CMake vars:
-
-- BUILD_CLANG_FORMAT_VS_PLUGIN=ON
-
-- NUGET_EXE_DIR=path/to/nuget_dir (unless nuget.exe is already available in PATH)
-
-example:
-  cd /d C:\code\llvm
-  mkdir build & cd build
-  cmake -DBUILD_CLANG_FORMAT_VS_PLUGIN=ON -DNUGET_EXE_DIR=C:\nuget ..
-
-Once LLVM.sln is generated, build the clang_format_vsix target, which will build
-ClangFormat.sln, the C# extension application.
-
-The CMake build will copy clang-format.exe and LICENSE.TXT into the ClangFormat/
-directory so they can be bundled with the plug-in, as well as creating
-ClangFormat/source.extension.vsixmanifest. Once the plug-in has been built with
-CMake once, it can be built manually from the ClangFormat.sln solution in Visual
-Studio.
-
-===========
- Debugging
-===========
-
-Once you've built the clang_format_vsix project from LLVM.sln at least once,
-open ClangFormat.sln in Visual Studio, then:
-
-- Make sure the "Debug" target is selected
-- Open the ClangFormat project properties
-- Select the Debug tab
-- Set "Start external program:" to where your devenv.exe is installed. Typically
-  it's "C:\Program Files (x86)\Microsoft Visual Studio 14.0\Common7\IDE\devenv.exe"
-- Set "Command line arguments" to: /rootsuffix Exp
-- You can now set breakpoints if you like
-- Press F5 to build and run with debugger
-
-If all goes well, a new instance of Visual Studio will be launched in a special
-mode where it uses the experimental hive instead of the normal configuration hive.
-By default, when you build a VSIX project in Visual Studio, it auto-registers the
-extension in the experimental hive, allowing you to test it. In the new Visual Studio
-instance, open or create a C++ solution, and you should now see the Clang Format
-entries in the Tool menu. You can test it out, and any breakpoints you set will be
-hit where you can debug as usual.
diff --git clang/tools/clang-format-vs/source.extension.vsixmanifest.in clang/tools/clang-format-vs/source.extension.vsixmanifest.in
deleted file mode 100644
index d4820c051ad7..000000000000
--- clang/tools/clang-format-vs/source.extension.vsixmanifest.in
+++ /dev/null
@@ -1,19 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<PackageManifest Version="2.0.0" xmlns="http://schemas.microsoft.com/developer/vsx-schema/2011" xmlns:d="http://schemas.microsoft.com/developer/vsx-schema-design/2011">
-  <Metadata>
-    <Identity Id="3cb18a5e-97e9-11e7-abc4-cec278b6b50a" Version="@CLANG_FORMAT_VS_VERSION@" Language="en-US" Publisher="LLVM"/>
-    <DisplayName>ClangFormat</DisplayName>
-    <Description xml:space="preserve">A tool to format C/C++/Obj-C code.</Description>
-    <MoreInfo>http://clang.llvm.org/docs/ClangFormat.html</MoreInfo>
-    <License>license.txt</License>
-  </Metadata>
-  <Installation InstalledByMsi="false">
-    <InstallationTarget Id="Microsoft.VisualStudio.Pro" Version="[11.0, 17.0)" />
-  </Installation>
-  <Dependencies>
-    <Dependency Id="Microsoft.VisualStudio.MPF" MinVersion="11.0" DisplayName="Visual Studio MPF" />
-  </Dependencies>
-  <Prerequisites>
-    <Prerequisite Id="Microsoft.VisualStudio.Component.CoreEditor" Version="[11.0,)" DisplayName="Visual Studio core editor" />
-  </Prerequisites>
-</PackageManifest>
diff --git clang/tools/clang-scan-deps/ClangScanDeps.cpp clang/tools/clang-scan-deps/ClangScanDeps.cpp
index a8f6150dd349..259058c798e5 100644
--- clang/tools/clang-scan-deps/ClangScanDeps.cpp
+++ clang/tools/clang-scan-deps/ClangScanDeps.cpp
@@ -915,6 +915,13 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) {
   if (Format == ScanningOutputFormat::Full)
     FD.emplace(ModuleName.empty() ? Inputs.size() : 0);
 
+  std::atomic<size_t> NumStatusCalls = 0;
+  std::atomic<size_t> NumOpenFileForReadCalls = 0;
+  std::atomic<size_t> NumDirBeginCalls = 0;
+  std::atomic<size_t> NumGetRealPathCalls = 0;
+  std::atomic<size_t> NumExistsCalls = 0;
+  std::atomic<size_t> NumIsLocalCalls = 0;
+
   auto ScanningTask = [&](DependencyScanningService &Service) {
     DependencyScanningTool WorkerTool(Service);
 
@@ -999,10 +1006,21 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) {
           HadErrors = true;
       }
     }
+
+    WorkerTool.getWorkerVFS().visit([&](llvm::vfs::FileSystem &VFS) {
+      if (auto *T = dyn_cast_or_null<llvm::vfs::TracingFileSystem>(&VFS)) {
+        NumStatusCalls += T->NumStatusCalls;
+        NumOpenFileForReadCalls += T->NumOpenFileForReadCalls;
+        NumDirBeginCalls += T->NumDirBeginCalls;
+        NumGetRealPathCalls += T->NumGetRealPathCalls;
+        NumExistsCalls += T->NumExistsCalls;
+        NumIsLocalCalls += T->NumIsLocalCalls;
+      }
+    });
   };
 
   DependencyScanningService Service(ScanMode, Format, OptimizeArgs,
-                                    EagerLoadModules);
+                                    EagerLoadModules, /*TraceVFS=*/Verbose);
 
   llvm::Timer T;
   T.startTimer();
@@ -1025,6 +1043,16 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) {
   }
 
   T.stopTimer();
+
+  if (Verbose)
+    llvm::errs() << "\n*** Virtual File System Stats:\n"
+                 << NumStatusCalls << " status() calls\n"
+                 << NumOpenFileForReadCalls << " openFileForRead() calls\n"
+                 << NumDirBeginCalls << " dir_begin() calls\n"
+                 << NumGetRealPathCalls << " getRealPath() calls\n"
+                 << NumExistsCalls << " exists() calls\n"
+                 << NumIsLocalCalls << " isLocal() calls\n";
+
   if (PrintTiming)
     llvm::errs() << llvm::format(
         "clang-scan-deps timing: %0.2fs wall, %0.2fs process\n",
diff --git clang/tools/driver/driver.cpp clang/tools/driver/driver.cpp
index 83b5bbb71f52..686eaea0aa7c 100644
--- clang/tools/driver/driver.cpp
+++ clang/tools/driver/driver.cpp
@@ -29,6 +29,7 @@
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringSet.h"
+#include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
 #include "llvm/Option/ArgList.h"
 #include "llvm/Option/OptTable.h"
 #include "llvm/Option/Option.h"
@@ -52,6 +53,7 @@
 #include <optional>
 #include <set>
 #include <system_error>
+
 using namespace clang;
 using namespace clang::driver;
 using namespace llvm::opt;
diff --git clang/tools/libclang/CIndex.cpp clang/tools/libclang/CIndex.cpp
index 08562e05ab65..e821c5e4c588 100644
--- clang/tools/libclang/CIndex.cpp
+++ clang/tools/libclang/CIndex.cpp
@@ -1557,6 +1557,9 @@ bool CursorVisitor::VisitTemplateName(TemplateName Name, SourceLocation Loc) {
     return Visit(MakeCursorTemplateRef(
         Name.getAsSubstTemplateTemplateParmPack()->getParameterPack(), Loc,
         TU));
+
+  case TemplateName::DeducedTemplate:
+    llvm_unreachable("DeducedTemplate shouldn't appear in source");
   }
 
   llvm_unreachable("Invalid TemplateName::Kind!");
diff --git clang/tools/libclang/CXStoredDiagnostic.cpp clang/tools/libclang/CXStoredDiagnostic.cpp
index c4c24876e70d..03018229549b 100644
--- clang/tools/libclang/CXStoredDiagnostic.cpp
+++ clang/tools/libclang/CXStoredDiagnostic.cpp
@@ -33,14 +33,14 @@ CXDiagnosticSeverity CXStoredDiagnostic::getSeverity() const {
     case DiagnosticsEngine::Error:   return CXDiagnostic_Error;
     case DiagnosticsEngine::Fatal:   return CXDiagnostic_Fatal;
   }
-  
+
   llvm_unreachable("Invalid diagnostic level");
 }
 
 CXSourceLocation CXStoredDiagnostic::getLocation() const {
   if (Diag.getLocation().isInvalid())
     return clang_getNullLocation();
-  
+
   return translateSourceLocation(Diag.getLocation().getManager(),
                                  LangOpts, Diag.getLocation());
 }
@@ -57,7 +57,7 @@ CXString CXStoredDiagnostic::getDiagnosticOption(CXString *Disable) const {
       *Disable = cxstring::createDup((Twine("-Wno-") + Option).str());
     return cxstring::createDup((Twine("-W") + Option).str());
   }
-  
+
   if (ID == diag::fatal_too_many_errors) {
     if (Disable)
       *Disable = cxstring::createRef("-ferror-limit=0");
@@ -79,7 +79,7 @@ CXString CXStoredDiagnostic::getCategoryText() const {
 unsigned CXStoredDiagnostic::getNumRanges() const {
   if (Diag.getLocation().isInvalid())
     return 0;
-  
+
   return Diag.range_size();
 }
 
@@ -92,12 +92,12 @@ CXSourceRange CXStoredDiagnostic::getRange(unsigned int Range) const {
 
 unsigned CXStoredDiagnostic::getNumFixIts() const {
   if (Diag.getLocation().isInvalid())
-    return 0;    
+    return 0;
   return Diag.fixit_size();
 }
 
 CXString CXStoredDiagnostic::getFixIt(unsigned FixIt,
-                                      CXSourceRange *ReplacementRange) const {  
+                                      CXSourceRange *ReplacementRange) const {
   const FixItHint &Hint = Diag.fixit_begin()[FixIt];
   if (ReplacementRange) {
     // Create a range that covers the entire replacement (or
@@ -108,4 +108,3 @@ CXString CXStoredDiagnostic::getFixIt(unsigned FixIt,
   }
   return cxstring::createDup(Hint.CodeToInsert);
 }
-
diff --git clang/unittests/AST/ASTImporterTest.cpp clang/unittests/AST/ASTImporterTest.cpp
index cc87e83e8605..aacecd3fbcd9 100644
--- clang/unittests/AST/ASTImporterTest.cpp
+++ clang/unittests/AST/ASTImporterTest.cpp
@@ -9836,41 +9836,75 @@ TEST_P(ASTImporterOptionSpecificTestBase, ImportMultipleAnonymousEnumDecls) {
 struct ImportTemplateParmDeclDefaultValue
     : public ASTImporterOptionSpecificTestBase {
 protected:
-  void checkTemplateParams(RedeclarableTemplateDecl *D) {
-    auto *CanD = cast<RedeclarableTemplateDecl>(D->getCanonicalDecl());
-    auto *CanNonTypeP = cast<NonTypeTemplateParmDecl>(
-        CanD->getTemplateParameters()->getParam(0));
-    auto *CanTypeP =
-        cast<TemplateTypeParmDecl>(CanD->getTemplateParameters()->getParam(1));
-    auto *CanTemplateP = cast<TemplateTemplateParmDecl>(
-        CanD->getTemplateParameters()->getParam(2));
-    EXPECT_FALSE(CanNonTypeP->getDefaultArgStorage().isInherited());
-    EXPECT_FALSE(CanTypeP->getDefaultArgStorage().isInherited());
-    EXPECT_FALSE(CanTemplateP->getDefaultArgStorage().isInherited());
-    for (Decl *Redecl : D->redecls()) {
-      auto *ReD = cast<RedeclarableTemplateDecl>(Redecl);
-      if (ReD != CanD) {
-        auto *NonTypeP = cast<NonTypeTemplateParmDecl>(
-            ReD->getTemplateParameters()->getParam(0));
-        auto *TypeP = cast<TemplateTypeParmDecl>(
-            ReD->getTemplateParameters()->getParam(1));
-        auto *TemplateP = cast<TemplateTemplateParmDecl>(
-            ReD->getTemplateParameters()->getParam(2));
-        EXPECT_TRUE(NonTypeP->getDefaultArgStorage().isInherited());
-        EXPECT_TRUE(TypeP->getDefaultArgStorage().isInherited());
-        EXPECT_TRUE(TemplateP->getDefaultArgStorage().isInherited());
-        EXPECT_EQ(NonTypeP->getDefaultArgStorage().getInheritedFrom(),
-                  CanNonTypeP);
-        EXPECT_EQ(TypeP->getDefaultArgStorage().getInheritedFrom(), CanTypeP);
-        EXPECT_EQ(TemplateP->getDefaultArgStorage().getInheritedFrom(),
-                  CanTemplateP);
-      }
+  void checkTemplateParams(RedeclarableTemplateDecl *D,
+                           RedeclarableTemplateDecl *InheritedFromD) {
+    auto *NonTypeP =
+        cast<NonTypeTemplateParmDecl>(D->getTemplateParameters()->getParam(0));
+    auto *TypeP =
+        cast<TemplateTypeParmDecl>(D->getTemplateParameters()->getParam(1));
+    auto *TemplateP =
+        cast<TemplateTemplateParmDecl>(D->getTemplateParameters()->getParam(2));
+    if (InheritedFromD) {
+      EXPECT_TRUE(NonTypeP->getDefaultArgStorage().isInherited());
+      EXPECT_TRUE(TypeP->getDefaultArgStorage().isInherited());
+      EXPECT_TRUE(TemplateP->getDefaultArgStorage().isInherited());
+      EXPECT_EQ(NonTypeP->getDefaultArgStorage().getInheritedFrom(),
+                InheritedFromD->getTemplateParameters()->getParam(0));
+      EXPECT_EQ(TypeP->getDefaultArgStorage().getInheritedFrom(),
+                InheritedFromD->getTemplateParameters()->getParam(1));
+      EXPECT_EQ(TemplateP->getDefaultArgStorage().getInheritedFrom(),
+                InheritedFromD->getTemplateParameters()->getParam(2));
+    } else {
+      EXPECT_FALSE(NonTypeP->getDefaultArgStorage().isInherited());
+      EXPECT_FALSE(TypeP->getDefaultArgStorage().isInherited());
+      EXPECT_FALSE(TemplateP->getDefaultArgStorage().isInherited());
     }
   }
 
-  void testImport(RedeclarableTemplateDecl *FromD) {
-    RedeclarableTemplateDecl *ToD = Import(FromD, Lang_CXX14);
-    checkTemplateParams(ToD);
+  void testImport(RedeclarableTemplateDecl *FromD1,
+                  RedeclarableTemplateDecl *FromD2,
+                  RedeclarableTemplateDecl *FromD3,
+                  RedeclarableTemplateDecl *ToExistingD1) {
+    auto *ToD1 = Import(FromD1, Lang_CXX14);
+    auto *ToD2 = Import(FromD2, Lang_CXX14);
+    auto *ToD3 = Import(FromD3, Lang_CXX14);
+    checkTemplateParams(ToD1, nullptr);
+    checkTemplateParams(ToD2, ToD1);
+    checkTemplateParams(ToD3, ToExistingD1 ? ToExistingD1 : ToD1);
+  }
+
+  // In these tests a circular dependency is created between the template
+  // parameter default value and the template declaration (with the same
+  // template parameter).
+  template <class TemplateParmDeclT>
+  void
+  testTemplateParmDeclCircularDependency(ClassTemplateDecl *FromD,
+                                         ClassTemplateDecl *FromDInherited) {
+    auto GetTemplateParm =
+        [](ClassTemplateDecl *D) -> const TemplateParmDeclT * {
+      return dyn_cast<TemplateParmDeclT>(
+          D->getTemplateParameters()->getParam(0));
+    };
+
+    ASSERT_FALSE(GetTemplateParm(FromD)->getDefaultArgStorage().isInherited());
+    ASSERT_TRUE(
+        GetTemplateParm(FromDInherited)->getDefaultArgStorage().isInherited());
+
+    auto *ToD = Import(FromD, Lang_CXX14);
+    EXPECT_TRUE(ToD);
+
+    auto *ToDInherited = Import(FromDInherited, Lang_CXX14);
+    EXPECT_TRUE(ToDInherited);
+
+    EXPECT_FALSE(GetTemplateParm(ToD)->getDefaultArgStorage().isInherited());
+    EXPECT_TRUE(
+        GetTemplateParm(ToDInherited)->getDefaultArgStorage().isInherited());
+    EXPECT_EQ(GetTemplateParm(ToDInherited)
+                  ->getDefaultArgStorage()
+                  .getInheritedFrom(),
+              GetTemplateParm(ToD));
+
+    EXPECT_EQ(ToD->getPreviousDecl(), ToDInherited);
   }
 
   const char *CodeFunction =
@@ -9878,81 +9912,245 @@ protected:
       template <class> struct X;
 
       template <int A = 2, typename B = int, template<class> class C = X>
-      void f();
+      void test();
       template <int A, typename B, template<class> class C>
-      void f();
+      void test();
       template <int A, typename B, template<class> class C>
-      void f() {}
+      void test() {}
       )";
 
   const char *CodeClass =
       R"(
+      namespace N {
       template <class> struct X;
 
       template <int A = 2, typename B = int, template<class> class C = X>
-      struct S;
+      struct test;
       template <int A, typename B, template<class> class C>
-      struct S;
+      struct test;
       template <int A, typename B, template<class> class C>
-      struct S {};
+      struct test {};
+      }
       )";
 
   const char *CodeVar =
       R"(
+      namespace N {
       template <class> struct X;
 
       template <int A = 2, typename B = int, template<class> class C = X>
-      extern int V;
+      extern int test;
       template <int A, typename B, template<class> class C>
-      extern int V;
+      extern int test;
       template <int A, typename B, template<class> class C>
-      int V = A;
+      int test = A;
+      }
       )";
 };
 
-TEST_P(ImportTemplateParmDeclDefaultValue, ImportFunctionTemplate) {
-  Decl *FromTU = getTuDecl(CodeFunction, Lang_CXX14);
-  auto *FromLastD = LastDeclMatcher<FunctionTemplateDecl>().match(
+TEST_P(ImportTemplateParmDeclDefaultValue, InvisibleInheritedFrom) {
+  const char *ToCode =
+      R"(
+      template <int P = 1>
+      void f() {}
+      )";
+  TranslationUnitDecl *ToTU = getToTuDecl(ToCode, Lang_CXX14);
+  auto *ToFDef = FirstDeclMatcher<FunctionTemplateDecl>().match(
+      ToTU, functionTemplateDecl(hasName("f")));
+
+  const char *FromCode =
+      R"(
+      template <int P = 1>
+      void f() {}
+      template <int P>
+      void f();
+      )";
+  TranslationUnitDecl *FromTU = getTuDecl(FromCode, Lang_CXX14);
+  auto *FromFDef = FirstDeclMatcher<FunctionTemplateDecl>().match(
+      FromTU, functionTemplateDecl(hasName("f")));
+  auto *FromF = LastDeclMatcher<FunctionTemplateDecl>().match(
       FromTU, functionTemplateDecl(hasName("f")));
-  testImport(FromLastD);
+
+  auto *ToFDefImported = Import(FromFDef, Lang_CXX14);
+  EXPECT_EQ(ToFDefImported, ToFDef);
+  auto *ToF = Import(FromF, Lang_CXX14);
+  EXPECT_NE(ToF, ToFDef);
+  const auto *Parm = dyn_cast<NonTypeTemplateParmDecl>(
+      ToF->getTemplateParameters()->getParam(0));
+  EXPECT_TRUE(Parm->defaultArgumentWasInherited());
+  // FIXME: This behavior may be confusing:
+  // Default value is not inherited from the existing declaration, instead a new
+  // is created at import that is similar to the existing but not reachable from
+  // the AST.
+  EXPECT_NE(Parm->getDefaultArgStorage().getInheritedFrom(),
+            ToFDef->getTemplateParameters()->getParam(0));
+}
+
+TEST_P(ImportTemplateParmDeclDefaultValue, ImportFunctionTemplate) {
+  TranslationUnitDecl *FromTU = getTuDecl(CodeFunction, Lang_CXX14);
+  auto *D3 = LastDeclMatcher<FunctionTemplateDecl>().match(
+      FromTU, functionTemplateDecl(hasName("test") /*, hasBody(stmt())*/));
+  auto *D2 = dyn_cast<FunctionTemplateDecl>(D3->getPreviousDecl());
+  auto *D1 = dyn_cast<FunctionTemplateDecl>(D2->getPreviousDecl());
+  testImport(D1, D2, D3, nullptr);
 }
 
 TEST_P(ImportTemplateParmDeclDefaultValue, ImportExistingFunctionTemplate) {
-  getToTuDecl(CodeFunction, Lang_CXX14);
-  Decl *FromTU = getTuDecl(CodeFunction, Lang_CXX14);
-  auto *FromLastD = LastDeclMatcher<FunctionTemplateDecl>().match(
-      FromTU, functionTemplateDecl(hasName("f")));
-  testImport(FromLastD);
+  TranslationUnitDecl *ToTU = getToTuDecl(CodeFunction, Lang_CXX14);
+  auto *ToD1 = FirstDeclMatcher<FunctionTemplateDecl>().match(
+      ToTU, functionTemplateDecl(hasName("test")));
+  TranslationUnitDecl *FromTU = getTuDecl(CodeFunction, Lang_CXX14);
+  auto *D3 = LastDeclMatcher<FunctionTemplateDecl>().match(
+      FromTU, functionTemplateDecl(hasName("test")));
+  auto *D2 = dyn_cast<FunctionTemplateDecl>(D3->getPreviousDecl());
+  auto *D1 = dyn_cast<FunctionTemplateDecl>(D2->getPreviousDecl());
+  testImport(D1, D2, D3, ToD1);
 }
 
 TEST_P(ImportTemplateParmDeclDefaultValue, ImportClassTemplate) {
-  Decl *FromTU = getTuDecl(CodeClass, Lang_CXX14);
-  auto *FromLastD = LastDeclMatcher<ClassTemplateDecl>().match(
-      FromTU, classTemplateDecl(hasName("S")));
-  testImport(FromLastD);
+  TranslationUnitDecl *FromTU = getTuDecl(CodeClass, Lang_CXX14);
+  auto *D3 = LastDeclMatcher<ClassTemplateDecl>().match(
+      FromTU, classTemplateDecl(hasName("test")));
+  auto *D2 = dyn_cast<ClassTemplateDecl>(D3->getPreviousDecl());
+  auto *D1 = dyn_cast<ClassTemplateDecl>(D2->getPreviousDecl());
+  testImport(D1, D2, D3, nullptr);
 }
 
 TEST_P(ImportTemplateParmDeclDefaultValue, ImportExistingClassTemplate) {
-  getToTuDecl(CodeClass, Lang_CXX14);
-  Decl *FromTU = getTuDecl(CodeClass, Lang_CXX14);
-  auto *FromLastD = LastDeclMatcher<ClassTemplateDecl>().match(
-      FromTU, classTemplateDecl(hasName("S")));
-  testImport(FromLastD);
+  TranslationUnitDecl *ToTU = getToTuDecl(CodeClass, Lang_CXX14);
+  auto *ToD1 = FirstDeclMatcher<ClassTemplateDecl>().match(
+      ToTU, classTemplateDecl(hasName("test")));
+  TranslationUnitDecl *FromTU = getTuDecl(CodeClass, Lang_CXX14);
+  auto *D3 = LastDeclMatcher<ClassTemplateDecl>().match(
+      FromTU, classTemplateDecl(hasName("test")));
+  auto *D2 = dyn_cast<ClassTemplateDecl>(D3->getPreviousDecl());
+  auto *D1 = dyn_cast<ClassTemplateDecl>(D2->getPreviousDecl());
+  testImport(D1, D2, D3, ToD1);
 }
 
 TEST_P(ImportTemplateParmDeclDefaultValue, ImportVarTemplate) {
-  Decl *FromTU = getTuDecl(CodeVar, Lang_CXX14);
-  auto *FromLastD = LastDeclMatcher<VarTemplateDecl>().match(
-      FromTU, varTemplateDecl(hasName("V")));
-  testImport(FromLastD);
+  TranslationUnitDecl *FromTU = getTuDecl(CodeVar, Lang_CXX14);
+  auto *D3 = LastDeclMatcher<VarTemplateDecl>().match(
+      FromTU, varTemplateDecl(hasName("test")));
+  auto *D2 = dyn_cast<VarTemplateDecl>(D3->getPreviousDecl());
+  auto *D1 = dyn_cast<VarTemplateDecl>(D2->getPreviousDecl());
+  testImport(D1, D2, D3, nullptr);
 }
 
 TEST_P(ImportTemplateParmDeclDefaultValue, ImportExistingVarTemplate) {
-  getToTuDecl(CodeVar, Lang_CXX14);
-  Decl *FromTU = getTuDecl(CodeVar, Lang_CXX14);
-  auto *FromLastD = LastDeclMatcher<VarTemplateDecl>().match(
-      FromTU, varTemplateDecl(hasName("V")));
-  testImport(FromLastD);
+  TranslationUnitDecl *ToTU = getToTuDecl(CodeVar, Lang_CXX14);
+  auto *ToD1 = FirstDeclMatcher<VarTemplateDecl>().match(
+      ToTU, varTemplateDecl(hasName("test")));
+  TranslationUnitDecl *FromTU = getTuDecl(CodeVar, Lang_CXX14);
+  auto *D3 = LastDeclMatcher<VarTemplateDecl>().match(
+      FromTU, varTemplateDecl(hasName("test")));
+  auto *D2 = dyn_cast<VarTemplateDecl>(D3->getPreviousDecl());
+  auto *D1 = dyn_cast<VarTemplateDecl>(D2->getPreviousDecl());
+  testImport(D1, D2, D3, ToD1);
+}
+
+TEST_P(ImportTemplateParmDeclDefaultValue,
+       NonTypeTemplateParmDeclCircularDependency) {
+  const char *Code =
+      R"(
+      struct Z;
+
+      struct Y {
+        Z *z;
+        static const int x = 1;
+      };
+
+      template <int P1 = Y::x>
+      struct X;
+
+      template <int P2>
+      struct X {
+        static const int A = 1;
+      };
+
+      struct Z {
+        template<int P>
+        void f(int A = X<P>::A);
+      };
+      )";
+
+  Decl *FromTU = getTuDecl(Code, Lang_CXX14);
+  auto *FromD = FirstDeclMatcher<ClassTemplateDecl>().match(
+      FromTU, classTemplateDecl(hasName("X")));
+  auto *FromDInherited = LastDeclMatcher<ClassTemplateDecl>().match(
+      FromTU, classTemplateDecl(hasName("X")));
+
+  testTemplateParmDeclCircularDependency<NonTypeTemplateParmDecl>(
+      FromD, FromDInherited);
+}
+
+TEST_P(ImportTemplateParmDeclDefaultValue,
+       TemplateTypeParmDeclCircularDependency) {
+  const char *Code =
+      R"(
+      struct Z;
+
+      struct Y {
+        Z *z;
+      };
+
+      template <typename T1 = Y>
+      struct X;
+
+      template <typename T2>
+      struct X {
+        static const int A = 1;
+      };
+
+      struct Z {
+        template<typename T>
+        void f(int A = X<T>::A);
+      };
+      )";
+
+  Decl *FromTU = getTuDecl(Code, Lang_CXX14);
+  auto *FromD = FirstDeclMatcher<ClassTemplateDecl>().match(
+      FromTU, classTemplateDecl(hasName("X")));
+  auto *FromDInherited = LastDeclMatcher<ClassTemplateDecl>().match(
+      FromTU, classTemplateDecl(hasName("X")));
+
+  testTemplateParmDeclCircularDependency<TemplateTypeParmDecl>(FromD,
+                                                               FromDInherited);
+}
+
+TEST_P(ImportTemplateParmDeclDefaultValue,
+       TemplateTemplateParmDeclCircularDependency) {
+  const char *Code =
+      R"(
+      struct Z;
+
+      template <int>
+      struct Y {
+        Z *z;
+      };
+
+      template <template <int> class T1 = Y>
+      struct X;
+
+      template <template <int> class T2>
+      struct X {
+        static const int A = 1;
+      };
+
+      struct Z {
+        template <template <int> class T>
+        void f(int A = X<T>::A);
+      };
+      )";
+
+  Decl *FromTU = getTuDecl(Code, Lang_CXX14);
+  auto *FromD = FirstDeclMatcher<ClassTemplateDecl>().match(
+      FromTU, classTemplateDecl(hasName("X")));
+  auto *FromDInherited = LastDeclMatcher<ClassTemplateDecl>().match(
+      FromTU, classTemplateDecl(hasName("X")));
+
+  testTemplateParmDeclCircularDependency<TemplateTemplateParmDecl>(
+      FromD, FromDInherited);
 }
 
 INSTANTIATE_TEST_SUITE_P(ParameterizedTests, ASTImporterLookupTableTest,
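These importer tests revolve around a core C++ rule: a template redeclaration does not restate default arguments but inherits them from an earlier declaration, and Clang records that link through the parameters' DefaultArgStorage (isInherited()/getInheritedFrom(), as checked above). A self-contained example of the language behaviour being imported; the names S and UsesAllDefaults are illustrative only:

  template <class> struct X;

  // The first declaration spells out the defaults.
  template <int A = 2, typename B = int, template <class> class C = X>
  struct S;

  // A later redeclaration must not repeat them, yet it still "has" them:
  // in the AST its parameters mark each default as inherited from the
  // declaration above.
  template <int A, typename B, template <class> class C>
  struct S {};

  S<> UsesAllDefaults; // OK: A = 2, B = int, C = X
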
diff --git clang/unittests/Driver/GCCVersionTest.cpp clang/unittests/Driver/GCCVersionTest.cpp
index 3158911fe5db..1aab13b7abf8 100644
--- clang/unittests/Driver/GCCVersionTest.cpp
+++ clang/unittests/Driver/GCCVersionTest.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "../../lib/Driver/ToolChains/Gnu.h"
+#include "llvm/Config/llvm-config.h" // for LLVM_BUILD_LLVM_DYLIB, LLVM_BUILD_SHARED_LIBS
 #include "gtest/gtest.h"
 
 // The Generic_GCC class is hidden in dylib/shared library builds, so
diff --git clang/unittests/Format/FormatTestJS.cpp clang/unittests/Format/FormatTestJS.cpp
index 4b29ba720f68..c25228a69a74 100644
--- clang/unittests/Format/FormatTestJS.cpp
+++ clang/unittests/Format/FormatTestJS.cpp
@@ -2850,5 +2850,22 @@ TEST_F(FormatTestJS, DontBreakFieldsAsGoToLabels) {
                "};");
 }
 
+TEST_F(FormatTestJS, BreakAfterOpenBracket) {
+  auto Style = getGoogleStyle(FormatStyle::LK_JavaScript);
+  EXPECT_EQ(Style.AlignAfterOpenBracket, FormatStyle::BAS_AlwaysBreak);
+  verifyFormat("ctrl.onCopy(/** @type {!WizEvent}*/ (\n"
+               "    {event, targetElement: {el: () => selectedElement}}));",
+               Style);
+  verifyFormat("failedUserIds.push(...subscriptioxxxxxxxxxxxxnSubset.map(\n"
+               "    subscxxxxxxxxxxxxription => subscription.getUserId()));",
+               Style);
+  verifyFormat("failedUserIds.push(!subscriptioxxxxxxxxxxxxnSubset.map(\n"
+               "    subscxxxxxxxxxxxxription => subscription.getUserId()));",
+               Style);
+  verifyFormat("failedUserIds.push(await subscriptioxxxxxxxxxxxxnSubset.map(\n"
+               "    subscxxxxxxxxxxxxription => subscription.getUserId()));",
+               Style);
+}
+
 } // namespace format
 } // end namespace clang
diff --git clang/unittests/Format/TokenAnnotatorTest.cpp clang/unittests/Format/TokenAnnotatorTest.cpp
index c0436d8a2e18..5c28e3a4ea5a 100644
--- clang/unittests/Format/TokenAnnotatorTest.cpp
+++ clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -775,6 +775,12 @@ TEST_F(TokenAnnotatorTest, UnderstandsCasts) {
   EXPECT_TOKEN(Tokens[8], tok::r_paren, TT_Unknown);
   EXPECT_TOKEN(Tokens[9], tok::minus, TT_BinaryOperator);
 
+  Tokens = annotate("return (double)(foo(30)) - 15;");
+  ASSERT_EQ(Tokens.size(), 14u) << Tokens;
+  EXPECT_TOKEN(Tokens[3], tok::r_paren, TT_CastRParen);
+  EXPECT_TOKEN(Tokens[9], tok::r_paren, TT_Unknown);
+  EXPECT_TOKEN(Tokens[10], tok::minus, TT_BinaryOperator);
+
   auto Style = getLLVMStyle();
   Style.TypeNames.push_back("Foo");
   Tokens = annotate("#define FOO(bar) foo((Foo)&bar)", Style);
@@ -2044,7 +2050,7 @@ TEST_F(TokenAnnotatorTest, UnderstandsFunctionDeclarationNames) {
   EXPECT_TOKEN(Tokens[4], tok::l_paren, TT_FunctionTypeLParen);
 
   Tokens = annotate("void instanceof();");
-  ASSERT_EQ(Tokens.size(), 6u);
+  ASSERT_EQ(Tokens.size(), 6u) << Tokens;
   EXPECT_TOKEN(Tokens[1], tok::identifier, TT_FunctionDeclarationName);
   EXPECT_TOKEN(Tokens[2], tok::l_paren, TT_FunctionDeclarationLParen);
 
@@ -3278,6 +3284,26 @@ TEST_F(TokenAnnotatorTest, BraceKind) {
   EXPECT_BRACE_KIND(Tokens[10], BK_Block);
   EXPECT_TOKEN(Tokens[11], tok::r_brace, TT_StructRBrace);
   EXPECT_BRACE_KIND(Tokens[11], BK_Block);
+
+  Tokens = annotate("#define MACRO            \\\n"
+                    "  struct hash<type> {    \\\n"
+                    "    void f() { return; } \\\n"
+                    "  };");
+  ASSERT_EQ(Tokens.size(), 20u) << Tokens;
+  EXPECT_TOKEN(Tokens[8], tok::l_brace, TT_StructLBrace);
+  EXPECT_BRACE_KIND(Tokens[8], BK_Block);
+  EXPECT_TOKEN(Tokens[10], tok::identifier, TT_FunctionDeclarationName);
+  EXPECT_TOKEN(Tokens[11], tok::l_paren, TT_FunctionDeclarationLParen);
+  EXPECT_TOKEN(Tokens[13], tok::l_brace, TT_FunctionLBrace);
+  EXPECT_BRACE_KIND(Tokens[13], BK_Block);
+  EXPECT_BRACE_KIND(Tokens[16], BK_Block);
+  EXPECT_TOKEN(Tokens[17], tok::r_brace, TT_StructRBrace);
+  EXPECT_BRACE_KIND(Tokens[17], BK_Block);
+
+  Tokens = annotate("#define MEMBER(NAME) NAME{\"\"}");
+  ASSERT_EQ(Tokens.size(), 11u) << Tokens;
+  EXPECT_BRACE_KIND(Tokens[7], BK_BracedInit);
+  EXPECT_BRACE_KIND(Tokens[9], BK_BracedInit);
 }
 
 TEST_F(TokenAnnotatorTest, UnderstandsElaboratedTypeSpecifier) {
@@ -3339,55 +3365,55 @@ TEST_F(TokenAnnotatorTest, SwitchExpression) {
 
 TEST_F(TokenAnnotatorTest, CppAltOperatorKeywords) {
   auto Tokens = annotate("a = b and c;");
-  ASSERT_EQ(Tokens.size(), 7u);
+  ASSERT_EQ(Tokens.size(), 7u) << Tokens;
   EXPECT_TOKEN(Tokens[3], tok::ampamp, TT_BinaryOperator);
 
   Tokens = annotate("a = b and_eq c;");
-  ASSERT_EQ(Tokens.size(), 7u);
+  ASSERT_EQ(Tokens.size(), 7u) << Tokens;
   EXPECT_TOKEN(Tokens[3], tok::ampequal, TT_BinaryOperator);
 
   Tokens = annotate("a = b bitand c;");
-  ASSERT_EQ(Tokens.size(), 7u);
+  ASSERT_EQ(Tokens.size(), 7u) << Tokens;
   EXPECT_TOKEN(Tokens[3], tok::amp, TT_BinaryOperator);
 
   Tokens = annotate("a = b bitor c;");
-  ASSERT_EQ(Tokens.size(), 7u);
+  ASSERT_EQ(Tokens.size(), 7u) << Tokens;
   EXPECT_TOKEN(Tokens[3], tok::pipe, TT_BinaryOperator);
 
   Tokens = annotate("a = b compl c;");
-  ASSERT_EQ(Tokens.size(), 7u);
+  ASSERT_EQ(Tokens.size(), 7u) << Tokens;
   EXPECT_TOKEN(Tokens[3], tok::tilde, TT_UnaryOperator);
 
   Tokens = annotate("a = b not c;");
-  ASSERT_EQ(Tokens.size(), 7u);
+  ASSERT_EQ(Tokens.size(), 7u) << Tokens;
   EXPECT_TOKEN(Tokens[3], tok::exclaim, TT_UnaryOperator);
 
   Tokens = annotate("a = b not_eq c;");
-  ASSERT_EQ(Tokens.size(), 7u);
+  ASSERT_EQ(Tokens.size(), 7u) << Tokens;
   EXPECT_TOKEN(Tokens[3], tok::exclaimequal, TT_BinaryOperator);
 
   Tokens = annotate("a = b or c;");
-  ASSERT_EQ(Tokens.size(), 7u);
+  ASSERT_EQ(Tokens.size(), 7u) << Tokens;
   EXPECT_TOKEN(Tokens[3], tok::pipepipe, TT_BinaryOperator);
 
   Tokens = annotate("a = b or_eq c;");
-  ASSERT_EQ(Tokens.size(), 7u);
+  ASSERT_EQ(Tokens.size(), 7u) << Tokens;
   EXPECT_TOKEN(Tokens[3], tok::pipeequal, TT_BinaryOperator);
 
   Tokens = annotate("a = b xor c;");
-  ASSERT_EQ(Tokens.size(), 7u);
+  ASSERT_EQ(Tokens.size(), 7u) << Tokens;
   EXPECT_TOKEN(Tokens[3], tok::caret, TT_BinaryOperator);
 
   Tokens = annotate("a = b xor_eq c;");
-  ASSERT_EQ(Tokens.size(), 7u);
+  ASSERT_EQ(Tokens.size(), 7u) << Tokens;
   EXPECT_TOKEN(Tokens[3], tok::caretequal, TT_BinaryOperator);
 
   Tokens = annotate("xor = foo;");
-  ASSERT_EQ(Tokens.size(), 5u);
+  ASSERT_EQ(Tokens.size(), 5u) << Tokens;
   EXPECT_TOKEN(Tokens[0], tok::identifier, TT_Unknown);
 
   Tokens = annotate("int xor = foo;");
-  ASSERT_EQ(Tokens.size(), 6u);
+  ASSERT_EQ(Tokens.size(), 6u) << Tokens;
   EXPECT_TOKEN(Tokens[1], tok::identifier, TT_StartOfName);
 }
 
@@ -3397,7 +3423,7 @@ TEST_F(TokenAnnotatorTest, FunctionTryBlock) {
                "    : foo{[] -> std::string { return {}; }(), x}, bar{y} {\n"
                "} catch (...) {\n"
                "}");
-  ASSERT_EQ(Tokens.size(), 45u);
+  ASSERT_EQ(Tokens.size(), 45u) << Tokens;
   EXPECT_TOKEN(Tokens[2], tok::identifier, TT_CtorDtorDeclName);
   EXPECT_TOKEN(Tokens[3], tok::l_paren, TT_FunctionDeclarationLParen);
   EXPECT_TOKEN(Tokens[11], tok::colon, TT_CtorInitializerColon);
@@ -3413,7 +3439,7 @@ TEST_F(TokenAnnotatorTest, TypenameMacro) {
   Style.TypenameMacros.push_back("STRUCT");
 
   auto Tokens = annotate("STRUCT(T, B) { int i; };", Style);
-  ASSERT_EQ(Tokens.size(), 13u);
+  ASSERT_EQ(Tokens.size(), 13u) << Tokens;
   EXPECT_TOKEN(Tokens[0], tok::identifier, TT_TypenameMacro);
   EXPECT_TOKEN(Tokens[1], tok::l_paren, TT_TypeDeclarationParen);
   EXPECT_TOKEN(Tokens[5], tok::r_paren, TT_TypeDeclarationParen);
@@ -3425,7 +3451,7 @@ TEST_F(TokenAnnotatorTest, GNULanguageStandard) {
   EXPECT_EQ(Style.Standard, FormatStyle::LS_Latest);
 
   auto Tokens = annotate("return 1 <=> 2;", Style);
-  ASSERT_EQ(Tokens.size(), 6u);
+  ASSERT_EQ(Tokens.size(), 6u) << Tokens;
   EXPECT_TOKEN(Tokens[2], tok::spaceship, TT_BinaryOperator);
 }
 
diff --git clang/unittests/Tooling/DependencyScanning/DependencyScanningFilesystemTest.cpp clang/unittests/Tooling/DependencyScanning/DependencyScanningFilesystemTest.cpp
index 29c0b36492a9..111351fb90ce 100644
--- clang/unittests/Tooling/DependencyScanning/DependencyScanningFilesystemTest.cpp
+++ clang/unittests/Tooling/DependencyScanning/DependencyScanningFilesystemTest.cpp
@@ -13,39 +13,11 @@
 
 using namespace clang::tooling::dependencies;
 
-namespace {
-struct InstrumentingFilesystem
-    : llvm::RTTIExtends<InstrumentingFilesystem, llvm::vfs::ProxyFileSystem> {
-  unsigned NumStatusCalls = 0;
-  unsigned NumGetRealPathCalls = 0;
-  unsigned NumExistsCalls = 0;
-
-  using llvm::RTTIExtends<InstrumentingFilesystem,
-                          llvm::vfs::ProxyFileSystem>::RTTIExtends;
-
-  llvm::ErrorOr<llvm::vfs::Status> status(const llvm::Twine &Path) override {
-    ++NumStatusCalls;
-    return ProxyFileSystem::status(Path);
-  }
-
-  std::error_code getRealPath(const llvm::Twine &Path,
-                              llvm::SmallVectorImpl<char> &Output) override {
-    ++NumGetRealPathCalls;
-    return ProxyFileSystem::getRealPath(Path, Output);
-  }
-
-  bool exists(const llvm::Twine &Path) override {
-    ++NumExistsCalls;
-    return ProxyFileSystem::exists(Path);
-  }
-};
-} // namespace
-
 TEST(DependencyScanningWorkerFilesystem, CacheStatusFailures) {
   auto InMemoryFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
 
   auto InstrumentingFS =
-      llvm::makeIntrusiveRefCnt<InstrumentingFilesystem>(InMemoryFS);
+      llvm::makeIntrusiveRefCnt<llvm::vfs::TracingFileSystem>(InMemoryFS);
 
   DependencyScanningFilesystemSharedCache SharedCache;
   DependencyScanningWorkerFilesystem DepFS(SharedCache, InstrumentingFS);
@@ -71,7 +43,7 @@ TEST(DependencyScanningFilesystem, CacheGetRealPath) {
   InMemoryFS->addFile("/bar", 0, llvm::MemoryBuffer::getMemBuffer(""));
 
   auto InstrumentingFS =
-      llvm::makeIntrusiveRefCnt<InstrumentingFilesystem>(InMemoryFS);
+      llvm::makeIntrusiveRefCnt<llvm::vfs::TracingFileSystem>(InMemoryFS);
 
   DependencyScanningFilesystemSharedCache SharedCache;
   DependencyScanningWorkerFilesystem DepFS(SharedCache, InstrumentingFS);
@@ -157,7 +129,7 @@ TEST(DependencyScanningFilesystem, RealPathAndStatusInvariants) {
 TEST(DependencyScanningFilesystem, CacheStatOnExists) {
   auto InMemoryFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
   auto InstrumentingFS =
-      llvm::makeIntrusiveRefCnt<InstrumentingFilesystem>(InMemoryFS);
+      llvm::makeIntrusiveRefCnt<llvm::vfs::TracingFileSystem>(InMemoryFS);
   InMemoryFS->setCurrentWorkingDirectory("/");
   InMemoryFS->addFile("/foo", 0, llvm::MemoryBuffer::getMemBuffer(""));
   InMemoryFS->addFile("/bar", 0, llvm::MemoryBuffer::getMemBuffer(""));
@@ -182,7 +154,7 @@ TEST(DependencyScanningFilesystem, CacheStatFailures) {
   InMemoryFS->addFile("/cache/a.pcm", 0, llvm::MemoryBuffer::getMemBuffer(""));
 
   auto InstrumentingFS =
-      llvm::makeIntrusiveRefCnt<InstrumentingFilesystem>(InMemoryFS);
+      llvm::makeIntrusiveRefCnt<llvm::vfs::TracingFileSystem>(InMemoryFS);
 
   DependencyScanningFilesystemSharedCache SharedCache;
   DependencyScanningWorkerFilesystem DepFS(SharedCache, InstrumentingFS);
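With the hand-rolled InstrumentingFilesystem gone, the tests rely on llvm::vfs::TracingFileSystem, whose public counters (NumStatusCalls, NumGetRealPathCalls, NumExistsCalls, and so on) are the same ones read in the clang-scan-deps hunk earlier in this patch. A rough usage sketch outside the dependency-scanning harness; the path and the checkTracing name are illustrative:

  #include "llvm/Support/MemoryBuffer.h"
  #include "llvm/Support/VirtualFileSystem.h"

  void checkTracing() {
    auto InMemoryFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
    InMemoryFS->addFile("/foo", 0, llvm::MemoryBuffer::getMemBuffer(""));

    // Wrap any FileSystem to count the calls that pass through it.
    auto TracingFS =
        llvm::makeIntrusiveRefCnt<llvm::vfs::TracingFileSystem>(InMemoryFS);

    (void)TracingFS->status("/foo");
    (void)TracingFS->exists("/foo");
    // TracingFS->NumStatusCalls and TracingFS->NumExistsCalls now reflect
    // the two calls above; other traffic bumps the remaining counters.
  }
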
diff --git clang/unittests/Tooling/StencilTest.cpp clang/unittests/Tooling/StencilTest.cpp
index 26257cf2ca3a..445912a53e8b 100644
--- clang/unittests/Tooling/StencilTest.cpp
+++ clang/unittests/Tooling/StencilTest.cpp
@@ -565,6 +565,28 @@ TEST_F(StencilTest, DescribeAnonNamespaceType) {
                        HasValue(std::string(Expected)));
 }
 
+TEST_F(StencilTest, DescribeFunction) {
+  std::string Snippet = "int F(); F();";
+  std::string Expected = "F";
+  auto StmtMatch = matchStmt(Snippet, callExpr(callee(namedDecl().bind("fn"))));
+  ASSERT_TRUE(StmtMatch);
+  EXPECT_THAT_EXPECTED(describe("fn")->eval(StmtMatch->Result),
+                       HasValue(std::string(Expected)));
+}
+
+TEST_F(StencilTest, DescribeImplicitOperator) {
+  std::string Snippet = "struct Tag {}; [](Tag){};";
+  std::string Expected = "operator()";
+  auto StmtMatch = matchStmt(
+      Snippet,
+      stmt(hasDescendant(
+          cxxMethodDecl(hasParameter(0, hasType(namedDecl(hasName("Tag")))))
+              .bind("fn"))));
+  ASSERT_TRUE(StmtMatch);
+  EXPECT_THAT_EXPECTED(describe("fn")->eval(StmtMatch->Result),
+                       HasValue(std::string(Expected)));
+}
+
 TEST_F(StencilTest, RunOp) {
   StringRef Id = "id";
   auto SimpleFn = [Id](const MatchResult &R) {
diff --git clang/utils/TableGen/ASTTableGen.cpp clang/utils/TableGen/ASTTableGen.cpp
index 54288ff6a03b..47344777e931 100644
--- clang/utils/TableGen/ASTTableGen.cpp
+++ clang/utils/TableGen/ASTTableGen.cpp
@@ -31,7 +31,8 @@ llvm::StringRef clang::tblgen::HasProperties::getName() const {
   }
 }
 
-static StringRef removeExpectedNodeNameSuffix(Record *node, StringRef suffix) {
+static StringRef removeExpectedNodeNameSuffix(const Record *node,
+                                              StringRef suffix) {
   StringRef nodeName = node->getName();
   if (!nodeName.ends_with(suffix)) {
     PrintFatalError(node->getLoc(),
@@ -105,8 +106,7 @@ static void visitASTNodeRecursive(ASTNode node, ASTNode base,
   }
 }
 
-static void visitHierarchy(RecordKeeper &records,
-                           StringRef nodeClassName,
+static void visitHierarchy(const RecordKeeper &records, StringRef nodeClassName,
                            ASTNodeHierarchyVisitor<ASTNode> visit) {
   // Check for the node class, just as a basic correctness check.
   if (!records.getClass(nodeClassName)) {
@@ -114,13 +114,10 @@ static void visitHierarchy(RecordKeeper &records,
                       + nodeClassName);
   }
 
-  // Find all the nodes in the hierarchy.
-  auto nodes = records.getAllDerivedDefinitions(nodeClassName);
-
-  // Derive the child map.
+  // Derive the child map for all nodes in the hierarchy.
   ChildMap hierarchy;
   ASTNode root;
-  for (ASTNode node : nodes) {
+  for (ASTNode node : records.getAllDerivedDefinitions(nodeClassName)) {
     if (auto base = node.getBase())
       hierarchy.insert(std::make_pair(base, node));
     else if (root)
@@ -136,8 +133,8 @@ static void visitHierarchy(RecordKeeper &records,
   visitASTNodeRecursive(root, ASTNode(), hierarchy, visit);
 }
 
-void clang::tblgen::visitASTNodeHierarchyImpl(RecordKeeper &records,
-                                              StringRef nodeClassName,
-                                      ASTNodeHierarchyVisitor<ASTNode> visit) {
+void clang::tblgen::visitASTNodeHierarchyImpl(
+    const RecordKeeper &records, StringRef nodeClassName,
+    ASTNodeHierarchyVisitor<ASTNode> visit) {
   visitHierarchy(records, nodeClassName, visit);
 }
diff --git clang/utils/TableGen/ASTTableGen.h clang/utils/TableGen/ASTTableGen.h
index 41f78a6a3bbc..143d779a8a64 100644
--- clang/utils/TableGen/ASTTableGen.h
+++ clang/utils/TableGen/ASTTableGen.h
@@ -87,18 +87,18 @@ namespace clang {
 namespace tblgen {
 
 class WrappedRecord {
-  llvm::Record *Record;
+  const llvm::Record *Record;
 
 protected:
-  WrappedRecord(llvm::Record *record = nullptr) : Record(record) {}
+  WrappedRecord(const llvm::Record *record = nullptr) : Record(record) {}
 
-  llvm::Record *get() const {
+  const llvm::Record *get() const {
     assert(Record && "accessing null record");
     return Record;
   }
 
 public:
-  llvm::Record *getRecord() const { return Record; }
+  const llvm::Record *getRecord() const { return Record; }
 
   explicit operator bool() const { return Record != nullptr; }
 
@@ -144,7 +144,7 @@ class HasProperties : public WrappedRecord {
 public:
   static constexpr llvm::StringRef ClassName = HasPropertiesClassName;
 
-  HasProperties(llvm::Record *record = nullptr) : WrappedRecord(record) {}
+  HasProperties(const llvm::Record *record = nullptr) : WrappedRecord(record) {}
 
   llvm::StringRef getName() const;
 
@@ -157,7 +157,7 @@ public:
 /// in one of Clang's AST hierarchies.
 class ASTNode : public HasProperties {
 public:
-  ASTNode(llvm::Record *record = nullptr) : HasProperties(record) {}
+  ASTNode(const llvm::Record *record = nullptr) : HasProperties(record) {}
 
   llvm::StringRef getName() const {
     return get()->getName();
@@ -180,7 +180,7 @@ public:
 
 class DeclNode : public ASTNode {
 public:
-  DeclNode(llvm::Record *record = nullptr) : ASTNode(record) {}
+  DeclNode(const llvm::Record *record = nullptr) : ASTNode(record) {}
 
   llvm::StringRef getId() const;
   std::string getClassName() const;
@@ -202,7 +202,7 @@ public:
 
 class TypeNode : public ASTNode {
 public:
-  TypeNode(llvm::Record *record = nullptr) : ASTNode(record) {}
+  TypeNode(const llvm::Record *record = nullptr) : ASTNode(record) {}
 
   llvm::StringRef getId() const;
   llvm::StringRef getClassName() const;
@@ -224,7 +224,7 @@ public:
 
 class StmtNode : public ASTNode {
 public:
-  StmtNode(llvm::Record *record = nullptr) : ASTNode(record) {}
+  StmtNode(const llvm::Record *record = nullptr) : ASTNode(record) {}
 
   std::string getId() const;
   llvm::StringRef getClassName() const;
@@ -247,7 +247,7 @@ public:
 /// The type of a property.
 class PropertyType : public WrappedRecord {
 public:
-  PropertyType(llvm::Record *record = nullptr) : WrappedRecord(record) {}
+  PropertyType(const llvm::Record *record = nullptr) : WrappedRecord(record) {}
 
   /// Is this a generic specialization (i.e. `Array<T>` or `Optional<T>`)?
   bool isGenericSpecialization() const {
@@ -331,7 +331,7 @@ public:
 /// A rule for returning the kind of a type.
 class TypeKindRule : public WrappedRecord {
 public:
-  TypeKindRule(llvm::Record *record = nullptr) : WrappedRecord(record) {}
+  TypeKindRule(const llvm::Record *record = nullptr) : WrappedRecord(record) {}
 
   /// Return the type to which this applies.
   PropertyType getParentType() const {
@@ -361,7 +361,7 @@ public:
 /// An implementation case of a property type.
 class TypeCase : public HasProperties {
 public:
-  TypeCase(llvm::Record *record = nullptr) : HasProperties(record) {}
+  TypeCase(const llvm::Record *record = nullptr) : HasProperties(record) {}
 
   /// Return the name of this case.
   llvm::StringRef getCaseName() const {
@@ -381,7 +381,7 @@ public:
 /// A property of an AST node.
 class Property : public WrappedRecord {
 public:
-  Property(llvm::Record *record = nullptr) : WrappedRecord(record) {}
+  Property(const llvm::Record *record = nullptr) : WrappedRecord(record) {}
 
   /// Return the name of this property.
   llvm::StringRef getName() const {
@@ -417,7 +417,8 @@ public:
 /// a value (which is actually done when writing the value out).
 class ReadHelperRule : public WrappedRecord {
 public:
-  ReadHelperRule(llvm::Record *record = nullptr) : WrappedRecord(record) {}
+  ReadHelperRule(const llvm::Record *record = nullptr)
+      : WrappedRecord(record) {}
 
   /// Return the class for which this is a creation rule.
   /// Should never be abstract.
@@ -437,7 +438,7 @@ public:
 /// A rule for how to create an AST node from its properties.
 class CreationRule : public WrappedRecord {
 public:
-  CreationRule(llvm::Record *record = nullptr) : WrappedRecord(record) {}
+  CreationRule(const llvm::Record *record = nullptr) : WrappedRecord(record) {}
 
   /// Return the class for which this is a creation rule.
   /// Should never be abstract.
@@ -457,7 +458,7 @@ public:
 /// A rule which overrides the standard rules for serializing an AST node.
 class OverrideRule : public WrappedRecord {
 public:
-  OverrideRule(llvm::Record *record = nullptr) : WrappedRecord(record) {}
+  OverrideRule(const llvm::Record *record = nullptr) : WrappedRecord(record) {}
 
   /// Return the class for which this is an override rule.
   /// Should never be abstract.
@@ -483,12 +484,12 @@ template <class NodeClass>
 using ASTNodeHierarchyVisitor =
   llvm::function_ref<void(NodeClass node, NodeClass base)>;
 
-void visitASTNodeHierarchyImpl(llvm::RecordKeeper &records,
+void visitASTNodeHierarchyImpl(const llvm::RecordKeeper &records,
                                llvm::StringRef nodeClassName,
                                ASTNodeHierarchyVisitor<ASTNode> visit);
 
 template <class NodeClass>
-void visitASTNodeHierarchy(llvm::RecordKeeper &records,
+void visitASTNodeHierarchy(const llvm::RecordKeeper &records,
                            ASTNodeHierarchyVisitor<NodeClass> visit) {
   visitASTNodeHierarchyImpl(records, NodeClass::getTableGenNodeClassName(),
                             [visit](ASTNode node, ASTNode base) {
diff --git clang/utils/TableGen/ClangASTNodesEmitter.cpp clang/utils/TableGen/ClangASTNodesEmitter.cpp
index 07ddafce3291..512af830e57c 100644
--- clang/utils/TableGen/ClangASTNodesEmitter.cpp
+++ clang/utils/TableGen/ClangASTNodesEmitter.cpp
@@ -34,7 +34,7 @@ class ClangASTNodesEmitter {
   typedef ChildMap::const_iterator ChildIterator;
 
   std::set<ASTNode> PrioritizedClasses;
-  RecordKeeper &Records;
+  const RecordKeeper &Records;
   ASTNode Root;
   const std::string &NodeClassName;
   const std::string &BaseSuffix;
@@ -70,14 +70,12 @@ class ClangASTNodesEmitter {
 
   std::pair<ASTNode, ASTNode> EmitNode(raw_ostream& OS, ASTNode Base);
 public:
-  explicit ClangASTNodesEmitter(RecordKeeper &R, const std::string &N,
+  explicit ClangASTNodesEmitter(const RecordKeeper &R, const std::string &N,
                                 const std::string &S,
                                 std::string_view PriorizeIfSubclassOf)
       : Records(R), NodeClassName(N), BaseSuffix(S) {
-    auto vecPrioritized =
-        PriorizeIfSubclassOf.empty()
-            ? std::vector<Record *>{}
-            : R.getAllDerivedDefinitions(PriorizeIfSubclassOf);
+    ArrayRef<const Record *> vecPrioritized =
+        R.getAllDerivedDefinitionsIfDefined(PriorizeIfSubclassOf);
     PrioritizedClasses =
         std::set<ASTNode>(vecPrioritized.begin(), vecPrioritized.end());
   }
@@ -169,10 +167,7 @@ void ClangASTNodesEmitter::deriveChildTree() {
   assert(!Root && "already computed tree");
 
   // Emit statements
-  const std::vector<Record*> Stmts
-    = Records.getAllDerivedDefinitions(NodeClassName);
-
-  for (auto *R : Stmts) {
+  for (const Record *R : Records.getAllDerivedDefinitions(NodeClassName)) {
     if (auto B = R->getValueAsOptionalDef(BaseFieldName))
       Tree.insert(std::make_pair(B, R));
     else if (Root)
@@ -217,14 +212,14 @@ void ClangASTNodesEmitter::run(raw_ostream &OS) {
   OS << "#undef ABSTRACT_" << macroHierarchyName() << "\n";
 }
 
-void clang::EmitClangASTNodes(RecordKeeper &RK, raw_ostream &OS,
+void clang::EmitClangASTNodes(const RecordKeeper &RK, raw_ostream &OS,
                               const std::string &N, const std::string &S,
                               std::string_view PriorizeIfSubclassOf) {
   ClangASTNodesEmitter(RK, N, S, PriorizeIfSubclassOf).run(OS);
 }
 
-void printDeclContext(const std::multimap<Record *, Record *> &Tree,
-                      Record *DeclContext, raw_ostream &OS) {
+void printDeclContext(const std::multimap<const Record *, const Record *> &Tree,
+                      const Record *DeclContext, raw_ostream &OS) {
   if (!DeclContext->getValueAsBit(AbstractFieldName))
     OS << "DECL_CONTEXT(" << DeclContext->getName() << ")\n";
   auto i = Tree.lower_bound(DeclContext);
@@ -236,7 +231,7 @@ void printDeclContext(const std::multimap<Record *, Record *> &Tree,
 
 // Emits and addendum to a .inc file to enumerate the clang declaration
 // contexts.
-void clang::EmitClangDeclContext(RecordKeeper &Records, raw_ostream &OS) {
+void clang::EmitClangDeclContext(const RecordKeeper &Records, raw_ostream &OS) {
   // FIXME: Find a .td file format to allow for this to be represented better.
 
   emitSourceFileHeader("List of AST Decl nodes", OS, Records);
@@ -245,22 +240,15 @@ void clang::EmitClangDeclContext(RecordKeeper &Records, raw_ostream &OS) {
   OS << "#  define DECL_CONTEXT(DECL)\n";
   OS << "#endif\n";
 
-  std::vector<Record *> DeclContextsVector =
-      Records.getAllDerivedDefinitions(DeclContextNodeClassName);
-  std::vector<Record *> Decls =
-      Records.getAllDerivedDefinitions(DeclNodeClassName);
-
-  std::multimap<Record *, Record *> Tree;
-
-  const std::vector<Record *> Stmts =
-      Records.getAllDerivedDefinitions(DeclNodeClassName);
+  std::multimap<const Record *, const Record *> Tree;
 
-  for (auto *R : Stmts) {
+  for (const Record *R : Records.getAllDerivedDefinitions(DeclNodeClassName)) {
     if (auto *B = R->getValueAsOptionalDef(BaseFieldName))
       Tree.insert(std::make_pair(B, R));
   }
 
-  for (auto *DeclContext : DeclContextsVector) {
+  for (const Record *DeclContext :
+       Records.getAllDerivedDefinitions(DeclContextNodeClassName)) {
     printDeclContext(Tree, DeclContext, OS);
   }
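
EmitClangDeclContext above inserts a (base, derived) pair per Decl record into a multimap and then walks that tree recursively from each DeclContext record. A minimal standalone sketch of the same base-to-derived walk, with std::string keys standing in for const Record * and a hypothetical node set (the real code also skips abstract nodes):

    #include <iostream>
    #include <map>
    #include <string>

    using Tree = std::multimap<std::string, std::string>;

    // Print a node, then recurse into every child recorded under it.
    void printContext(const Tree &Children, const std::string &Node,
                      std::ostream &OS) {
      OS << "DECL_CONTEXT(" << Node << ")\n";
      auto Range = Children.equal_range(Node);
      for (auto It = Range.first; It != Range.second; ++It)
        printContext(Children, It->second, OS);
    }

    int main() {
      Tree Children;
      // Each entry maps a base node to one of its derived nodes.
      Children.insert({"DeclContext", "TranslationUnit"});
      Children.insert({"DeclContext", "Function"});
      Children.insert({"Function", "CXXMethod"});
      printContext(Children, "DeclContext", std::cout);
    }
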
 
diff --git clang/utils/TableGen/ClangASTPropertiesEmitter.cpp clang/utils/TableGen/ClangASTPropertiesEmitter.cpp
index de8dda60681f..70005da28559 100644
--- clang/utils/TableGen/ClangASTPropertiesEmitter.cpp
+++ clang/utils/TableGen/ClangASTPropertiesEmitter.cpp
@@ -88,99 +88,98 @@ struct CasedTypeInfo {
 };
 
 class ASTPropsEmitter {
-	raw_ostream &Out;
-	RecordKeeper &Records;
-	std::map<HasProperties, NodeInfo> NodeInfos;
+  raw_ostream &Out;
+  RecordKeeper &Records;
+  std::map<HasProperties, NodeInfo> NodeInfos;
   std::vector<PropertyType> AllPropertyTypes;
   std::map<PropertyType, CasedTypeInfo> CasedTypeInfos;
 
 public:
-	ASTPropsEmitter(RecordKeeper &records, raw_ostream &out)
-		: Out(out), Records(records) {
-
-		// Find all the properties.
-		for (Property property :
-           records.getAllDerivedDefinitions(PropertyClassName)) {
-			HasProperties node = property.getClass();
-			NodeInfos[node].Properties.push_back(property);
-		}
+  ASTPropsEmitter(RecordKeeper &records, raw_ostream &out)
+      : Out(out), Records(records) {
+
+    // Find all the properties.
+    for (Property property :
+         records.getAllDerivedDefinitions(PropertyClassName)) {
+      HasProperties node = property.getClass();
+      NodeInfos[node].Properties.push_back(property);
+    }
 
     // Find all the creation rules.
     for (CreationRule creationRule :
-           records.getAllDerivedDefinitions(CreationRuleClassName)) {
+         records.getAllDerivedDefinitions(CreationRuleClassName)) {
       HasProperties node = creationRule.getClass();
 
       auto &info = NodeInfos[node];
       if (info.Creator) {
-        PrintFatalError(creationRule.getLoc(),
-                        "multiple creator rules for \"" + node.getName()
-                          + "\"");
+        PrintFatalError(creationRule.getLoc(), "multiple creator rules for \"" +
+                                                   node.getName() + "\"");
       }
       info.Creator = creationRule;
     }
 
     // Find all the override rules.
     for (OverrideRule overrideRule :
-           records.getAllDerivedDefinitions(OverrideRuleClassName)) {
+         records.getAllDerivedDefinitions(OverrideRuleClassName)) {
       HasProperties node = overrideRule.getClass();
 
       auto &info = NodeInfos[node];
       if (info.Override) {
         PrintFatalError(overrideRule.getLoc(),
-                        "multiple override rules for \"" + node.getName()
-                          + "\"");
+                        "multiple override rules for \"" + node.getName() +
+                            "\"");
       }
       info.Override = overrideRule;
     }
 
     // Find all the write helper rules.
     for (ReadHelperRule helperRule :
-           records.getAllDerivedDefinitions(ReadHelperRuleClassName)) {
+         records.getAllDerivedDefinitions(ReadHelperRuleClassName)) {
       HasProperties node = helperRule.getClass();
 
       auto &info = NodeInfos[node];
       if (info.ReadHelper) {
         PrintFatalError(helperRule.getLoc(),
-                        "multiple write helper rules for \"" + node.getName()
-                          + "\"");
+                        "multiple write helper rules for \"" + node.getName() +
+                            "\"");
       }
       info.ReadHelper = helperRule;
     }
 
     // Find all the concrete property types.
     for (PropertyType type :
-           records.getAllDerivedDefinitions(PropertyTypeClassName)) {
+         records.getAllDerivedDefinitions(PropertyTypeClassName)) {
       // Ignore generic specializations; they're generally not useful when
       // emitting basic emitters etc.
-      if (type.isGenericSpecialization()) continue;
+      if (type.isGenericSpecialization())
+        continue;
 
       AllPropertyTypes.push_back(type);
     }
 
     // Find all the type kind rules.
     for (TypeKindRule kindRule :
-           records.getAllDerivedDefinitions(TypeKindClassName)) {
+         records.getAllDerivedDefinitions(TypeKindClassName)) {
       PropertyType type = kindRule.getParentType();
       auto &info = CasedTypeInfos[type];
       if (info.KindRule) {
-        PrintFatalError(kindRule.getLoc(),
-                        "multiple kind rules for \""
-                           + type.getCXXTypeName() + "\"");
+        PrintFatalError(kindRule.getLoc(), "multiple kind rules for \"" +
+                                               type.getCXXTypeName() + "\"");
       }
       info.KindRule = kindRule;
     }
 
     // Find all the type cases.
     for (TypeCase typeCase :
-           records.getAllDerivedDefinitions(TypeCaseClassName)) {
+         records.getAllDerivedDefinitions(TypeCaseClassName)) {
       CasedTypeInfos[typeCase.getParentType()].Cases.push_back(typeCase);
     }
 
     Validator(*this).validate();
-	}
+  }
 
   void visitAllProperties(HasProperties derived, const NodeInfo &derivedInfo,
-                          function_ref<void (Property)> visit) {
+                          function_ref<void(Property)> visit) {
     std::set<StringRef> ignoredProperties;
 
     auto overrideRule = derivedInfo.Override;
@@ -195,20 +194,19 @@ public:
 
     visitAllNodesWithInfo(derived, derivedInfo,
                           [&](HasProperties node, const NodeInfo &info) {
-      for (Property prop : info.Properties) {
-        if (ignoredProperties.count(prop.getName()))
-          continue;
+                            for (Property prop : info.Properties) {
+                              if (ignoredProperties.count(prop.getName()))
+                                continue;
 
-        visit(prop);
-      }
-    });
+                              visit(prop);
+                            }
+                          });
   }
 
-  void visitAllNodesWithInfo(HasProperties derivedNode,
-                             const NodeInfo &derivedNodeInfo,
-                             llvm::function_ref<void (HasProperties node,
-                                                      const NodeInfo &info)>
-                               visit) {
+  void visitAllNodesWithInfo(
+      HasProperties derivedNode, const NodeInfo &derivedNodeInfo,
+      llvm::function_ref<void(HasProperties node, const NodeInfo &info)>
+          visit) {
     visit(derivedNode, derivedNodeInfo);
 
     // Also walk the bases if appropriate.
@@ -217,7 +215,8 @@ public:
         auto it = NodeInfos.find(base);
 
         // Ignore intermediate nodes that don't add interesting properties.
-        if (it == NodeInfos.end()) continue;
+        if (it == NodeInfos.end())
+          continue;
         auto &baseInfo = it->second;
 
         visit(base, baseInfo);
@@ -225,14 +224,12 @@ public:
     }
   }
 
-  template <class NodeClass>
-  void emitNodeReaderClass() {
+  template <class NodeClass> void emitNodeReaderClass() {
     auto info = ReaderWriterInfo::forReader<NodeClass>();
     emitNodeReaderWriterClass<NodeClass>(info);
   }
 
-  template <class NodeClass>
-  void emitNodeWriterClass() {
+  template <class NodeClass> void emitNodeWriterClass() {
     auto info = ReaderWriterInfo::forWriter<NodeClass>();
     emitNodeReaderWriterClass<NodeClass>(info);
   }
@@ -241,8 +238,7 @@ public:
   void emitNodeReaderWriterClass(const ReaderWriterInfo &info);
 
   template <class NodeClass>
-  void emitNodeReaderWriterMethod(NodeClass node,
-                                  const ReaderWriterInfo &info);
+  void emitNodeReaderWriterMethod(NodeClass node, const ReaderWriterInfo &info);
 
   void emitPropertiedReaderWriterBody(HasProperties node,
                                       const ReaderWriterInfo &info);
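
visitAllProperties above gathers the property names that the derived node's override rule ignores, then visits the remaining properties of the node and of each base through a function_ref callback. A minimal standalone sketch of that walk, using simplified Property/NodeInfo stand-ins and std::function in place of llvm::function_ref:

    #include <functional>
    #include <iostream>
    #include <set>
    #include <string>
    #include <vector>

    struct Property {
      std::string Name;
    };

    struct NodeInfo {
      std::vector<Property> Properties;
      const NodeInfo *Base = nullptr;           // base node, if any
      std::set<std::string> IgnoredProperties;  // from an override rule
    };

    // Visit every property of the derived node and its bases, skipping the
    // names ignored by the most-derived node's override rule.
    void visitAllProperties(const NodeInfo &Derived,
                            std::function<void(const Property &)> Visit) {
      const std::set<std::string> &Ignored = Derived.IgnoredProperties;
      for (const NodeInfo *Node = &Derived; Node; Node = Node->Base)
        for (const Property &P : Node->Properties)
          if (!Ignored.count(P.Name))
            Visit(P);
    }

    int main() {
      NodeInfo Base;
      Base.Properties = {{"location"}, {"implicit"}};

      NodeInfo Derived;
      Derived.Properties = {{"name"}};
      Derived.Base = &Base;
      Derived.IgnoredProperties = {"implicit"};

      visitAllProperties(Derived, [](const Property &P) {
        std::cout << "property: " << P.Name << "\n";
      });
    }
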
diff --git clang/utils/TableGen/ClangAttrEmitter.cpp clang/utils/TableGen/ClangAttrEmitter.cpp
index adbe6af62d5c..9b2249ac90bc 100644
--- clang/utils/TableGen/ClangAttrEmitter.cpp
+++ clang/utils/TableGen/ClangAttrEmitter.cpp
@@ -192,10 +192,9 @@ typedef std::vector<std::pair<std::string, const Record *>> ParsedAttrMap;
 static ParsedAttrMap getParsedAttrList(const RecordKeeper &Records,
                                        ParsedAttrMap *Dupes = nullptr,
                                        bool SemaOnly = true) {
-  std::vector<Record *> Attrs = Records.getAllDerivedDefinitions("Attr");
   std::set<std::string> Seen;
   ParsedAttrMap R;
-  for (const auto *Attr : Attrs) {
+  for (const Record *Attr : Records.getAllDerivedDefinitions("Attr")) {
     if (!SemaOnly || Attr->getValueAsBit("SemaHandler")) {
       std::string AN;
       if (Attr->isSubClassOf("TargetSpecificAttr") &&
@@ -1911,12 +1910,10 @@ static LateAttrParseKind getLateAttrParseKind(const Record *Attr) {
 }
 
 // Emits the LateParsed property for attributes.
-static void emitClangAttrLateParsedListImpl(RecordKeeper &Records,
+static void emitClangAttrLateParsedListImpl(const RecordKeeper &Records,
                                             raw_ostream &OS,
                                             LateAttrParseKind LateParseMode) {
-  std::vector<Record *> Attrs = Records.getAllDerivedDefinitions("Attr");
-
-  for (const auto *Attr : Attrs) {
+  for (const auto *Attr : Records.getAllDerivedDefinitions("Attr")) {
     if (LateAttrParseKind LateParsed = getLateAttrParseKind(Attr);
         LateParsed != LateParseMode)
       continue;
@@ -1932,14 +1929,14 @@ static void emitClangAttrLateParsedListImpl(RecordKeeper &Records,
   }
 }
 
-static void emitClangAttrLateParsedList(RecordKeeper &Records,
+static void emitClangAttrLateParsedList(const RecordKeeper &Records,
                                         raw_ostream &OS) {
   OS << "#if defined(CLANG_ATTR_LATE_PARSED_LIST)\n";
   emitClangAttrLateParsedListImpl(Records, OS, LateAttrParseKind::Standard);
   OS << "#endif // CLANG_ATTR_LATE_PARSED_LIST\n\n";
 }
 
-static void emitClangAttrLateParsedExperimentalList(RecordKeeper &Records,
+static void emitClangAttrLateParsedExperimentalList(const RecordKeeper &Records,
                                                     raw_ostream &OS) {
   OS << "#if defined(CLANG_ATTR_LATE_PARSED_EXPERIMENTAL_EXT_LIST)\n";
   emitClangAttrLateParsedListImpl(Records, OS,
@@ -2066,7 +2063,7 @@ struct PragmaClangAttributeSupport {
   };
   llvm::DenseMap<const Record *, RuleOrAggregateRuleSet> SubjectsToRules;
 
-  PragmaClangAttributeSupport(RecordKeeper &Records);
+  PragmaClangAttributeSupport(const RecordKeeper &Records);
 
   bool isAttributedSupported(const Record &Attribute);
 
@@ -2105,9 +2102,7 @@ static bool doesDeclDeriveFrom(const Record *D, const Record *Base) {
 }
 
 PragmaClangAttributeSupport::PragmaClangAttributeSupport(
-    RecordKeeper &Records) {
-  std::vector<Record *> MetaSubjects =
-      Records.getAllDerivedDefinitions("AttrSubjectMatcherRule");
+    const RecordKeeper &Records) {
   auto MapFromSubjectsToRules = [this](const Record *SubjectContainer,
                                        const Record *MetaSubject,
                                        const Record *Constraint) {
@@ -2127,7 +2122,8 @@ PragmaClangAttributeSupport::PragmaClangAttributeSupport(
       }
     }
   };
-  for (const auto *MetaSubject : MetaSubjects) {
+  for (const auto *MetaSubject :
+       Records.getAllDerivedDefinitions("AttrSubjectMatcherRule")) {
     MapFromSubjectsToRules(MetaSubject, MetaSubject, /*Constraints=*/nullptr);
     std::vector<Record *> Constraints =
         MetaSubject->getValueAsListOfDefs("Constraints");
@@ -2135,11 +2131,10 @@ PragmaClangAttributeSupport::PragmaClangAttributeSupport(
       MapFromSubjectsToRules(Constraint, MetaSubject, Constraint);
   }
 
-  std::vector<Record *> Aggregates =
-      Records.getAllDerivedDefinitions("AttrSubjectMatcherAggregateRule");
-  std::vector<Record *> DeclNodes =
-    Records.getAllDerivedDefinitions(DeclNodeClassName);
-  for (const auto *Aggregate : Aggregates) {
+  ArrayRef<const Record *> DeclNodes =
+      Records.getAllDerivedDefinitions(DeclNodeClassName);
+  for (const auto *Aggregate :
+       Records.getAllDerivedDefinitions("AttrSubjectMatcherAggregateRule")) {
     Record *SubjectDecl = Aggregate->getValueAsDef("Subject");
 
     // Gather sub-classes of the aggregate subject that act as attribute
@@ -2169,7 +2164,7 @@ PragmaClangAttributeSupport::PragmaClangAttributeSupport(
 }
 
 static PragmaClangAttributeSupport &
-getPragmaAttributeSupport(RecordKeeper &Records) {
+getPragmaAttributeSupport(const RecordKeeper &Records) {
   static PragmaClangAttributeSupport Instance(Records);
   return Instance;
 }
@@ -2403,9 +2398,8 @@ std::map<std::string, std::vector<const Record *>> NameToAttrsMap;
 /// Build a map from the attribute name to the Attrs that use that name. If more
 /// than one Attr uses a name, the arguments could be different so a more complex
 /// check is needed in the generated switch.
-void generateNameToAttrsMap(RecordKeeper &Records) {
-  std::vector<Record *> Attrs = Records.getAllDerivedDefinitions("Attr");
-  for (const auto *A : Attrs) {
+void generateNameToAttrsMap(const RecordKeeper &Records) {
+  for (const auto *A : Records.getAllDerivedDefinitions("Attr")) {
     std::vector<FlattenedSpelling> Spellings = GetFlattenedSpellings(*A);
     for (const auto &S : Spellings) {
       auto It = NameToAttrsMap.find(S.name());
@@ -2510,12 +2504,11 @@ static bool isTypeArgument(const Record *Arg) {
 }
 
 /// Emits the first-argument-is-type property for attributes.
-static void emitClangAttrTypeArgList(RecordKeeper &Records, raw_ostream &OS) {
+static void emitClangAttrTypeArgList(const RecordKeeper &Records,
+                                     raw_ostream &OS) {
   OS << "#if defined(CLANG_ATTR_TYPE_ARG_LIST)\n";
   std::map<std::string, FSIVecTy> FSIMap;
-  std::vector<Record *> Attrs = Records.getAllDerivedDefinitions("Attr");
-
-  for (const auto *Attr : Attrs) {
+  for (const auto *Attr : Records.getAllDerivedDefinitions("Attr")) {
     // Determine whether the first argument is a type.
     std::vector<Record *> Args = Attr->getValueAsListOfDefs("Args");
     if (Args.empty())
@@ -2531,7 +2524,8 @@ static void emitClangAttrTypeArgList(RecordKeeper &Records, raw_ostream &OS) {
 
 /// Emits the parse-arguments-in-unevaluated-context property for
 /// attributes.
-static void emitClangAttrArgContextList(RecordKeeper &Records, raw_ostream &OS) {
+static void emitClangAttrArgContextList(const RecordKeeper &Records,
+                                        raw_ostream &OS) {
   OS << "#if defined(CLANG_ATTR_ARG_CONTEXT_LIST)\n";
   std::map<std::string, FSIVecTy> FSIMap;
   ParsedAttrMap Attrs = getParsedAttrList(Records);
@@ -2590,12 +2584,11 @@ static bool isVariadicStringLiteralArgument(const Record *Arg) {
   return ArgKind == "VariadicStringArgument";
 }
 
-static void emitClangAttrVariadicIdentifierArgList(RecordKeeper &Records,
+static void emitClangAttrVariadicIdentifierArgList(const RecordKeeper &Records,
                                                    raw_ostream &OS) {
   OS << "#if defined(CLANG_ATTR_VARIADIC_IDENTIFIER_ARG_LIST)\n";
-  std::vector<Record *> Attrs = Records.getAllDerivedDefinitions("Attr");
   std::map<std::string, FSIVecTy> FSIMap;
-  for (const auto *A : Attrs) {
+  for (const auto *A : Records.getAllDerivedDefinitions("Attr")) {
     // Determine whether the first argument is a variadic identifier.
     std::vector<Record *> Args = A->getValueAsListOfDefs("Args");
     if (Args.empty() || !isVariadicIdentifierArgument(Args[0]))
@@ -2608,8 +2601,9 @@ static void emitClangAttrVariadicIdentifierArgList(RecordKeeper &Records,
 
 // Emits the list of arguments that should be parsed as unevaluated string
 // literals for each attribute.
-static void emitClangAttrUnevaluatedStringLiteralList(RecordKeeper &Records,
-                                                      raw_ostream &OS) {
+static void
+emitClangAttrUnevaluatedStringLiteralList(const RecordKeeper &Records,
+                                          raw_ostream &OS) {
   OS << "#if defined(CLANG_ATTR_STRING_LITERAL_ARG_LIST)\n";
 
   auto MakeMask = [](ArrayRef<Record *> Args) {
@@ -2626,9 +2620,8 @@ static void emitClangAttrUnevaluatedStringLiteralList(RecordKeeper &Records,
     return Bits;
   };
 
-  std::vector<Record *> Attrs = Records.getAllDerivedDefinitions("Attr");
   std::map<std::string, FSIVecTy> FSIMap;
-  for (const auto *Attr : Attrs) {
+  for (const auto *Attr : Records.getAllDerivedDefinitions("Attr")) {
     // Determine whether there are any string arguments.
     uint32_t ArgMask = MakeMask(Attr->getValueAsListOfDefs("Args"));
     if (!ArgMask)
@@ -2640,12 +2633,11 @@ static void emitClangAttrUnevaluatedStringLiteralList(RecordKeeper &Records,
 }
 
 // Emits the first-argument-is-identifier property for attributes.
-static void emitClangAttrIdentifierArgList(RecordKeeper &Records, raw_ostream &OS) {
+static void emitClangAttrIdentifierArgList(const RecordKeeper &Records,
+                                           raw_ostream &OS) {
   OS << "#if defined(CLANG_ATTR_IDENTIFIER_ARG_LIST)\n";
-  std::vector<Record*> Attrs = Records.getAllDerivedDefinitions("Attr");
-
   std::map<std::string, FSIVecTy> FSIMap;
-  for (const auto *Attr : Attrs) {
+  for (const auto *Attr : Records.getAllDerivedDefinitions("Attr")) {
     // Determine whether the first argument is an identifier.
     std::vector<Record *> Args = Attr->getValueAsListOfDefs("Args");
     if (Args.empty() || !isIdentifierArgument(Args[0]))
@@ -2657,13 +2649,11 @@ static void emitClangAttrIdentifierArgList(RecordKeeper &Records, raw_ostream &O
 }
 
 // Emits the list for attributes having StrictEnumParameters.
-static void emitClangAttrStrictIdentifierArgList(RecordKeeper &Records,
+static void emitClangAttrStrictIdentifierArgList(const RecordKeeper &Records,
                                                  raw_ostream &OS) {
   OS << "#if defined(CLANG_ATTR_STRICT_IDENTIFIER_ARG_LIST)\n";
-  std::vector<Record *> Attrs = Records.getAllDerivedDefinitions("Attr");
-
   std::map<std::string, FSIVecTy> FSIMap;
-  for (const auto *Attr : Attrs) {
+  for (const auto *Attr : Records.getAllDerivedDefinitions("Attr")) {
     if (!Attr->getValueAsBit("StrictEnumParameters"))
       continue;
     // Check that there is really an identifier argument.
@@ -2684,12 +2674,11 @@ static bool keywordThisIsaIdentifierInArgument(const Record *Arg) {
              .Default(false);
 }
 
-static void emitClangAttrThisIsaIdentifierArgList(RecordKeeper &Records,
+static void emitClangAttrThisIsaIdentifierArgList(const RecordKeeper &Records,
                                                   raw_ostream &OS) {
   OS << "#if defined(CLANG_ATTR_THIS_ISA_IDENTIFIER_ARG_LIST)\n";
-  std::vector<Record *> Attrs = Records.getAllDerivedDefinitions("Attr");
   std::map<std::string, FSIVecTy> FSIMap;
-  for (const auto *A : Attrs) {
+  for (const auto *A : Records.getAllDerivedDefinitions("Attr")) {
     // Determine whether the first argument is a variadic identifier.
     std::vector<Record *> Args = A->getValueAsListOfDefs("Args");
     if (Args.empty() || !keywordThisIsaIdentifierInArgument(Args[0]))
@@ -2700,7 +2689,7 @@ static void emitClangAttrThisIsaIdentifierArgList(RecordKeeper &Records,
   OS << "#endif // CLANG_ATTR_THIS_ISA_IDENTIFIER_ARG_LIST\n\n";
 }
 
-static void emitClangAttrAcceptsExprPack(RecordKeeper &Records,
+static void emitClangAttrAcceptsExprPack(const RecordKeeper &Records,
                                          raw_ostream &OS) {
   OS << "#if defined(CLANG_ATTR_ACCEPTS_EXPR_PACK)\n";
   ParsedAttrMap Attrs = getParsedAttrList(Records);
@@ -2733,9 +2722,8 @@ static void emitFormInitializer(raw_ostream &OS,
      << " /*IsRegularKeywordAttribute*/}";
 }
 
-static void emitAttributes(RecordKeeper &Records, raw_ostream &OS,
+static void emitAttributes(const RecordKeeper &Records, raw_ostream &OS,
                            bool Header) {
-  std::vector<Record*> Attrs = Records.getAllDerivedDefinitions("Attr");
   ParsedAttrMap AttrMap = getParsedAttrList(Records);
 
   // Helper to print the starting character of an attribute argument. If there
@@ -2750,7 +2738,7 @@ static void emitAttributes(RecordKeeper &Records, raw_ostream &OS,
      << "    OS << \", \";\n"
      << "}\n";
 
-  for (const auto *Attr : Attrs) {
+  for (const auto *Attr : Records.getAllDerivedDefinitions("Attr")) {
     const Record &R = *Attr;
 
     // FIXME: Currently, documentation is generated as-needed due to the fact
@@ -3235,7 +3223,7 @@ static void emitAttributes(RecordKeeper &Records, raw_ostream &OS,
   }
 }
 // Emits the class definitions for attributes.
-void clang::EmitClangAttrClass(RecordKeeper &Records, raw_ostream &OS) {
+void clang::EmitClangAttrClass(const RecordKeeper &Records, raw_ostream &OS) {
   emitSourceFileHeader("Attribute classes' definitions", OS, Records);
 
   OS << "#ifndef LLVM_CLANG_ATTR_CLASSES_INC\n";
@@ -3247,19 +3235,17 @@ void clang::EmitClangAttrClass(RecordKeeper &Records, raw_ostream &OS) {
 }
 
 // Emits the class method definitions for attributes.
-void clang::EmitClangAttrImpl(RecordKeeper &Records, raw_ostream &OS) {
+void clang::EmitClangAttrImpl(const RecordKeeper &Records, raw_ostream &OS) {
   emitSourceFileHeader("Attribute classes' member function definitions", OS,
                        Records);
 
   emitAttributes(Records, OS, false);
 
-  std::vector<Record *> Attrs = Records.getAllDerivedDefinitions("Attr");
-
   // Instead of relying on virtual dispatch we just create a huge dispatch
   // switch. This is both smaller and faster than virtual functions.
   auto EmitFunc = [&](const char *Method) {
     OS << "  switch (getKind()) {\n";
-    for (const auto *Attr : Attrs) {
+    for (const auto *Attr : Records.getAllDerivedDefinitions("Attr")) {
       const Record &R = *Attr;
       if (!R.getValueAsBit("ASTNode"))
         continue;
@@ -3285,7 +3271,7 @@ void clang::EmitClangAttrImpl(RecordKeeper &Records, raw_ostream &OS) {
 }
 
 static void emitAttrList(raw_ostream &OS, StringRef Class,
-                         const std::vector<Record*> &AttrList) {
+                         ArrayRef<const Record *> AttrList) {
   for (auto Cur : AttrList) {
     OS << Class << "(" << Cur->getName() << ")\n";
   }
@@ -3333,13 +3319,13 @@ namespace {
   /// A class of attributes.
   struct AttrClass {
     const AttrClassDescriptor &Descriptor;
-    Record *TheRecord;
+    const Record *TheRecord;
     AttrClass *SuperClass = nullptr;
     std::vector<AttrClass*> SubClasses;
-    std::vector<Record*> Attrs;
+    std::vector<const Record *> Attrs;
 
-    AttrClass(const AttrClassDescriptor &Descriptor, Record *R)
-      : Descriptor(Descriptor), TheRecord(R) {}
+    AttrClass(const AttrClassDescriptor &Descriptor, const Record *R)
+        : Descriptor(Descriptor), TheRecord(R) {}
 
     void emitDefaultDefines(raw_ostream &OS) const {
       // Default the macro unless this is a root class (i.e. Attr).
@@ -3361,7 +3347,7 @@ namespace {
       ::emitAttrList(OS, Descriptor.MacroName, Attrs);
     }
 
-    void classifyAttrOnRoot(Record *Attr) {
+    void classifyAttrOnRoot(const Record *Attr) {
       bool result = classifyAttr(Attr);
       assert(result && "failed to classify on root"); (void) result;
     }
@@ -3373,7 +3359,7 @@ namespace {
     }
 
   private:
-    bool classifyAttr(Record *Attr) {
+    bool classifyAttr(const Record *Attr) {
       // Check all the subclasses.
       for (auto SubClass : SubClasses) {
         if (SubClass->classifyAttr(Attr))
@@ -3389,13 +3375,13 @@ namespace {
       return false;
     }
 
-    Record *getFirstAttr() const {
+    const Record *getFirstAttr() const {
       if (!SubClasses.empty())
         return SubClasses.front()->getFirstAttr();
       return Attrs.front();
     }
 
-    Record *getLastAttr() const {
+    const Record *getLastAttr() const {
       if (!Attrs.empty())
         return Attrs.back();
       return SubClasses.back()->getLastAttr();
@@ -3407,7 +3393,7 @@ namespace {
     std::vector<std::unique_ptr<AttrClass>> Classes;
 
   public:
-    AttrClassHierarchy(RecordKeeper &Records) {
+    AttrClassHierarchy(const RecordKeeper &Records) {
       // Find records for all the classes.
       for (auto &Descriptor : AttrClassDescriptors) {
         Record *ClassRecord = Records.getClass(Descriptor.TableGenName);
@@ -3453,7 +3439,7 @@ namespace {
         Class->emitAttrRange(OS);
     }
 
-    void classifyAttr(Record *Attr) {
+    void classifyAttr(const Record *Attr) {
       // Add the attribute to the root class.
       Classes[0]->classifyAttrOnRoot(Attr);
     }
@@ -3467,7 +3453,7 @@ namespace {
       return nullptr;
     }
 
-    AttrClass *findSuperClass(Record *R) const {
+    AttrClass *findSuperClass(const Record *R) const {
       // TableGen flattens the superclass list, so we just need to walk it
       // in reverse.
       auto SuperClasses = R->getSuperClasses();
@@ -3484,7 +3470,7 @@ namespace {
 namespace clang {
 
 // Emits the enumeration list for attributes.
-void EmitClangAttrList(RecordKeeper &Records, raw_ostream &OS) {
+void EmitClangAttrList(const RecordKeeper &Records, raw_ostream &OS) {
   emitSourceFileHeader("List of all attributes that Clang recognizes", OS,
                        Records);
 
@@ -3494,9 +3480,8 @@ void EmitClangAttrList(RecordKeeper &Records, raw_ostream &OS) {
   Hierarchy.emitDefaultDefines(OS);
   emitDefaultDefine(OS, "PRAGMA_SPELLING_ATTR", nullptr);
 
-  std::vector<Record *> Attrs = Records.getAllDerivedDefinitions("Attr");
-  std::vector<Record *> PragmaAttrs;
-  for (auto *Attr : Attrs) {
+  std::vector<const Record *> PragmaAttrs;
+  for (auto *Attr : Records.getAllDerivedDefinitions("Attr")) {
     if (!Attr->getValueAsBit("ASTNode"))
       continue;
 
@@ -3525,7 +3510,8 @@ void EmitClangAttrList(RecordKeeper &Records, raw_ostream &OS) {
 }
 
 // Emits the enumeration list for attributes.
-void EmitClangAttrSubjectMatchRuleList(RecordKeeper &Records, raw_ostream &OS) {
+void EmitClangAttrSubjectMatchRuleList(const RecordKeeper &Records,
+                                       raw_ostream &OS) {
   emitSourceFileHeader(
       "List of all attribute subject matching rules that Clang recognizes", OS,
       Records);
@@ -3537,17 +3523,16 @@ void EmitClangAttrSubjectMatchRuleList(RecordKeeper &Records, raw_ostream &OS) {
 }
 
 // Emits the code to read an attribute from a precompiled header.
-void EmitClangAttrPCHRead(RecordKeeper &Records, raw_ostream &OS) {
+void EmitClangAttrPCHRead(const RecordKeeper &Records, raw_ostream &OS) {
   emitSourceFileHeader("Attribute deserialization code", OS, Records);
 
   Record *InhClass = Records.getClass("InheritableAttr");
-  std::vector<Record*> Attrs = Records.getAllDerivedDefinitions("Attr"),
-                       ArgRecords;
+  std::vector<Record *> ArgRecords;
   std::vector<std::unique_ptr<Argument>> Args;
   std::unique_ptr<VariadicExprArgument> DelayedArgs;
 
   OS << "  switch (Kind) {\n";
-  for (const auto *Attr : Attrs) {
+  for (const auto *Attr : Records.getAllDerivedDefinitions("Attr")) {
     const Record &R = *Attr;
     if (!R.getValueAsBit("ASTNode"))
       continue;
@@ -3592,19 +3577,17 @@ void EmitClangAttrPCHRead(RecordKeeper &Records, raw_ostream &OS) {
 }
 
 // Emits the code to write an attribute to a precompiled header.
-void EmitClangAttrPCHWrite(RecordKeeper &Records, raw_ostream &OS) {
+void EmitClangAttrPCHWrite(const RecordKeeper &Records, raw_ostream &OS) {
   emitSourceFileHeader("Attribute serialization code", OS, Records);
 
   Record *InhClass = Records.getClass("InheritableAttr");
-  std::vector<Record*> Attrs = Records.getAllDerivedDefinitions("Attr"), Args;
-
   OS << "  switch (A->getKind()) {\n";
-  for (const auto *Attr : Attrs) {
+  for (const auto *Attr : Records.getAllDerivedDefinitions("Attr")) {
     const Record &R = *Attr;
     if (!R.getValueAsBit("ASTNode"))
       continue;
     OS << "  case attr::" << R.getName() << ": {\n";
-    Args = R.getValueAsListOfDefs("Args");
+    std::vector<Record *> Args = R.getValueAsListOfDefs("Args");
     if (R.isSubClassOf(InhClass) || !Args.empty())
       OS << "    const auto *SA = cast<" << R.getName()
          << "Attr>(A);\n";
@@ -3784,7 +3767,7 @@ static void GenerateHasAttrSpellingStringSwitch(
 namespace clang {
 
 // Emits list of regular keyword attributes with info about their arguments.
-void EmitClangRegularKeywordAttributeInfo(RecordKeeper &Records,
+void EmitClangRegularKeywordAttributeInfo(const RecordKeeper &Records,
                                           raw_ostream &OS) {
   emitSourceFileHeader(
       "A list of regular keyword attributes generated from the attribute"
@@ -3808,13 +3791,12 @@ void EmitClangRegularKeywordAttributeInfo(RecordKeeper &Records,
 }
 
 // Emits the list of spellings for attributes.
-void EmitClangAttrHasAttrImpl(RecordKeeper &Records, raw_ostream &OS) {
+void EmitClangAttrHasAttrImpl(const RecordKeeper &Records, raw_ostream &OS) {
   emitSourceFileHeader("Code to implement the __has_attribute logic", OS,
                        Records);
 
   // Separate all of the attributes out into four groups: generic, C++11, GNU,
   // and declspecs. Then generate a big switch statement for each of them.
-  std::vector<Record *> Attrs = Records.getAllDerivedDefinitions("Attr");
   std::vector<std::pair<const Record *, FlattenedSpelling>> Declspec, Microsoft,
       GNU, Pragma, HLSLAnnotation;
   std::map<std::string,
@@ -3823,7 +3805,7 @@ void EmitClangAttrHasAttrImpl(RecordKeeper &Records, raw_ostream &OS) {
 
   // Walk over the list of all attributes, and split them out based on the
   // spelling variety.
-  for (auto *R : Attrs) {
+  for (auto *R : Records.getAllDerivedDefinitions("Attr")) {
     std::vector<FlattenedSpelling> Spellings = GetFlattenedSpellings(*R);
     for (const auto &SI : Spellings) {
       const std::string &Variety = SI.variety();
@@ -3895,7 +3877,8 @@ void EmitClangAttrHasAttrImpl(RecordKeeper &Records, raw_ostream &OS) {
   OS << "}\n";
 }
 
-void EmitClangAttrSpellingListIndex(RecordKeeper &Records, raw_ostream &OS) {
+void EmitClangAttrSpellingListIndex(const RecordKeeper &Records,
+                                    raw_ostream &OS) {
   emitSourceFileHeader("Code to translate different attribute spellings into "
                        "internal identifiers",
                        OS, Records);
@@ -3927,16 +3910,14 @@ void EmitClangAttrSpellingListIndex(RecordKeeper &Records, raw_ostream &OS) {
 }
 
 // Emits code used by RecursiveASTVisitor to visit attributes
-void EmitClangAttrASTVisitor(RecordKeeper &Records, raw_ostream &OS) {
+void EmitClangAttrASTVisitor(const RecordKeeper &Records, raw_ostream &OS) {
   emitSourceFileHeader("Used by RecursiveASTVisitor to visit attributes.", OS,
                        Records);
-
-  std::vector<Record*> Attrs = Records.getAllDerivedDefinitions("Attr");
-
   // Write method declarations for Traverse* methods.
   // We emit this here because we only generate methods for attributes that
   // are declared as ASTNodes.
   OS << "#ifdef ATTR_VISITOR_DECLS_ONLY\n\n";
+  ArrayRef<const Record *> Attrs = Records.getAllDerivedDefinitions("Attr");
   for (const auto *Attr : Attrs) {
     const Record &R = *Attr;
     if (!R.getValueAsBit("ASTNode"))
@@ -3999,7 +3980,7 @@ void EmitClangAttrASTVisitor(RecordKeeper &Records, raw_ostream &OS) {
   OS << "#endif  // ATTR_VISITOR_DECLS_ONLY\n";
 }
 
-void EmitClangAttrTemplateInstantiateHelper(const std::vector<Record *> &Attrs,
+void EmitClangAttrTemplateInstantiateHelper(ArrayRef<const Record *> Attrs,
                                             raw_ostream &OS,
                                             bool AppliesToDecl) {
 
@@ -4053,11 +4034,12 @@ void EmitClangAttrTemplateInstantiateHelper(const std::vector<Record *> &Attrs,
 }
 
 // Emits code to instantiate dependent attributes on templates.
-void EmitClangAttrTemplateInstantiate(RecordKeeper &Records, raw_ostream &OS) {
+void EmitClangAttrTemplateInstantiate(const RecordKeeper &Records,
+                                      raw_ostream &OS) {
   emitSourceFileHeader("Template instantiation code for attributes", OS,
                        Records);
 
-  std::vector<Record*> Attrs = Records.getAllDerivedDefinitions("Attr");
+  ArrayRef<const Record *> Attrs = Records.getAllDerivedDefinitions("Attr");
 
   OS << "namespace clang {\n"
      << "namespace sema {\n\n"
@@ -4076,7 +4058,7 @@ void EmitClangAttrTemplateInstantiate(RecordKeeper &Records, raw_ostream &OS) {
 }
 
 // Emits the list of parsed attributes.
-void EmitClangAttrParsedAttrList(RecordKeeper &Records, raw_ostream &OS) {
+void EmitClangAttrParsedAttrList(const RecordKeeper &Records, raw_ostream &OS) {
   emitSourceFileHeader("List of all attributes that Clang recognizes", OS,
                        Records);
 
@@ -4348,12 +4330,6 @@ static void GenerateMutualExclusionsChecks(const Record &Attr,
                                            raw_ostream &OS,
                                            raw_ostream &MergeDeclOS,
                                            raw_ostream &MergeStmtOS) {
-  // Find all of the definitions that inherit from MutualExclusions and include
-  // the given attribute in the list of exclusions to generate the
-  // diagMutualExclusion() check.
-  std::vector<Record *> ExclusionsList =
-      Records.getAllDerivedDefinitions("MutualExclusions");
-
   // We don't do any of this magic for type attributes yet.
   if (Attr.isSubClassOf("TypeAttr"))
     return;
@@ -4367,7 +4343,11 @@ static void GenerateMutualExclusionsChecks(const Record &Attr,
 
   std::vector<std::string> DeclAttrs, StmtAttrs;
 
-  for (const Record *Exclusion : ExclusionsList) {
+  // Find all of the definitions that inherit from MutualExclusions and include
+  // the given attribute in the list of exclusions to generate the
+  // diagMutualExclusion() check.
+  for (const Record *Exclusion :
+       Records.getAllDerivedDefinitions("MutualExclusions")) {
     std::vector<Record *> MutuallyExclusiveAttrs =
         Exclusion->getValueAsListOfDefs("Exclusions");
     auto IsCurAttr = [Attr](const Record *R) {
@@ -4670,7 +4650,8 @@ void GenerateIsParamExpr(const Record &Attr, raw_ostream &OS) {
   OS << "}\n\n";
 }
 
-void GenerateHandleAttrWithDelayedArgs(RecordKeeper &Records, raw_ostream &OS) {
+void GenerateHandleAttrWithDelayedArgs(const RecordKeeper &Records,
+                                       raw_ostream &OS) {
   OS << "static void handleAttrWithDelayedArgs(Sema &S, Decl *D, ";
   OS << "const ParsedAttr &Attr) {\n";
   OS << "  SmallVector<Expr *, 4> ArgExprs;\n";
@@ -4708,7 +4689,7 @@ static bool IsKnownToGCC(const Record &Attr) {
 }
 
 /// Emits the parsed attribute helpers
-void EmitClangAttrParsedAttrImpl(RecordKeeper &Records, raw_ostream &OS) {
+void EmitClangAttrParsedAttrImpl(const RecordKeeper &Records, raw_ostream &OS) {
   emitSourceFileHeader("Parsed attribute helpers", OS, Records);
 
   OS << "#if !defined(WANT_DECL_MERGE_LOGIC) && "
@@ -4872,14 +4853,14 @@ void EmitClangAttrParsedAttrImpl(RecordKeeper &Records, raw_ostream &OS) {
 }
 
 // Emits the kind list of parsed attributes
-void EmitClangAttrParsedAttrKinds(RecordKeeper &Records, raw_ostream &OS) {
+void EmitClangAttrParsedAttrKinds(const RecordKeeper &Records,
+                                  raw_ostream &OS) {
   emitSourceFileHeader("Attribute name matcher", OS, Records);
 
-  std::vector<Record *> Attrs = Records.getAllDerivedDefinitions("Attr");
   std::vector<StringMatcher::StringPair> GNU, Declspec, Microsoft, CXX11,
       Keywords, Pragma, C23, HLSLAnnotation;
   std::set<std::string> Seen;
-  for (const auto *A : Attrs) {
+  for (const auto *A : Records.getAllDerivedDefinitions("Attr")) {
     const Record &Attr = *A;
 
     bool SemaHandler = Attr.getValueAsBit("SemaHandler");
@@ -4973,11 +4954,10 @@ void EmitClangAttrParsedAttrKinds(RecordKeeper &Records, raw_ostream &OS) {
 }
 
 // Emits the code to dump an attribute.
-void EmitClangAttrTextNodeDump(RecordKeeper &Records, raw_ostream &OS) {
+void EmitClangAttrTextNodeDump(const RecordKeeper &Records, raw_ostream &OS) {
   emitSourceFileHeader("Attribute text node dumper", OS, Records);
 
-  std::vector<Record*> Attrs = Records.getAllDerivedDefinitions("Attr"), Args;
-  for (const auto *Attr : Attrs) {
+  for (const auto *Attr : Records.getAllDerivedDefinitions("Attr")) {
     const Record &R = *Attr;
     if (!R.getValueAsBit("ASTNode"))
       continue;
@@ -4993,7 +4973,7 @@ void EmitClangAttrTextNodeDump(RecordKeeper &Records, raw_ostream &OS) {
     if (Spellings.size() > 1 && !SpellingNamesAreCommon(Spellings))
       SS << "    OS << \" \" << A->getSpelling();\n";
 
-    Args = R.getValueAsListOfDefs("Args");
+    std::vector<Record *> Args = R.getValueAsListOfDefs("Args");
     for (const auto *Arg : Args)
       createArgument(*Arg, R.getName())->writeDump(SS);
 
@@ -5012,11 +4992,10 @@ void EmitClangAttrTextNodeDump(RecordKeeper &Records, raw_ostream &OS) {
   }
 }
 
-void EmitClangAttrNodeTraverse(RecordKeeper &Records, raw_ostream &OS) {
+void EmitClangAttrNodeTraverse(const RecordKeeper &Records, raw_ostream &OS) {
   emitSourceFileHeader("Attribute text node traverser", OS, Records);
 
-  std::vector<Record *> Attrs = Records.getAllDerivedDefinitions("Attr"), Args;
-  for (const auto *Attr : Attrs) {
+  for (const auto *Attr : Records.getAllDerivedDefinitions("Attr")) {
     const Record &R = *Attr;
     if (!R.getValueAsBit("ASTNode"))
       continue;
@@ -5024,7 +5003,7 @@ void EmitClangAttrNodeTraverse(RecordKeeper &Records, raw_ostream &OS) {
     std::string FunctionContent;
     llvm::raw_string_ostream SS(FunctionContent);
 
-    Args = R.getValueAsListOfDefs("Args");
+    std::vector<Record *> Args = R.getValueAsListOfDefs("Args");
     for (const auto *Arg : Args)
       createArgument(*Arg, R.getName())->writeDumpChildren(SS);
     if (Attr->getValueAsBit("AcceptsExprPack"))
@@ -5041,7 +5020,8 @@ void EmitClangAttrNodeTraverse(RecordKeeper &Records, raw_ostream &OS) {
   }
 }
 
-void EmitClangAttrParserStringSwitches(RecordKeeper &Records, raw_ostream &OS) {
+void EmitClangAttrParserStringSwitches(const RecordKeeper &Records,
+                                       raw_ostream &OS) {
   generateNameToAttrsMap(Records);
   emitSourceFileHeader("Parser-related llvm::StringSwitch cases", OS, Records);
   emitClangAttrArgContextList(Records, OS);
@@ -5056,16 +5036,15 @@ void EmitClangAttrParserStringSwitches(RecordKeeper &Records, raw_ostream &OS) {
   emitClangAttrStrictIdentifierArgList(Records, OS);
 }
 
-void EmitClangAttrSubjectMatchRulesParserStringSwitches(RecordKeeper &Records,
-                                                        raw_ostream &OS) {
+void EmitClangAttrSubjectMatchRulesParserStringSwitches(
+    const RecordKeeper &Records, raw_ostream &OS) {
   getPragmaAttributeSupport(Records).generateParsingHelpers(OS);
 }
 
-void EmitClangAttrDocTable(RecordKeeper &Records, raw_ostream &OS) {
+void EmitClangAttrDocTable(const RecordKeeper &Records, raw_ostream &OS) {
   emitSourceFileHeader("Clang attribute documentation", OS, Records);
 
-  std::vector<Record *> Attrs = Records.getAllDerivedDefinitions("Attr");
-  for (const auto *A : Attrs) {
+  for (const auto *A : Records.getAllDerivedDefinitions("Attr")) {
     if (!A->getValueAsBit("ASTNode"))
       continue;
     std::vector<Record *> Docs = A->getValueAsListOfDefs("Documentation");
@@ -5210,7 +5189,7 @@ GetAttributeHeadingAndSpellings(const Record &Documentation,
   return std::make_pair(std::move(Heading), std::move(SupportedSpellings));
 }
 
-static void WriteDocumentation(RecordKeeper &Records,
+static void WriteDocumentation(const RecordKeeper &Records,
                                const DocumentationData &Doc, raw_ostream &OS) {
   OS << Doc.Heading << "\n" << std::string(Doc.Heading.length(), '-') << "\n";
 
@@ -5265,7 +5244,7 @@ static void WriteDocumentation(RecordKeeper &Records,
   OS << "\n\n\n";
 }
 
-void EmitClangAttrDocs(RecordKeeper &Records, raw_ostream &OS) {
+void EmitClangAttrDocs(const RecordKeeper &Records, raw_ostream &OS) {
   // Get the documentation introduction paragraph.
   const Record *Documentation = Records.getDef("GlobalDocumentation");
   if (!Documentation) {
@@ -5278,7 +5257,6 @@ void EmitClangAttrDocs(RecordKeeper &Records, raw_ostream &OS) {
 
   // Gather the Documentation lists from each of the attributes, based on the
   // category provided.
-  std::vector<Record *> Attrs = Records.getAllDerivedDefinitions("Attr");
   struct CategoryLess {
     bool operator()(const Record *L, const Record *R) const {
       return L->getValueAsString("Name") < R->getValueAsString("Name");
@@ -5286,7 +5264,7 @@ void EmitClangAttrDocs(RecordKeeper &Records, raw_ostream &OS) {
   };
   std::map<const Record *, std::vector<DocumentationData>, CategoryLess>
       SplitDocs;
-  for (const auto *A : Attrs) {
+  for (const auto *A : Records.getAllDerivedDefinitions("Attr")) {
     const Record &Attr = *A;
     std::vector<Record *> Docs = Attr.getValueAsListOfDefs("Documentation");
     for (const auto *D : Docs) {
@@ -5325,7 +5303,7 @@ void EmitClangAttrDocs(RecordKeeper &Records, raw_ostream &OS) {
   }
 }
 
-void EmitTestPragmaAttributeSupportedAttributes(RecordKeeper &Records,
+void EmitTestPragmaAttributeSupportedAttributes(const RecordKeeper &Records,
                                                 raw_ostream &OS) {
   PragmaClangAttributeSupport Support = getPragmaAttributeSupport(Records);
   ParsedAttrMap Attrs = getParsedAttrList(Records);
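
EmitClangAttrImpl above replaces virtual dispatch with one generated switch over getKind(), emitting a case per attribute record. A minimal standalone sketch of that emission pattern, with hypothetical attribute names standing in for the Attr records:

    #include <iostream>
    #include <string>
    #include <vector>

    // Emit a switch whose cases forward to the per-attribute implementation.
    void emitDispatchSwitch(const std::vector<std::string> &AttrNames,
                            const std::string &Method, std::ostream &OS) {
      OS << "  switch (getKind()) {\n";
      for (const std::string &Name : AttrNames)
        OS << "  case attr::" << Name << ":\n"
           << "    return cast<" << Name << "Attr>(this)->" << Method << ";\n";
      OS << "  }\n";
    }

    int main() {
      std::vector<std::string> Attrs = {"Aligned", "Deprecated"};
      emitDispatchSwitch(Attrs, "clone(C)", std::cout);
    }

Because every case is known at TableGen time, the generated switch is both smaller and faster than a virtual call per attribute, as the comment in EmitClangAttrImpl notes.
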
diff --git clang/utils/TableGen/ClangBuiltinsEmitter.cpp clang/utils/TableGen/ClangBuiltinsEmitter.cpp
index 94f12a08164f..4ae7600a392b 100644
--- clang/utils/TableGen/ClangBuiltinsEmitter.cpp
+++ clang/utils/TableGen/ClangBuiltinsEmitter.cpp
@@ -345,7 +345,7 @@ void EmitBuiltin(llvm::raw_ostream &OS, const Record *Builtin) {
 }
 } // namespace
 
-void clang::EmitClangBuiltins(llvm::RecordKeeper &Records,
+void clang::EmitClangBuiltins(const llvm::RecordKeeper &Records,
                               llvm::raw_ostream &OS) {
   emitSourceFileHeader("List of builtins that Clang recognizes", OS);
 
diff --git clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp
index aee7d38786a5..1a2503dcf660 100644
--- clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp
+++ clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp
@@ -20,16 +20,16 @@
 
 using namespace llvm;
 
-void clang::EmitClangCommentCommandInfo(RecordKeeper &Records,
+void clang::EmitClangCommentCommandInfo(const RecordKeeper &Records,
                                         raw_ostream &OS) {
   emitSourceFileHeader("A list of commands useable in documentation comments",
                        OS, Records);
 
   OS << "namespace {\n"
         "const CommandInfo Commands[] = {\n";
-  std::vector<Record *> Tags = Records.getAllDerivedDefinitions("Command");
+  ArrayRef<const Record *> Tags = Records.getAllDerivedDefinitions("Command");
   for (size_t i = 0, e = Tags.size(); i != e; ++i) {
-    Record &Tag = *Tags[i];
+    const Record &Tag = *Tags[i];
     OS << "  { "
        << "\"" << Tag.getValueAsString("Name") << "\", "
        << "\"" << Tag.getValueAsString("EndCommandName") << "\", " << i << ", "
@@ -62,7 +62,7 @@ void clang::EmitClangCommentCommandInfo(RecordKeeper &Records,
 
   std::vector<StringMatcher::StringPair> Matches;
   for (size_t i = 0, e = Tags.size(); i != e; ++i) {
-    Record &Tag = *Tags[i];
+    const Record &Tag = *Tags[i];
     std::string Name = std::string(Tag.getValueAsString("Name"));
     std::string Return;
     raw_string_ostream(Return) << "return &Commands[" << i << "];";
@@ -112,7 +112,7 @@ static std::string MangleName(StringRef Str) {
   return Mangled;
 }
 
-void clang::EmitClangCommentCommandList(RecordKeeper &Records,
+void clang::EmitClangCommentCommandList(const RecordKeeper &Records,
                                         raw_ostream &OS) {
   emitSourceFileHeader("A list of commands useable in documentation comments",
                        OS, Records);
@@ -121,9 +121,9 @@ void clang::EmitClangCommentCommandList(RecordKeeper &Records,
      << "#  define COMMENT_COMMAND(NAME)\n"
      << "#endif\n";
 
-  std::vector<Record *> Tags = Records.getAllDerivedDefinitions("Command");
+  ArrayRef<const Record *> Tags = Records.getAllDerivedDefinitions("Command");
   for (size_t i = 0, e = Tags.size(); i != e; ++i) {
-    Record &Tag = *Tags[i];
+    const Record &Tag = *Tags[i];
     std::string MangledName = MangleName(Tag.getValueAsString("Name"));
 
     OS << "COMMENT_COMMAND(" << MangledName << ")\n";
diff --git clang/utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp clang/utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp
index f1cd9af0519d..bd75b3f6b652 100644
--- clang/utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp
+++ clang/utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp
@@ -46,21 +46,17 @@ static bool translateCodePointToUTF8(unsigned CodePoint,
   return true;
 }
 
-void clang::EmitClangCommentHTMLNamedCharacterReferences(RecordKeeper &Records,
-                                                         raw_ostream &OS) {
-  std::vector<Record *> Tags = Records.getAllDerivedDefinitions("NCR");
+void clang::EmitClangCommentHTMLNamedCharacterReferences(
+    const RecordKeeper &Records, raw_ostream &OS) {
   std::vector<StringMatcher::StringPair> NameToUTF8;
   SmallString<32> CLiteral;
-  for (std::vector<Record *>::iterator I = Tags.begin(), E = Tags.end();
-       I != E; ++I) {
-    Record &Tag = **I;
-    std::string Spelling = std::string(Tag.getValueAsString("Spelling"));
-    uint64_t CodePoint = Tag.getValueAsInt("CodePoint");
+  for (const Record *Tag : Records.getAllDerivedDefinitions("NCR")) {
+    std::string Spelling = std::string(Tag->getValueAsString("Spelling"));
+    uint64_t CodePoint = Tag->getValueAsInt("CodePoint");
     CLiteral.clear();
     CLiteral.append("return ");
     if (!translateCodePointToUTF8(CodePoint, CLiteral)) {
-      SrcMgr.PrintMessage(Tag.getLoc().front(),
-                          SourceMgr::DK_Error,
+      SrcMgr.PrintMessage(Tag->getLoc().front(), SourceMgr::DK_Error,
                           Twine("invalid code point"));
       continue;
     }
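
translateCodePointToUTF8 above converts a named character reference's code point into UTF-8 bytes for the generated "return ..." literal, reporting an error for invalid code points. A minimal standalone encoder for the same conversion, following the standard UTF-8 bit layout (surrogate rejection omitted for brevity):

    #include <cstdint>
    #include <iostream>
    #include <string>

    // Append the UTF-8 encoding of CodePoint to Out; return false if the code
    // point is out of range.
    bool encodeUTF8(uint32_t CodePoint, std::string &Out) {
      if (CodePoint <= 0x7F) {
        Out += static_cast<char>(CodePoint);
      } else if (CodePoint <= 0x7FF) {
        Out += static_cast<char>(0xC0 | (CodePoint >> 6));
        Out += static_cast<char>(0x80 | (CodePoint & 0x3F));
      } else if (CodePoint <= 0xFFFF) {
        Out += static_cast<char>(0xE0 | (CodePoint >> 12));
        Out += static_cast<char>(0x80 | ((CodePoint >> 6) & 0x3F));
        Out += static_cast<char>(0x80 | (CodePoint & 0x3F));
      } else if (CodePoint <= 0x10FFFF) {
        Out += static_cast<char>(0xF0 | (CodePoint >> 18));
        Out += static_cast<char>(0x80 | ((CodePoint >> 12) & 0x3F));
        Out += static_cast<char>(0x80 | ((CodePoint >> 6) & 0x3F));
        Out += static_cast<char>(0x80 | (CodePoint & 0x3F));
      } else {
        return false; // beyond U+10FFFF
      }
      return true;
    }

    int main() {
      std::string Bytes;
      if (encodeUTF8(0x2192, Bytes)) // U+2192 RIGHTWARDS ARROW
        std::cout << Bytes << "\n";
    }
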
diff --git clang/utils/TableGen/ClangCommentHTMLTagsEmitter.cpp clang/utils/TableGen/ClangCommentHTMLTagsEmitter.cpp
index 3dc1098753e0..a457315bc62c 100644
--- clang/utils/TableGen/ClangCommentHTMLTagsEmitter.cpp
+++ clang/utils/TableGen/ClangCommentHTMLTagsEmitter.cpp
@@ -19,10 +19,11 @@
 
 using namespace llvm;
 
-void clang::EmitClangCommentHTMLTags(RecordKeeper &Records, raw_ostream &OS) {
-  std::vector<Record *> Tags = Records.getAllDerivedDefinitions("Tag");
+void clang::EmitClangCommentHTMLTags(const RecordKeeper &Records,
+                                     raw_ostream &OS) {
+  ArrayRef<const Record *> Tags = Records.getAllDerivedDefinitions("Tag");
   std::vector<StringMatcher::StringPair> Matches;
-  for (Record *Tag : Tags) {
+  for (const Record *Tag : Tags) {
     Matches.emplace_back(std::string(Tag->getValueAsString("Spelling")),
                          "return true;");
   }
@@ -35,12 +36,12 @@ void clang::EmitClangCommentHTMLTags(RecordKeeper &Records, raw_ostream &OS) {
      << "}\n\n";
 }
 
-void clang::EmitClangCommentHTMLTagsProperties(RecordKeeper &Records,
+void clang::EmitClangCommentHTMLTagsProperties(const RecordKeeper &Records,
                                                raw_ostream &OS) {
-  std::vector<Record *> Tags = Records.getAllDerivedDefinitions("Tag");
+  ArrayRef<const Record *> Tags = Records.getAllDerivedDefinitions("Tag");
   std::vector<StringMatcher::StringPair> MatchesEndTagOptional;
   std::vector<StringMatcher::StringPair> MatchesEndTagForbidden;
-  for (Record *Tag : Tags) {
+  for (const Record *Tag : Tags) {
     std::string Spelling = std::string(Tag->getValueAsString("Spelling"));
     StringMatcher::StringPair Match(Spelling, "return true;");
     if (Tag->getValueAsBit("EndTagOptional"))
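
Both comment emitters above collect StringMatcher::StringPair entries, pairing an HTML tag spelling with the code to run when it matches. A minimal standalone sketch of what such a table drives; it emits plain string comparisons rather than the optimized character switch produced by llvm::StringMatcher:

    #include <iostream>
    #include <string>
    #include <utility>
    #include <vector>

    using StringPair = std::pair<std::string, std::string>; // (spelling, code)

    // Emit one comparison per entry; the paired code string runs on a match.
    void emitMatcher(const std::vector<StringPair> &Matches,
                     const std::string &ArgName, std::ostream &OS) {
      for (const StringPair &M : Matches)
        OS << "  if (" << ArgName << " == \"" << M.first << "\") { "
           << M.second << " }\n";
      OS << "  return false;\n";
    }

    int main() {
      std::vector<StringPair> Matches = {
          {"p", "return true;"},
          {"br", "return true;"},
      };
      emitMatcher(Matches, "Name", std::cout);
    }
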
diff --git clang/utils/TableGen/ClangDataCollectorsEmitter.cpp clang/utils/TableGen/ClangDataCollectorsEmitter.cpp
index 45082935c1f7..dae6710d7523 100644
--- clang/utils/TableGen/ClangDataCollectorsEmitter.cpp
+++ clang/utils/TableGen/ClangDataCollectorsEmitter.cpp
@@ -4,7 +4,7 @@
 
 using namespace llvm;
 
-void clang::EmitClangDataCollectors(RecordKeeper &RK, raw_ostream &OS) {
+void clang::EmitClangDataCollectors(const RecordKeeper &RK, raw_ostream &OS) {
   const auto &Defs = RK.getClasses();
   for (const auto &Entry : Defs) {
     Record &R = *Entry.second;
diff --git clang/utils/TableGen/ClangDiagnosticsEmitter.cpp clang/utils/TableGen/ClangDiagnosticsEmitter.cpp
index 6ca24a8c74b2..773668caa757 100644
--- clang/utils/TableGen/ClangDiagnosticsEmitter.cpp
+++ clang/utils/TableGen/ClangDiagnosticsEmitter.cpp
@@ -39,12 +39,13 @@ using namespace llvm;
 
 namespace {
 class DiagGroupParentMap {
-  RecordKeeper &Records;
-  std::map<const Record*, std::vector<Record*> > Mapping;
+  const RecordKeeper &Records;
+  std::map<const Record *, std::vector<const Record *>> Mapping;
+
 public:
-  DiagGroupParentMap(RecordKeeper &records) : Records(records) {
-    std::vector<Record*> DiagGroups
-      = Records.getAllDerivedDefinitions("DiagGroup");
+  DiagGroupParentMap(const RecordKeeper &records) : Records(records) {
+    ArrayRef<const Record *> DiagGroups =
+        Records.getAllDerivedDefinitions("DiagGroup");
     for (unsigned i = 0, e = DiagGroups.size(); i != e; ++i) {
       std::vector<Record*> SubGroups =
         DiagGroups[i]->getValueAsListOfDefs("SubGroups");
@@ -53,7 +54,7 @@ public:
     }
   }
 
-  const std::vector<Record*> &getParents(const Record *Group) {
+  const std::vector<const Record *> &getParents(const Record *Group) {
     return Mapping[Group];
   }
 };
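
DiagGroupParentMap above inverts each DiagGroup's SubGroups list so that any group can be looked up together with all of its parents. A minimal standalone sketch of that inversion, with hypothetical group names and std::string standing in for const Record *:

    #include <iostream>
    #include <map>
    #include <string>
    #include <vector>

    int main() {
      // Each group lists its subgroups, as DiagGroup records do in TableGen.
      std::map<std::string, std::vector<std::string>> SubGroups = {
          {"pedantic", {"extra-semi", "vla-extension"}},
          {"most", {"extra-semi"}},
      };

      // Invert the relation: map every subgroup to all of its parents.
      std::map<std::string, std::vector<std::string>> Parents;
      for (const auto &Entry : SubGroups)
        for (const std::string &Sub : Entry.second)
          Parents[Sub].push_back(Entry.first);

      for (const auto &Entry : Parents) {
        std::cout << Entry.first << " <-";
        for (const std::string &P : Entry.second)
          std::cout << " " << P;
        std::cout << "\n";
      }
    }
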
@@ -68,7 +69,8 @@ getCategoryFromDiagGroup(const Record *Group,
 
   // The diag group may be the subgroup of one or more other diagnostic groups,
   // check these for a category as well.
-  const std::vector<Record*> &Parents = DiagGroupParents.getParents(Group);
+  const std::vector<const Record *> &Parents =
+      DiagGroupParents.getParents(Group);
   for (unsigned i = 0, e = Parents.size(); i != e; ++i) {
     CatName = getCategoryFromDiagGroup(Parents[i], DiagGroupParents);
     if (!CatName.empty()) return CatName;
@@ -94,19 +96,19 @@ static std::string getDiagnosticCategory(const Record *R,
 
 namespace {
   class DiagCategoryIDMap {
-    RecordKeeper &Records;
+    const RecordKeeper &Records;
     StringMap<unsigned> CategoryIDs;
     std::vector<std::string> CategoryStrings;
   public:
-    DiagCategoryIDMap(RecordKeeper &records) : Records(records) {
+    DiagCategoryIDMap(const RecordKeeper &records) : Records(records) {
       DiagGroupParentMap ParentInfo(Records);
 
       // The zero'th category is "".
       CategoryStrings.push_back("");
       CategoryIDs[""] = 0;
 
-      std::vector<Record*> Diags =
-      Records.getAllDerivedDefinitions("Diagnostic");
+      ArrayRef<const Record *> Diags =
+          Records.getAllDerivedDefinitions("Diagnostic");
       for (unsigned i = 0, e = Diags.size(); i != e; ++i) {
         std::string Category = getDiagnosticCategory(Diags[i], ParentInfo);
         if (Category.empty()) continue;  // Skip diags with no category.
@@ -153,8 +155,8 @@ static bool diagGroupBeforeByName(const Record *LHS, const Record *RHS) {
 
 /// Invert the 1-[0/1] mapping of diags to group into a one to many
 /// mapping of groups to diags in the group.
-static void groupDiagnostics(const std::vector<Record*> &Diags,
-                             const std::vector<Record*> &DiagGroups,
+static void groupDiagnostics(ArrayRef<const Record *> Diags,
+                             ArrayRef<const Record *> DiagGroups,
                              std::map<std::string, GroupInfo> &DiagsInGroup) {
 
   for (unsigned i = 0, e = Diags.size(); i != e; ++i) {
@@ -172,7 +174,7 @@ static void groupDiagnostics(const std::vector<Record*> &Diags,
   // Add all DiagGroup's to the DiagsInGroup list to make sure we pick up empty
   // groups (these are warnings that GCC supports that clang never produces).
   for (unsigned i = 0, e = DiagGroups.size(); i != e; ++i) {
-    Record *Group = DiagGroups[i];
+    const Record *Group = DiagGroups[i];
     GroupInfo &GI =
         DiagsInGroup[std::string(Group->getValueAsString("GroupName"))];
     GI.GroupName = Group->getName();
@@ -255,20 +257,18 @@ class InferPedantic {
       GMap;
 
   DiagGroupParentMap &DiagGroupParents;
-  const std::vector<Record*> &Diags;
-  const std::vector<Record*> DiagGroups;
+  ArrayRef<const Record *> Diags;
+  const std::vector<const Record *> DiagGroups;
   std::map<std::string, GroupInfo> &DiagsInGroup;
   llvm::DenseSet<const Record*> DiagsSet;
   GMap GroupCount;
 public:
   InferPedantic(DiagGroupParentMap &DiagGroupParents,
-                const std::vector<Record*> &Diags,
-                const std::vector<Record*> &DiagGroups,
+                ArrayRef<const Record *> Diags,
+                ArrayRef<const Record *> DiagGroups,
                 std::map<std::string, GroupInfo> &DiagsInGroup)
-  : DiagGroupParents(DiagGroupParents),
-  Diags(Diags),
-  DiagGroups(DiagGroups),
-  DiagsInGroup(DiagsInGroup) {}
+      : DiagGroupParents(DiagGroupParents), Diags(Diags),
+        DiagGroups(DiagGroups), DiagsInGroup(DiagsInGroup) {}
 
   /// Compute the set of diagnostics and groups that are immediately
   /// in -Wpedantic.
@@ -302,7 +302,8 @@ bool InferPedantic::isSubGroupOfGroup(const Record *Group,
   if (GName == GroupName)
     return true;
 
-  const std::vector<Record*> &Parents = DiagGroupParents.getParents(Group);
+  const std::vector<const Record *> &Parents =
+      DiagGroupParents.getParents(Group);
   for (unsigned i = 0, e = Parents.size(); i != e; ++i)
     if (isSubGroupOfGroup(Parents[i], GName))
       return true;
@@ -347,7 +348,8 @@ void InferPedantic::markGroup(const Record *Group) {
   // group's count is equal to the number of subgroups and diagnostics in
   // that group, we can safely add this group to -Wpedantic.
   if (groupInPedantic(Group, /* increment */ true)) {
-    const std::vector<Record*> &Parents = DiagGroupParents.getParents(Group);
+    const std::vector<const Record *> &Parents =
+        DiagGroupParents.getParents(Group);
     for (unsigned i = 0, e = Parents.size(); i != e; ++i)
       markGroup(Parents[i]);
   }
@@ -359,7 +361,7 @@ void InferPedantic::compute(VecOrSet DiagsInPedantic,
   // "pedantic" group.  For those that aren't explicitly included in -Wpedantic,
   // mark them for consideration to be included in -Wpedantic directly.
   for (unsigned i = 0, e = Diags.size(); i != e; ++i) {
-    Record *R = Diags[i];
+    const Record *R = Diags[i];
     if (isExtension(R) && isOffByDefault(R)) {
       DiagsSet.insert(R);
       if (DefInit *Group = dyn_cast<DefInit>(R->getValueInit("Group"))) {
@@ -375,7 +377,7 @@ void InferPedantic::compute(VecOrSet DiagsInPedantic,
   // march through Diags a second time to ensure the results are emitted
   // in deterministic order.
   for (unsigned i = 0, e = Diags.size(); i != e; ++i) {
-    Record *R = Diags[i];
+    const Record *R = Diags[i];
     if (!DiagsSet.count(R))
       continue;
     // Check if the group is implicitly in -Wpedantic.  If so,
@@ -401,13 +403,14 @@ void InferPedantic::compute(VecOrSet DiagsInPedantic,
   // march through the groups to ensure the results are emitted
   /// in a deterministic order.
   for (unsigned i = 0, ei = DiagGroups.size(); i != ei; ++i) {
-    Record *Group = DiagGroups[i];
+    const Record *Group = DiagGroups[i];
     if (!groupInPedantic(Group))
       continue;
 
-    const std::vector<Record*> &Parents = DiagGroupParents.getParents(Group);
-    bool AllParentsInPedantic =
-        llvm::all_of(Parents, [&](Record *R) { return groupInPedantic(R); });
+    const std::vector<const Record *> &Parents =
+        DiagGroupParents.getParents(Group);
+    bool AllParentsInPedantic = llvm::all_of(
+        Parents, [&](const Record *R) { return groupInPedantic(R); });
     // If all the parents are in -Wpedantic, this means that this diagnostic
     // group will be indirectly included by -Wpedantic already.  In that
     // case, do not add it directly to -Wpedantic.  If the group has no
@@ -583,7 +586,7 @@ struct DiagnosticTextBuilder {
   DiagnosticTextBuilder(DiagnosticTextBuilder const &) = delete;
   DiagnosticTextBuilder &operator=(DiagnosticTextBuilder const &) = delete;
 
-  DiagnosticTextBuilder(RecordKeeper &Records) {
+  DiagnosticTextBuilder(const RecordKeeper &Records) {
     // Build up the list of substitution records.
     for (auto *S : Records.getAllDerivedDefinitions("TextSubstitution")) {
       EvaluatingRecordGuard Guard(&EvaluatingRecord, S);
@@ -593,7 +596,7 @@ struct DiagnosticTextBuilder {
 
     // Check that no diagnostic definitions have the same name as a
     // substitution.
-    for (Record *Diag : Records.getAllDerivedDefinitions("Diagnostic")) {
+    for (const Record *Diag : Records.getAllDerivedDefinitions("Diagnostic")) {
       StringRef Name = Diag->getName();
       if (Substitutions.count(Name))
         llvm::PrintFatalError(
@@ -1407,7 +1410,7 @@ static void verifyDiagnosticWording(const Record &Diag) {
 
 /// ClangDiagsDefsEmitter - The top-level class emits .def files containing
 /// declarations of Clang diagnostics.
-void clang::EmitClangDiagsDefs(RecordKeeper &Records, raw_ostream &OS,
+void clang::EmitClangDiagsDefs(const RecordKeeper &Records, raw_ostream &OS,
                                const std::string &Component) {
   // Write the #if guard
   if (!Component.empty()) {
@@ -1421,10 +1424,11 @@ void clang::EmitClangDiagsDefs(RecordKeeper &Records, raw_ostream &OS,
 
   DiagnosticTextBuilder DiagTextBuilder(Records);
 
-  std::vector<Record *> Diags = Records.getAllDerivedDefinitions("Diagnostic");
+  ArrayRef<const Record *> Diags =
+      Records.getAllDerivedDefinitions("Diagnostic");
 
-  std::vector<Record*> DiagGroups
-    = Records.getAllDerivedDefinitions("DiagGroup");
+  ArrayRef<const Record *> DiagGroups =
+      Records.getAllDerivedDefinitions("DiagGroup");
 
   std::map<std::string, GroupInfo> DiagsInGroup;
   groupDiagnostics(Diags, DiagGroups, DiagsInGroup);
@@ -1764,7 +1768,7 @@ static void emitDiagTable(std::map<std::string, GroupInfo> &DiagsInGroup,
 ///   CATEGORY("Lambda Issue", DiagCat_Lambda_Issue)
 /// #endif
 /// \endcode
-static void emitCategoryTable(RecordKeeper &Records, raw_ostream &OS) {
+static void emitCategoryTable(const RecordKeeper &Records, raw_ostream &OS) {
   DiagCategoryIDMap CategoriesByID(Records);
   OS << "\n#ifdef GET_CATEGORY_TABLE\n";
   for (auto const &C : CategoriesByID)
@@ -1772,13 +1776,14 @@ static void emitCategoryTable(RecordKeeper &Records, raw_ostream &OS) {
   OS << "#endif // GET_CATEGORY_TABLE\n\n";
 }
 
-void clang::EmitClangDiagGroups(RecordKeeper &Records, raw_ostream &OS) {
+void clang::EmitClangDiagGroups(const RecordKeeper &Records, raw_ostream &OS) {
   // Compute a mapping from a DiagGroup to all of its parents.
   DiagGroupParentMap DGParentMap(Records);
 
-  std::vector<Record *> Diags = Records.getAllDerivedDefinitions("Diagnostic");
+  ArrayRef<const Record *> Diags =
+      Records.getAllDerivedDefinitions("Diagnostic");
 
-  std::vector<Record *> DiagGroups =
+  ArrayRef<const Record *> DiagGroups =
       Records.getAllDerivedDefinitions("DiagGroup");
 
   std::map<std::string, GroupInfo> DiagsInGroup;
@@ -1824,9 +1829,10 @@ struct RecordIndexElement
 };
 } // end anonymous namespace.
 
-void clang::EmitClangDiagsIndexName(RecordKeeper &Records, raw_ostream &OS) {
-  const std::vector<Record*> &Diags =
-    Records.getAllDerivedDefinitions("Diagnostic");
+void clang::EmitClangDiagsIndexName(const RecordKeeper &Records,
+                                    raw_ostream &OS) {
+  ArrayRef<const Record *> Diags =
+      Records.getAllDerivedDefinitions("Diagnostic");
 
   std::vector<RecordIndexElement> Index;
   Index.reserve(Diags.size());
@@ -1915,7 +1921,7 @@ void writeDiagnosticText(DiagnosticTextBuilder &Builder, const Record *R,
 }  // namespace
 }  // namespace docs
 
-void clang::EmitClangDiagDocs(RecordKeeper &Records, raw_ostream &OS) {
+void clang::EmitClangDiagDocs(const RecordKeeper &Records, raw_ostream &OS) {
   using namespace docs;
 
   // Get the documentation introduction paragraph.
@@ -1930,10 +1936,10 @@ void clang::EmitClangDiagDocs(RecordKeeper &Records, raw_ostream &OS) {
 
   DiagnosticTextBuilder Builder(Records);
 
-  std::vector<Record*> Diags =
+  ArrayRef<const Record *> Diags =
       Records.getAllDerivedDefinitions("Diagnostic");
 
-  std::vector<Record*> DiagGroups =
+  std::vector<const Record *> DiagGroups =
       Records.getAllDerivedDefinitions("DiagGroup");
   llvm::sort(DiagGroups, diagGroupBeforeByName);
 
diff --git clang/utils/TableGen/ClangOpcodesEmitter.cpp clang/utils/TableGen/ClangOpcodesEmitter.cpp
index 120e1e2efa32..7e426d59359a 100644
--- clang/utils/TableGen/ClangOpcodesEmitter.cpp
+++ clang/utils/TableGen/ClangOpcodesEmitter.cpp
@@ -20,11 +20,11 @@ using namespace llvm;
 
 namespace {
 class ClangOpcodesEmitter {
-  RecordKeeper &Records;
+  const RecordKeeper &Records;
   unsigned NumTypes;
 
 public:
-  ClangOpcodesEmitter(RecordKeeper &R)
+  ClangOpcodesEmitter(const RecordKeeper &R)
       : Records(R), NumTypes(Records.getAllDerivedDefinitions("Type").size()) {}
 
   void run(raw_ostream &OS);
@@ -404,6 +404,6 @@ void ClangOpcodesEmitter::PrintTypes(raw_ostream &OS,
   OS << ">";
 }
 
-void clang::EmitClangOpcodes(RecordKeeper &Records, raw_ostream &OS) {
+void clang::EmitClangOpcodes(const RecordKeeper &Records, raw_ostream &OS) {
   ClangOpcodesEmitter(Records).run(OS);
 }
diff --git clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp
index 968b3e0661a8..d68dcc472a7b 100644
--- clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp
+++ clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp
@@ -87,7 +87,7 @@ struct BuiltinTableEntries {
 //
 class BuiltinNameEmitter {
 public:
-  BuiltinNameEmitter(RecordKeeper &Records, raw_ostream &OS)
+  BuiltinNameEmitter(const RecordKeeper &Records, raw_ostream &OS)
       : Records(Records), OS(OS) {}
 
   // Entrypoint to generate the functions and structures for checking
@@ -100,7 +100,7 @@ private:
 
   // Contains OpenCL builtin functions and related information, stored as
   // Record instances. They are coming from the associated TableGen file.
-  RecordKeeper &Records;
+  const RecordKeeper &Records;
 
   // The output file.
   raw_ostream &OS;
@@ -113,7 +113,7 @@ private:
   // \param Output (out) String containing the enums to emit in the output file.
   // \param List (out) List containing the extracted Types, except the Types in
   //        TypesSeen.
-  void ExtractEnumTypes(std::vector<Record *> &Types,
+  void ExtractEnumTypes(ArrayRef<const Record *> Types,
                         StringMap<bool> &TypesSeen, std::string &Output,
                         std::vector<const Record *> &List);
 
@@ -237,7 +237,7 @@ private:
 /// Base class for emitting a file (e.g. header or test) from OpenCLBuiltins.td
 class OpenCLBuiltinFileEmitterBase {
 public:
-  OpenCLBuiltinFileEmitterBase(RecordKeeper &Records, raw_ostream &OS)
+  OpenCLBuiltinFileEmitterBase(const RecordKeeper &Records, raw_ostream &OS)
       : Records(Records), OS(OS) {}
   virtual ~OpenCLBuiltinFileEmitterBase() = default;
 
@@ -305,7 +305,7 @@ protected:
 
   // Contains OpenCL builtin functions and related information, stored as
   // Record instances. They are coming from the associated TableGen file.
-  RecordKeeper &Records;
+  const RecordKeeper &Records;
 
   // The output file.
   raw_ostream &OS;
@@ -316,7 +316,7 @@ protected:
 // builtin function described in the .td input.
 class OpenCLBuiltinTestEmitter : public OpenCLBuiltinFileEmitterBase {
 public:
-  OpenCLBuiltinTestEmitter(RecordKeeper &Records, raw_ostream &OS)
+  OpenCLBuiltinTestEmitter(const RecordKeeper &Records, raw_ostream &OS)
       : OpenCLBuiltinFileEmitterBase(Records, OS) {}
 
   // Entrypoint to generate the functions for testing all OpenCL builtin
@@ -329,7 +329,7 @@ public:
 // prototype for each builtin function described in the .td input.
 class OpenCLBuiltinHeaderEmitter : public OpenCLBuiltinFileEmitterBase {
 public:
-  OpenCLBuiltinHeaderEmitter(RecordKeeper &Records, raw_ostream &OS)
+  OpenCLBuiltinHeaderEmitter(const RecordKeeper &Records, raw_ostream &OS)
       : OpenCLBuiltinFileEmitterBase(Records, OS) {}
 
   // Entrypoint to generate the header.
@@ -362,7 +362,7 @@ void BuiltinNameEmitter::Emit() {
   EmitQualTypeFinder();
 }
 
-void BuiltinNameEmitter::ExtractEnumTypes(std::vector<Record *> &Types,
+void BuiltinNameEmitter::ExtractEnumTypes(ArrayRef<const Record *> Types,
                                           StringMap<bool> &TypesSeen,
                                           std::string &Output,
                                           std::vector<const Record *> &List) {
@@ -392,11 +392,11 @@ void BuiltinNameEmitter::EmitDeclarations() {
   // Extract generic types and non-generic types separately, to keep
   // gentypes at the end of the enum which simplifies the special handling
   // for gentypes in SemaLookup.
-  std::vector<Record *> GenTypes =
+  ArrayRef<const Record *> GenTypes =
       Records.getAllDerivedDefinitions("GenericType");
   ExtractEnumTypes(GenTypes, TypesSeen, GenTypeEnums, GenTypeList);
 
-  std::vector<Record *> Types = Records.getAllDerivedDefinitions("Type");
+  ArrayRef<const Record *> Types = Records.getAllDerivedDefinitions("Type");
   ExtractEnumTypes(Types, TypesSeen, TypeEnums, TypeList);
 
   OS << TypeEnums;
@@ -499,7 +499,7 @@ static void VerifySignature(const std::vector<Record *> &Signature,
 
 void BuiltinNameEmitter::GetOverloads() {
   // Populate the TypeMap.
-  std::vector<Record *> Types = Records.getAllDerivedDefinitions("Type");
+  ArrayRef<const Record *> Types = Records.getAllDerivedDefinitions("Type");
   unsigned I = 0;
   for (const auto &T : Types) {
     TypeMap.insert(std::make_pair(T, I++));
@@ -507,13 +507,11 @@ void BuiltinNameEmitter::GetOverloads() {
 
   // Populate the SignaturesList and the FctOverloadMap.
   unsigned CumulativeSignIndex = 0;
-  std::vector<Record *> Builtins = Records.getAllDerivedDefinitions("Builtin");
+  ArrayRef<const Record *> Builtins =
+      Records.getAllDerivedDefinitions("Builtin");
   for (const auto *B : Builtins) {
     StringRef BName = B->getValueAsString("Name");
-    if (!FctOverloadMap.contains(BName)) {
-      FctOverloadMap.insert(std::make_pair(
-          BName, std::vector<std::pair<const Record *, unsigned>>{}));
-    }
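+    // try_emplace default-constructs an empty overload list for a new name and
+    // leaves an existing entry untouched.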
+    FctOverloadMap.try_emplace(BName);
 
     auto Signature = B->getValueAsListOfDefs("Signature");
     // Reuse signatures to avoid unnecessary duplicates.
@@ -538,7 +536,7 @@ void BuiltinNameEmitter::GetOverloads() {
 void BuiltinNameEmitter::EmitExtensionTable() {
   OS << "static const char *FunctionExtensionTable[] = {\n";
   unsigned Index = 0;
-  std::vector<Record *> FuncExtensions =
+  ArrayRef<const Record *> FuncExtensions =
       Records.getAllDerivedDefinitions("FunctionExtension");
 
   for (const auto &FE : FuncExtensions) {
@@ -807,22 +805,13 @@ static void OCL2Qual(Sema &S, const OpenCLTypeStruct &Ty,
   OS << "\n  switch (Ty.ID) {\n";
 
   // Switch cases for image types (Image2d, Image3d, ...)
-  std::vector<Record *> ImageTypes =
+  ArrayRef<const Record *> ImageTypes =
       Records.getAllDerivedDefinitions("ImageType");
 
   // Map an image type name to its 3 access-qualified types (RO, WO, RW).
-  StringMap<SmallVector<Record *, 3>> ImageTypesMap;
-  for (auto *IT : ImageTypes) {
-    auto Entry = ImageTypesMap.find(IT->getValueAsString("Name"));
-    if (Entry == ImageTypesMap.end()) {
-      SmallVector<Record *, 3> ImageList;
-      ImageList.push_back(IT);
-      ImageTypesMap.insert(
-          std::make_pair(IT->getValueAsString("Name"), ImageList));
-    } else {
-      Entry->second.push_back(IT);
-    }
-  }
+  StringMap<SmallVector<const Record *, 3>> ImageTypesMap;
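+  // operator[] default-constructs the vector the first time a name is seen.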
+  for (auto *IT : ImageTypes)
+    ImageTypesMap[IT->getValueAsString("Name")].push_back(IT);
 
   // Emit the cases for the image types.  For an image type name, there are 3
   // corresponding QualTypes ("RO", "WO", "RW").  The "AccessQualifier" field
@@ -902,7 +891,7 @@ static void OCL2Qual(Sema &S, const OpenCLTypeStruct &Ty,
   // Switch cases for non generic, non image types (int, int4, float, ...).
   // Only insert the plain scalar type; vector information and type qualifiers
   // are added in step 2.
-  std::vector<Record *> Types = Records.getAllDerivedDefinitions("Type");
+  ArrayRef<const Record *> Types = Records.getAllDerivedDefinitions("Type");
   StringMap<bool> TypesSeen;
 
   for (const auto *T : Types) {
@@ -1223,7 +1212,8 @@ void OpenCLBuiltinTestEmitter::emit() {
   unsigned TestID = 0;
 
   // Iterate over all builtins.
-  std::vector<Record *> Builtins = Records.getAllDerivedDefinitions("Builtin");
+  ArrayRef<const Record *> Builtins =
+      Records.getAllDerivedDefinitions("Builtin");
   for (const auto *B : Builtins) {
     StringRef Name = B->getValueAsString("Name");
 
@@ -1286,7 +1276,8 @@ void OpenCLBuiltinHeaderEmitter::emit() {
 )";
 
   // Iterate over all builtins; sort to follow order of definition in .td file.
-  std::vector<Record *> Builtins = Records.getAllDerivedDefinitions("Builtin");
+  std::vector<const Record *> Builtins =
+      Records.getAllDerivedDefinitions("Builtin");
   llvm::sort(Builtins, LessRecord());
 
   for (const auto *B : Builtins) {
@@ -1331,18 +1322,19 @@ void OpenCLBuiltinHeaderEmitter::emit() {
         "#pragma OPENCL EXTENSION all : disable\n";
 }
 
-void clang::EmitClangOpenCLBuiltins(RecordKeeper &Records, raw_ostream &OS) {
+void clang::EmitClangOpenCLBuiltins(const RecordKeeper &Records,
+                                    raw_ostream &OS) {
   BuiltinNameEmitter NameChecker(Records, OS);
   NameChecker.Emit();
 }
 
-void clang::EmitClangOpenCLBuiltinHeader(RecordKeeper &Records,
+void clang::EmitClangOpenCLBuiltinHeader(const RecordKeeper &Records,
                                          raw_ostream &OS) {
   OpenCLBuiltinHeaderEmitter HeaderFileGenerator(Records, OS);
   HeaderFileGenerator.emit();
 }
 
-void clang::EmitClangOpenCLBuiltinTests(RecordKeeper &Records,
+void clang::EmitClangOpenCLBuiltinTests(const RecordKeeper &Records,
                                         raw_ostream &OS) {
   OpenCLBuiltinTestEmitter TestFileGenerator(Records, OS);
   TestFileGenerator.emit();
diff --git clang/utils/TableGen/ClangOptionDocEmitter.cpp clang/utils/TableGen/ClangOptionDocEmitter.cpp
index 86835611b842..8c32f0218e76 100644
--- clang/utils/TableGen/ClangOptionDocEmitter.cpp
+++ clang/utils/TableGen/ClangOptionDocEmitter.cpp
@@ -24,8 +24,8 @@ using namespace llvm;
 
 namespace {
 struct DocumentedOption {
-  Record *Option;
-  std::vector<Record*> Aliases;
+  const Record *Option;
+  std::vector<const Record *> Aliases;
 };
 struct DocumentedGroup;
 struct Documentation {
@@ -37,7 +37,7 @@ struct Documentation {
   }
 };
 struct DocumentedGroup : Documentation {
-  Record *Group;
+  const Record *Group;
 };
 
 static bool hasFlag(const Record *Option, StringRef OptionFlag,
@@ -63,25 +63,25 @@ static bool isOptionVisible(const Record *Option, const Record *DocInfo) {
 }
 
 // Reorganize the records into a suitable form for emitting documentation.
-Documentation extractDocumentation(RecordKeeper &Records,
+Documentation extractDocumentation(const RecordKeeper &Records,
                                    const Record *DocInfo) {
   Documentation Result;
 
   // Build the tree of groups. The root in the tree is the fake option group
   // (Record*)nullptr, which contains all top-level groups and options.
-  std::map<Record*, std::vector<Record*> > OptionsInGroup;
-  std::map<Record*, std::vector<Record*> > GroupsInGroup;
-  std::map<Record*, std::vector<Record*> > Aliases;
+  std::map<const Record *, std::vector<const Record *>> OptionsInGroup;
+  std::map<const Record *, std::vector<const Record *>> GroupsInGroup;
+  std::map<const Record *, std::vector<const Record *>> Aliases;
 
-  std::map<std::string, Record*> OptionsByName;
-  for (Record *R : Records.getAllDerivedDefinitions("Option"))
+  std::map<std::string, const Record *> OptionsByName;
+  for (const Record *R : Records.getAllDerivedDefinitions("Option"))
     OptionsByName[std::string(R->getValueAsString("Name"))] = R;
 
-  auto Flatten = [](Record *R) {
+  auto Flatten = [](const Record *R) {
     return R->getValue("DocFlatten") && R->getValueAsBit("DocFlatten");
   };
 
-  auto SkipFlattened = [&](Record *R) -> Record* {
+  auto SkipFlattened = [&](const Record *R) -> const Record * {
     while (R && Flatten(R)) {
       auto *G = dyn_cast<DefInit>(R->getValueInit("Group"));
       if (!G)
@@ -91,17 +91,17 @@ Documentation extractDocumentation(RecordKeeper &Records,
     return R;
   };
 
-  for (Record *R : Records.getAllDerivedDefinitions("OptionGroup")) {
+  for (const Record *R : Records.getAllDerivedDefinitions("OptionGroup")) {
     if (Flatten(R))
       continue;
 
-    Record *Group = nullptr;
+    const Record *Group = nullptr;
     if (auto *G = dyn_cast<DefInit>(R->getValueInit("Group")))
       Group = SkipFlattened(G->getDef());
     GroupsInGroup[Group].push_back(R);
   }
 
-  for (Record *R : Records.getAllDerivedDefinitions("Option")) {
+  for (const Record *R : Records.getAllDerivedDefinitions("Option")) {
     if (auto *A = dyn_cast<DefInit>(R->getValueInit("Alias"))) {
       Aliases[A->getDef()].push_back(R);
       continue;
@@ -120,33 +120,33 @@ Documentation extractDocumentation(RecordKeeper &Records,
       }
     }
 
-    Record *Group = nullptr;
+    const Record *Group = nullptr;
     if (auto *G = dyn_cast<DefInit>(R->getValueInit("Group")))
       Group = SkipFlattened(G->getDef());
     OptionsInGroup[Group].push_back(R);
   }
 
-  auto CompareByName = [](Record *A, Record *B) {
+  auto CompareByName = [](const Record *A, const Record *B) {
     return A->getValueAsString("Name") < B->getValueAsString("Name");
   };
 
-  auto CompareByLocation = [](Record *A, Record *B) {
+  auto CompareByLocation = [](const Record *A, const Record *B) {
     return A->getLoc()[0].getPointer() < B->getLoc()[0].getPointer();
   };
 
-  auto DocumentationForOption = [&](Record *R) -> DocumentedOption {
+  auto DocumentationForOption = [&](const Record *R) -> DocumentedOption {
     auto &A = Aliases[R];
     llvm::sort(A, CompareByName);
     return {R, std::move(A)};
   };
 
-  std::function<Documentation(Record *)> DocumentationForGroup =
-      [&](Record *R) -> Documentation {
+  std::function<Documentation(const Record *)> DocumentationForGroup =
+      [&](const Record *R) -> Documentation {
     Documentation D;
 
     auto &Groups = GroupsInGroup[R];
     llvm::sort(Groups, CompareByLocation);
-    for (Record *G : Groups) {
+    for (const Record *G : Groups) {
       D.Groups.emplace_back();
       D.Groups.back().Group = G;
       Documentation &Base = D.Groups.back();
@@ -157,7 +157,7 @@ Documentation extractDocumentation(RecordKeeper &Records,
 
     auto &Options = OptionsInGroup[R];
     llvm::sort(Options, CompareByName);
-    for (Record *O : Options)
+    for (const Record *O : Options)
       if (isOptionVisible(O, DocInfo))
         D.Options.push_back(DocumentationForOption(O));
 
@@ -444,7 +444,7 @@ void emitDocumentation(int Depth, const Documentation &Doc,
 
 }  // namespace
 
-void clang::EmitClangOptDocs(RecordKeeper &Records, raw_ostream &OS) {
+void clang::EmitClangOptDocs(const RecordKeeper &Records, raw_ostream &OS) {
   const Record *DocInfo = Records.getDef("GlobalDocumentation");
   if (!DocInfo) {
     PrintFatalError("The GlobalDocumentation top-level definition is missing, "
diff --git clang/utils/TableGen/ClangSyntaxEmitter.cpp clang/utils/TableGen/ClangSyntaxEmitter.cpp
index 9720d5873184..2a69e4c353b6 100644
--- clang/utils/TableGen/ClangSyntaxEmitter.cpp
+++ clang/utils/TableGen/ClangSyntaxEmitter.cpp
@@ -41,7 +41,7 @@ using llvm::formatv;
 // stable and useful way, where abstract Node subclasses correspond to ranges.
 class Hierarchy {
 public:
-  Hierarchy(const llvm::RecordKeeper &Records) {
+  Hierarchy(llvm::RecordKeeper &Records) {
     for (llvm::Record *T : Records.getAllDerivedDefinitions("NodeType"))
       add(T);
     for (llvm::Record *Derived : Records.getAllDerivedDefinitions("NodeType"))
diff --git clang/utils/TableGen/NeonEmitter.cpp clang/utils/TableGen/NeonEmitter.cpp
index 8ec8e67388bb..4707ce1ea3b7 100644
--- clang/utils/TableGen/NeonEmitter.cpp
+++ clang/utils/TableGen/NeonEmitter.cpp
@@ -33,6 +33,7 @@
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/TableGen/AArch64ImmCheck.h"
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
 #include "llvm/TableGen/SetTheory.h"
@@ -47,6 +48,7 @@
 #include <set>
 #include <sstream>
 #include <string>
+#include <unordered_map>
 #include <utility>
 #include <vector>
 
@@ -333,6 +335,8 @@ class Intrinsic {
 
   /// The types of return value [0] and parameters [1..].
   std::vector<Type> Types;
+
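+  /// Range checks for this intrinsic's immediate arguments, populated from the
+  /// record's ImmChecks list in the constructor below.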
+  SmallVector<ImmCheck, 2> ImmChecks;
   /// The index of the key type passed to CGBuiltin.cpp for polymorphic calls.
   int PolymorphicKeyType;
   /// The local variables defined.
@@ -369,12 +373,13 @@ class Intrinsic {
 public:
   Intrinsic(Record *R, StringRef Name, StringRef Proto, TypeSpec OutTS,
             TypeSpec InTS, ClassKind CK, ListInit *Body, NeonEmitter &Emitter,
-            StringRef ArchGuard, StringRef TargetGuard, bool IsUnavailable, bool BigEndianSafe)
+            StringRef ArchGuard, StringRef TargetGuard, bool IsUnavailable,
+            bool BigEndianSafe)
       : R(R), Name(Name.str()), OutTS(OutTS), InTS(InTS), CK(CK), Body(Body),
-        ArchGuard(ArchGuard.str()), TargetGuard(TargetGuard.str()), IsUnavailable(IsUnavailable),
-        BigEndianSafe(BigEndianSafe), PolymorphicKeyType(0), NeededEarly(false),
-        UseMacro(false), BaseType(OutTS, "."), InBaseType(InTS, "."),
-        Emitter(Emitter) {
+        ArchGuard(ArchGuard.str()), TargetGuard(TargetGuard.str()),
+        IsUnavailable(IsUnavailable), BigEndianSafe(BigEndianSafe),
+        PolymorphicKeyType(0), NeededEarly(false), UseMacro(false),
+        BaseType(OutTS, "."), InBaseType(InTS, "."), Emitter(Emitter) {
     // Modify the TypeSpec per-argument to get a concrete Type, and create
     // known variables for each.
     // Types[0] is the return value.
@@ -403,6 +408,37 @@ public:
           (Type.isScalar() && Type.isHalf()))
         UseMacro = true;
     }
+
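+    // Parse the ImmChecks entries attached to this record into ImmCheck
+    // objects, resolving element and vector sizes from the context argument.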
+    int ArgIdx, Kind, TypeArgIdx;
+    std::vector<Record *> ImmCheckList = R->getValueAsListOfDefs("ImmChecks");
+    for (const auto *I : ImmCheckList) {
+      unsigned EltSizeInBits = 0, VecSizeInBits = 0;
+
+      ArgIdx = I->getValueAsInt("ImmArgIdx");
+      TypeArgIdx = I->getValueAsInt("TypeContextArgIdx");
+      Kind = I->getValueAsDef("Kind")->getValueAsInt("Value");
+
+      assert((ArgIdx >= 0 && Kind >= 0) &&
+             "ImmArgIdx and Kind must be nonnegative");
+
+      if (TypeArgIdx >= 0) {
+        Type ContextType = getParamType(TypeArgIdx);
+
+        // Element size cannot be set for intrinsics that map to polymorphic
+        // builtins.
+        if (CK != ClassB)
+          EltSizeInBits = ContextType.getElementSizeInBits();
+
+        VecSizeInBits = ContextType.getSizeInBits();
+      }
+
+      ImmChecks.emplace_back(ArgIdx, Kind, EltSizeInBits, VecSizeInBits);
+    }
+    // Sort by immediate argument index for comparison with other intrinsics
+    // that map to the same builtin.
+    llvm::sort(ImmChecks, [](const ImmCheck &A, const ImmCheck &B) {
+      return A.getImmArgIdx() < B.getImmArgIdx();
+    });
   }
 
   /// Get the Record that this intrinsic is based off.
@@ -414,6 +450,7 @@ public:
   /// Get the architectural guard string (#ifdef).
   std::string getArchGuard() const { return ArchGuard; }
   std::string getTargetGuard() const { return TargetGuard; }
+  ArrayRef<ImmCheck> getImmChecks() const { return ImmChecks; }
   /// Get the non-mangled name.
   std::string getName() const { return Name; }
 
@@ -422,15 +459,11 @@ public:
     return llvm::any_of(Types, [](const Type &T) { return T.isImmediate(); });
   }
 
-  /// Return the parameter index of the immediate operand.
-  unsigned getImmediateIdx() const {
-    for (unsigned Idx = 0; Idx < Types.size(); ++Idx)
-      if (Types[Idx].isImmediate())
-        return Idx - 1;
-    llvm_unreachable("Intrinsic has no immediate");
+  // Return true if the supplied argument is an immediate.
+  bool isArgImmediate(unsigned idx) const {
+    return Types[idx + 1].isImmediate();
   }
 
-
   unsigned getNumParams() const { return Types.size() - 1; }
   Type getReturnType() const { return Types[0]; }
   Type getParamType(unsigned I) const { return Types[I + 1]; }
@@ -554,6 +587,8 @@ class NeonEmitter {
                                      SmallVectorImpl<Intrinsic *> &Defs);
   void genOverloadTypeCheckCode(raw_ostream &OS,
                                 SmallVectorImpl<Intrinsic *> &Defs);
+  bool areRangeChecksCompatible(const ArrayRef<ImmCheck> ChecksA,
+                                const ArrayRef<ImmCheck> ChecksB);
   void genIntrinsicRangeCheckCode(raw_ostream &OS,
                                   SmallVectorImpl<Intrinsic *> &Defs);
 
@@ -1031,7 +1066,7 @@ std::string Intrinsic::getBuiltinTypeStr() {
     if (LocalCK == ClassI && T.isInteger())
       T.makeSigned();
 
-    if (hasImmediate() && getImmediateIdx() == I)
+    if (isArgImmediate(I))
       T.makeImmediate(32);
 
     S += T.builtin_str();
@@ -1470,7 +1505,7 @@ Intrinsic::DagEmitter::emitDagCall(DagInit *DI, bool MatchMangledName) {
     N = emitDagArg(DI->getArg(0), "").second;
   std::optional<std::string> MangledName;
   if (MatchMangledName) {
-    if (Intr.getRecord()->getValueAsBit("isLaneQ"))
+    if (Intr.getRecord()->getValueAsString("Name").contains("laneq"))
       N += "q";
     MangledName = Intr.mangleName(N, ClassS);
   }
@@ -1601,7 +1636,7 @@ std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagShuffle(DagInit *DI){
       int64_t VectorSize = cast<IntInit>(Expr->getArg(0))->getValue();
       VectorSize /= ElementSize;
 
-      std::vector<Record *> Revved;
+      std::vector<const Record *> Revved;
       for (unsigned VI = 0; VI < Elts2.size(); VI += VectorSize) {
         for (int LI = VectorSize - 1; LI >= 0; --LI) {
           Revved.push_back(Elts2[VI + LI]);
@@ -2143,85 +2178,58 @@ void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
   OS << "#endif\n\n";
 }
 
-void NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
-                                        SmallVectorImpl<Intrinsic *> &Defs) {
-  OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
+inline bool
+NeonEmitter::areRangeChecksCompatible(const ArrayRef<ImmCheck> ChecksA,
+                                      const ArrayRef<ImmCheck> ChecksB) {
+  // If multiple intrinsics map to the same builtin, we must ensure that the
+  // intended range checks performed in SemaArm.cpp do not contradict each
+  // other, as these are emitted once per builtin.
+  //
+  // The checked argument indices and the kind of each check must be the same.
+  // Element types may differ, as they are resolved per-intrinsic as overloaded
+  // types by SemaArm.cpp, but vector sizes are not resolved there and so must
+  // match.
+  bool compat =
+      std::equal(ChecksA.begin(), ChecksA.end(), ChecksB.begin(), ChecksB.end(),
+                 [](const auto &A, const auto &B) {
+                   return A.getImmArgIdx() == B.getImmArgIdx() &&
+                          A.getKind() == B.getKind() &&
+                          A.getVecSizeInBits() == B.getVecSizeInBits();
+                 });
+
+  return compat;
+}
 
-  std::set<std::string> Emitted;
+void NeonEmitter::genIntrinsicRangeCheckCode(
+    raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs) {
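+  // Maps a mangled builtin name to the range checks already emitted for it, so
+  // that intrinsics sharing a builtin are checked for compatibility and the
+  // checks are emitted only once.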
+  std::unordered_map<std::string, ArrayRef<ImmCheck>> Emitted;
 
-  for (auto *Def : Defs) {
-    if (Def->hasBody())
-      continue;
-    // Functions which do not have an immediate do not need to have range
-    // checking code emitted.
-    if (!Def->hasImmediate())
-      continue;
-    if (Emitted.find(Def->getMangledName()) != Emitted.end())
+  OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
+  for (auto &Def : Defs) {
+    // If the Def has a body (operation DAGs), it does not map to a
+    // __builtin_neon_ builtin; intrinsics without immediates need no checks.
+    if (Def->hasBody() || !Def->hasImmediate())
       continue;
 
-    std::string LowerBound, UpperBound;
-
-    Record *R = Def->getRecord();
-    if (R->getValueAsBit("isVXAR")) {
-      //VXAR takes an immediate in the range [0, 63]
-      LowerBound = "0";
-      UpperBound = "63";
-    } else if (R->getValueAsBit("isVCVT_N")) {
-      // VCVT between floating- and fixed-point values takes an immediate
-      // in the range [1, 32) for f32 or [1, 64) for f64 or [1, 16) for f16.
-      LowerBound = "1";
-	  if (Def->getBaseType().getElementSizeInBits() == 16 ||
-		  Def->getName().find('h') != std::string::npos)
-		// VCVTh operating on FP16 intrinsics in range [1, 16)
-		UpperBound = "15";
-	  else if (Def->getBaseType().getElementSizeInBits() == 32)
-        UpperBound = "31";
-	  else
-        UpperBound = "63";
-    } else if (R->getValueAsBit("isScalarShift")) {
-      // Right shifts have an 'r' in the name, left shifts do not. Convert
-      // instructions have the same bounds and right shifts.
-      if (Def->getName().find('r') != std::string::npos ||
-          Def->getName().find("cvt") != std::string::npos)
-        LowerBound = "1";
-
-      UpperBound = utostr(Def->getReturnType().getElementSizeInBits() - 1);
-    } else if (R->getValueAsBit("isShift")) {
-      // Builtins which are overloaded by type will need to have their upper
-      // bound computed at Sema time based on the type constant.
-
-      // Right shifts have an 'r' in the name, left shifts do not.
-      if (Def->getName().find('r') != std::string::npos)
-        LowerBound = "1";
-      UpperBound = "RFT(TV, true)";
-    } else if (Def->getClassKind(true) == ClassB) {
-      // ClassB intrinsics have a type (and hence lane number) that is only
-      // known at runtime.
-      if (R->getValueAsBit("isLaneQ"))
-        UpperBound = "RFT(TV, false, true)";
-      else
-        UpperBound = "RFT(TV, false, false)";
-    } else {
-      // The immediate generally refers to a lane in the preceding argument.
-      assert(Def->getImmediateIdx() > 0);
-      Type T = Def->getParamType(Def->getImmediateIdx() - 1);
-      UpperBound = utostr(T.getNumElements() - 1);
-    }
+    // Sorted by immediate argument index
+    ArrayRef<ImmCheck> Checks = Def->getImmChecks();
 
-    // Calculate the index of the immediate that should be range checked.
-    unsigned Idx = Def->getNumParams();
-    if (Def->hasImmediate())
-      Idx = Def->getGeneratedParamIdx(Def->getImmediateIdx());
-
-    OS << "case NEON::BI__builtin_neon_" << Def->getMangledName() << ": "
-       << "i = " << Idx << ";";
-    if (!LowerBound.empty())
-      OS << " l = " << LowerBound << ";";
-    if (!UpperBound.empty())
-      OS << " u = " << UpperBound << ";";
-    OS << " break;\n";
+    const auto it = Emitted.find(Def->getMangledName());
+    if (it != Emitted.end()) {
+      assert(areRangeChecksCompatible(Checks, it->second) &&
+             "Neon intrinsics with incompatible immediate range checks cannot "
+             "share a builtin.");
+      continue; // Ensure this is emitted only once
+    }
 
-    Emitted.insert(Def->getMangledName());
+    // Emit builtin's range checks
+    OS << "case NEON::BI__builtin_neon_" << Def->getMangledName() << ":\n";
+    for (const auto &Check : Checks) {
+      OS << " ImmChecks.emplace_back(" << Check.getImmArgIdx() << ", "
+         << Check.getKind() << ", " << Check.getElementSizeInBits() << ", "
+         << Check.getVecSizeInBits() << ");\n";
+    }
+    OS << " break;\n";
+    Emitted[Def->getMangledName()] = Checks;
   }
 
   OS << "#endif\n\n";
diff --git clang/utils/TableGen/SveEmitter.cpp clang/utils/TableGen/SveEmitter.cpp
index ca63bd354bfc..b2e2db1a4099 100644
--- clang/utils/TableGen/SveEmitter.cpp
+++ clang/utils/TableGen/SveEmitter.cpp
@@ -27,6 +27,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringMap.h"
+#include "llvm/TableGen/AArch64ImmCheck.h"
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
 #include <array>
@@ -49,23 +50,6 @@ enum class ACLEKind { SVE, SME };
 using TypeSpec = std::string;
 
 namespace {
-
-class ImmCheck {
-  unsigned Arg;
-  unsigned Kind;
-  unsigned ElementSizeInBits;
-
-public:
-  ImmCheck(unsigned Arg, unsigned Kind, unsigned ElementSizeInBits = 0)
-      : Arg(Arg), Kind(Kind), ElementSizeInBits(ElementSizeInBits) {}
-  ImmCheck(const ImmCheck &Other) = default;
-  ~ImmCheck() = default;
-
-  unsigned getArg() const { return Arg; }
-  unsigned getKind() const { return Kind; }
-  unsigned getElementSizeInBits() const { return ElementSizeInBits; }
-};
-
 class SVEType {
   bool Float, Signed, Immediate, Void, Constant, Pointer, BFloat;
   bool DefaultType, IsScalable, Predicate, PredicatePattern, PrefetchOp,
@@ -388,6 +372,9 @@ public:
   /// Emit all the range checks for the immediates.
   void createRangeChecks(raw_ostream &o);
 
+  /// Emit all the ImmCheckTypes to arm_immcheck_types.inc.
+  void createImmCheckTypes(raw_ostream &OS);
+
   /// Create the SVETypeFlags used in CGBuiltins
   void createTypeFlags(raw_ostream &o);
 
@@ -430,7 +417,6 @@ const std::array<SVEEmitter::ReinterpretTypeInfo, 12> SVEEmitter::Reinterprets =
 
 } // end anonymous namespace
 
-
 //===----------------------------------------------------------------------===//
 // Type implementation
 //===----------------------------------------------------------------------===//
@@ -1210,18 +1196,17 @@ void SVEEmitter::createIntrinsic(
     // Collate a list of range/option checks for the immediates.
     SmallVector<ImmCheck, 2> ImmChecks;
     for (auto *R : ImmCheckList) {
-      int64_t Arg = R->getValueAsInt("Arg");
-      int64_t EltSizeArg = R->getValueAsInt("EltSizeArg");
+      int64_t ArgIdx = R->getValueAsInt("ImmArgIdx");
+      int64_t EltSizeArgIdx = R->getValueAsInt("TypeContextArgIdx");
       int64_t Kind = R->getValueAsDef("Kind")->getValueAsInt("Value");
-      assert(Arg >= 0 && Kind >= 0 && "Arg and Kind must be nonnegative");
+      assert(ArgIdx >= 0 && Kind >= 0 &&
+             "ImmArgIdx and Kind must be nonnegative");
 
       unsigned ElementSizeInBits = 0;
-      char Mod;
-      unsigned NumVectors;
-      std::tie(Mod, NumVectors) = getProtoModifier(Proto, EltSizeArg + 1);
-      if (EltSizeArg >= 0)
+      auto [Mod, NumVectors] = getProtoModifier(Proto, EltSizeArgIdx + 1);
+      if (EltSizeArgIdx >= 0)
         ElementSizeInBits = SVEType(TS, Mod, NumVectors).getElementSizeInBits();
-      ImmChecks.push_back(ImmCheck(Arg, Kind, ElementSizeInBits));
+      ImmChecks.push_back(ImmCheck(ArgIdx, Kind, ElementSizeInBits));
     }
 
     Out.push_back(std::make_unique<Intrinsic>(
@@ -1541,8 +1526,8 @@ void SVEEmitter::createRangeChecks(raw_ostream &OS) {
 
     OS << "case SVE::BI__builtin_sve_" << Def->getMangledName() << ":\n";
     for (auto &Check : Def->getImmChecks())
-      OS << "ImmChecks.push_back(std::make_tuple(" << Check.getArg() << ", "
-         << Check.getKind() << ", " << Check.getElementSizeInBits() << "));\n";
+      OS << "ImmChecks.emplace_back(" << Check.getImmArgIdx() << ", "
+         << Check.getKind() << ", " << Check.getElementSizeInBits() << ");\n";
     OS << "  break;\n";
 
     Emitted.insert(Def->getMangledName());
@@ -1572,8 +1557,10 @@ void SVEEmitter::createTypeFlags(raw_ostream &OS) {
   for (auto &KV : MergeTypes)
     OS << "  " << KV.getKey() << " = " << KV.getValue() << ",\n";
   OS << "#endif\n\n";
+}
 
-  OS << "#ifdef LLVM_GET_SVE_IMMCHECKTYPES\n";
+void SVEEmitter::createImmCheckTypes(raw_ostream &OS) {
+  OS << "#ifdef LLVM_GET_ARM_INTRIN_IMMCHECKTYPES\n";
   for (auto &KV : ImmCheckTypes)
     OS << "  " << KV.getKey() << " = " << KV.getValue() << ",\n";
   OS << "#endif\n\n";
@@ -1734,8 +1721,9 @@ void SVEEmitter::createSMERangeChecks(raw_ostream &OS) {
 
     OS << "case SME::BI__builtin_sme_" << Def->getMangledName() << ":\n";
     for (auto &Check : Def->getImmChecks())
-      OS << "ImmChecks.push_back(std::make_tuple(" << Check.getArg() << ", "
-         << Check.getKind() << ", " << Check.getElementSizeInBits() << "));\n";
+      OS << "ImmChecks.push_back(std::make_tuple(" << Check.getImmArgIdx()
+         << ", " << Check.getKind() << ", " << Check.getElementSizeInBits()
+         << "));\n";
     OS << "  break;\n";
 
     Emitted.insert(Def->getMangledName());
@@ -1858,6 +1846,10 @@ void EmitSveTypeFlags(RecordKeeper &Records, raw_ostream &OS) {
   SVEEmitter(Records).createTypeFlags(OS);
 }
 
+void EmitImmCheckTypes(RecordKeeper &Records, raw_ostream &OS) {
+  SVEEmitter(Records).createImmCheckTypes(OS);
+}
+
 void EmitSveStreamingAttrs(RecordKeeper &Records, raw_ostream &OS) {
   SVEEmitter(Records).createStreamingAttrs(OS, ACLEKind::SVE);
 }
diff --git clang/utils/TableGen/TableGen.cpp clang/utils/TableGen/TableGen.cpp
index 42cc704543f1..84afd4c0afb2 100644
--- clang/utils/TableGen/TableGen.cpp
+++ clang/utils/TableGen/TableGen.cpp
@@ -75,6 +75,7 @@ enum ActionType {
   GenArmVectorType,
   GenArmNeonSema,
   GenArmNeonTest,
+  GenArmImmCheckTypes,
   GenArmMveHeader,
   GenArmMveBuiltinDef,
   GenArmMveBuiltinSema,
@@ -234,6 +235,10 @@ cl::opt<ActionType> Action(
                    "Generate ARM NEON sema support for clang"),
         clEnumValN(GenArmNeonTest, "gen-arm-neon-test",
                    "Generate ARM NEON tests for clang"),
+        clEnumValN(
+            GenArmImmCheckTypes, "gen-arm-immcheck-types",
+            "Generate arm_immcheck_types.inc (immediate range check types)"
+            " for clang"),
         clEnumValN(GenArmSveHeader, "gen-arm-sve-header",
                    "Generate arm_sve.h for clang"),
         clEnumValN(GenArmSveBuiltins, "gen-arm-sve-builtins",
@@ -469,6 +474,9 @@ bool ClangTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
   case GenArmNeonTest:
     EmitNeonTest(Records, OS);
     break;
+  case GenArmImmCheckTypes:
+    EmitImmCheckTypes(Records, OS);
+    break;
   case GenArmMveHeader:
     EmitMveHeader(Records, OS);
     break;
diff --git clang/utils/TableGen/TableGenBackends.h clang/utils/TableGen/TableGenBackends.h
index 5f2dd257cb90..c0582e341fc8 100644
--- clang/utils/TableGen/TableGenBackends.h
+++ clang/utils/TableGen/TableGenBackends.h
@@ -24,7 +24,7 @@ class RecordKeeper;
 
 namespace clang {
 
-void EmitClangDeclContext(llvm::RecordKeeper &RK, llvm::raw_ostream &OS);
+void EmitClangDeclContext(const llvm::RecordKeeper &RK, llvm::raw_ostream &OS);
 /**
   @param PriorizeIfSubclassOf These classes should be prioritized in the output.
   This is useful to force enum generation/jump tables/lookup tables to be more
@@ -32,7 +32,7 @@ void EmitClangDeclContext(llvm::RecordKeeper &RK, llvm::raw_ostream &OS);
   in Decl for classes that inherit from DeclContext, for functions like
   castFromDeclContext.
   */
-void EmitClangASTNodes(llvm::RecordKeeper &RK, llvm::raw_ostream &OS,
+void EmitClangASTNodes(const llvm::RecordKeeper &RK, llvm::raw_ostream &OS,
                        const std::string &N, const std::string &S,
                        std::string_view PriorizeIfSubclassOf = "");
 void EmitClangBasicReader(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
@@ -40,61 +40,69 @@ void EmitClangBasicWriter(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 void EmitClangTypeNodes(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 void EmitClangTypeReader(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 void EmitClangTypeWriter(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
-void EmitClangAttrParserStringSwitches(llvm::RecordKeeper &Records,
+void EmitClangAttrParserStringSwitches(const llvm::RecordKeeper &Records,
                                        llvm::raw_ostream &OS);
 void EmitClangAttrSubjectMatchRulesParserStringSwitches(
-    llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
-void EmitClangAttrClass(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
-void EmitClangAttrImpl(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
-void EmitClangAttrList(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
-void EmitClangAttrSubjectMatchRuleList(llvm::RecordKeeper &Records,
+    const llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
+void EmitClangAttrClass(const llvm::RecordKeeper &Records,
+                        llvm::raw_ostream &OS);
+void EmitClangAttrImpl(const llvm::RecordKeeper &Records,
+                       llvm::raw_ostream &OS);
+void EmitClangAttrList(const llvm::RecordKeeper &Records,
+                       llvm::raw_ostream &OS);
+void EmitClangAttrSubjectMatchRuleList(const llvm::RecordKeeper &Records,
                                        llvm::raw_ostream &OS);
-void EmitClangAttrPCHRead(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
-void EmitClangAttrPCHWrite(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
-void EmitClangRegularKeywordAttributeInfo(llvm::RecordKeeper &Records,
+void EmitClangAttrPCHRead(const llvm::RecordKeeper &Records,
+                          llvm::raw_ostream &OS);
+void EmitClangAttrPCHWrite(const llvm::RecordKeeper &Records,
+                           llvm::raw_ostream &OS);
+void EmitClangRegularKeywordAttributeInfo(const llvm::RecordKeeper &Records,
                                           llvm::raw_ostream &OS);
-void EmitClangAttrHasAttrImpl(llvm::RecordKeeper &Records,
+void EmitClangAttrHasAttrImpl(const llvm::RecordKeeper &Records,
                               llvm::raw_ostream &OS);
-void EmitClangAttrSpellingListIndex(llvm::RecordKeeper &Records,
+void EmitClangAttrSpellingListIndex(const llvm::RecordKeeper &Records,
                                     llvm::raw_ostream &OS);
-void EmitClangAttrASTVisitor(llvm::RecordKeeper &Records,
+void EmitClangAttrASTVisitor(const llvm::RecordKeeper &Records,
                              llvm::raw_ostream &OS);
-void EmitClangAttrTemplateInstantiate(llvm::RecordKeeper &Records,
+void EmitClangAttrTemplateInstantiate(const llvm::RecordKeeper &Records,
                                       llvm::raw_ostream &OS);
-void EmitClangAttrParsedAttrList(llvm::RecordKeeper &Records,
+void EmitClangAttrParsedAttrList(const llvm::RecordKeeper &Records,
                                  llvm::raw_ostream &OS);
-void EmitClangAttrParsedAttrImpl(llvm::RecordKeeper &Records,
+void EmitClangAttrParsedAttrImpl(const llvm::RecordKeeper &Records,
                                  llvm::raw_ostream &OS);
-void EmitClangAttrParsedAttrKinds(llvm::RecordKeeper &Records,
+void EmitClangAttrParsedAttrKinds(const llvm::RecordKeeper &Records,
                                   llvm::raw_ostream &OS);
-void EmitClangAttrTextNodeDump(llvm::RecordKeeper &Records,
+void EmitClangAttrTextNodeDump(const llvm::RecordKeeper &Records,
                                llvm::raw_ostream &OS);
-void EmitClangAttrNodeTraverse(llvm::RecordKeeper &Records,
+void EmitClangAttrNodeTraverse(const llvm::RecordKeeper &Records,
                                llvm::raw_ostream &OS);
-void EmitClangAttrDocTable(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
+void EmitClangAttrDocTable(const llvm::RecordKeeper &Records,
+                           llvm::raw_ostream &OS);
 
-void EmitClangBuiltins(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
+void EmitClangBuiltins(const llvm::RecordKeeper &Records,
+                       llvm::raw_ostream &OS);
 
-void EmitClangDiagsDefs(llvm::RecordKeeper &Records, llvm::raw_ostream &OS,
-                        const std::string &Component);
-void EmitClangDiagGroups(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
-void EmitClangDiagsIndexName(llvm::RecordKeeper &Records,
+void EmitClangDiagsDefs(const llvm::RecordKeeper &Records,
+                        llvm::raw_ostream &OS, const std::string &Component);
+void EmitClangDiagGroups(const llvm::RecordKeeper &Records,
+                         llvm::raw_ostream &OS);
+void EmitClangDiagsIndexName(const llvm::RecordKeeper &Records,
                              llvm::raw_ostream &OS);
 
 void EmitClangSACheckers(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 
-void EmitClangCommentHTMLTags(llvm::RecordKeeper &Records,
+void EmitClangCommentHTMLTags(const llvm::RecordKeeper &Records,
                               llvm::raw_ostream &OS);
-void EmitClangCommentHTMLTagsProperties(llvm::RecordKeeper &Records,
+void EmitClangCommentHTMLTagsProperties(const llvm::RecordKeeper &Records,
                                         llvm::raw_ostream &OS);
-void EmitClangCommentHTMLNamedCharacterReferences(llvm::RecordKeeper &Records,
-                                                  llvm::raw_ostream &OS);
+void EmitClangCommentHTMLNamedCharacterReferences(
+    const llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 
-void EmitClangCommentCommandInfo(llvm::RecordKeeper &Records,
+void EmitClangCommentCommandInfo(const llvm::RecordKeeper &Records,
                                  llvm::raw_ostream &OS);
-void EmitClangCommentCommandList(llvm::RecordKeeper &Records,
+void EmitClangCommentCommandList(const llvm::RecordKeeper &Records,
                                  llvm::raw_ostream &OS);
-void EmitClangOpcodes(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
+void EmitClangOpcodes(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 
 void EmitClangSyntaxNodeList(llvm::RecordKeeper &Records,
                              llvm::raw_ostream &OS);
@@ -108,6 +116,7 @@ void EmitNeonSema(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 void EmitVectorTypes(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 void EmitNeonTest(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 
+void EmitImmCheckTypes(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 void EmitSveHeader(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 void EmitSveBuiltins(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 void EmitSveBuiltinCG(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
@@ -139,22 +148,24 @@ void EmitCdeBuiltinSema(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 void EmitCdeBuiltinCG(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 void EmitCdeBuiltinAliases(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 
-void EmitClangAttrDocs(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
-void EmitClangDiagDocs(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
-void EmitClangOptDocs(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
+void EmitClangAttrDocs(const llvm::RecordKeeper &Records,
+                       llvm::raw_ostream &OS);
+void EmitClangDiagDocs(const llvm::RecordKeeper &Records,
+                       llvm::raw_ostream &OS);
+void EmitClangOptDocs(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 
-void EmitClangOpenCLBuiltins(llvm::RecordKeeper &Records,
+void EmitClangOpenCLBuiltins(const llvm::RecordKeeper &Records,
                              llvm::raw_ostream &OS);
-void EmitClangOpenCLBuiltinHeader(llvm::RecordKeeper &Records,
+void EmitClangOpenCLBuiltinHeader(const llvm::RecordKeeper &Records,
                                   llvm::raw_ostream &OS);
-void EmitClangOpenCLBuiltinTests(llvm::RecordKeeper &Records,
+void EmitClangOpenCLBuiltinTests(const llvm::RecordKeeper &Records,
                                  llvm::raw_ostream &OS);
 
-void EmitClangDataCollectors(llvm::RecordKeeper &Records,
+void EmitClangDataCollectors(const llvm::RecordKeeper &Records,
                              llvm::raw_ostream &OS);
 
-void EmitTestPragmaAttributeSupportedAttributes(llvm::RecordKeeper &Records,
-                                                llvm::raw_ostream &OS);
+void EmitTestPragmaAttributeSupportedAttributes(
+    const llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 
 } // end namespace clang
 
diff --git clang/www/cxx_dr_status.html clang/www/cxx_dr_status.html
index aa79c3706f32..f036fc5add24 100755
--- clang/www/cxx_dr_status.html
+++ clang/www/cxx_dr_status.html
@@ -10717,7 +10717,7 @@ and <I>POD class</I></td>
     <td><a href="https://cplusplus.github.io/CWG/issues/1815.html">1815</a></td>
     <td>CD4</td>
     <td>Lifetime extension in aggregate initialization</td>
-    <td class="none" align="center">No</td>
+    <td class="unreleased" align="center">Clang 20</td>
   </tr>
   <tr id="1816">
     <td><a href="https://cplusplus.github.io/CWG/issues/1816.html">1816</a></td>
@@ -16341,7 +16341,7 @@ and <I>POD class</I></td>
     <td><a href="https://cplusplus.github.io/CWG/issues/2749.html">2749</a></td>
     <td>DRWP</td>
     <td>Treatment of "pointer to void" for relational comparisons</td>
-    <td class="unknown" align="center">Unknown</td>
+    <td class="unreleased" align="center">Clang 20</td>
   </tr>
   <tr id="2750">
     <td><a href="https://cplusplus.github.io/CWG/issues/2750.html">2750</a></td>
diff --git compiler-rt/CMakeLists.txt compiler-rt/CMakeLists.txt
index 57914c3175e8..deb6994f4818 100644
--- compiler-rt/CMakeLists.txt
+++ compiler-rt/CMakeLists.txt
@@ -381,7 +381,7 @@ if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "s390x")
 endif()
 
 if(MSVC)
-  # FIXME: In fact, sanitizers should support both /MT and /MD, see PR20214.
+
   set(CMAKE_MSVC_RUNTIME_LIBRARY MultiThreaded)
 
   # Remove any /M[DT][d] flags, and strip any definitions of _DEBUG.
diff --git compiler-rt/include/CMakeLists.txt compiler-rt/include/CMakeLists.txt
index d598a94ee2e2..242d62b9b447 100644
--- compiler-rt/include/CMakeLists.txt
+++ compiler-rt/include/CMakeLists.txt
@@ -10,6 +10,7 @@ if (COMPILER_RT_BUILD_SANITIZERS)
     sanitizer/lsan_interface.h
     sanitizer/msan_interface.h
     sanitizer/netbsd_syscall_hooks.h
+    sanitizer/rtsan_interface.h
     sanitizer/scudo_interface.h
     sanitizer/tsan_interface.h
     sanitizer/tsan_interface_atomic.h
diff --git compiler-rt/include/sanitizer/rtsan_interface.h compiler-rt/include/sanitizer/rtsan_interface.h
new file mode 100644
index 000000000000..5d7ce5345712
--- /dev/null
+++ compiler-rt/include/sanitizer/rtsan_interface.h
@@ -0,0 +1,75 @@
+//===-- sanitizer/rtsan_interface.h -----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of RealtimeSanitizer.
+//
+// Public interface header.
+//===----------------------------------------------------------------------===//
+
+#ifndef SANITIZER_RTSAN_INTERFACE_H
+#define SANITIZER_RTSAN_INTERFACE_H
+
+#include <sanitizer/common_interface_defs.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Disable all RTSan error reporting.
+// Must be paired with a call to `__rtsan_enable`
+void SANITIZER_CDECL __rtsan_disable(void);
+
+// Re-enable all RTSan error reporting.
+// Must follow a call to `__rtsan_disable`.
+void SANITIZER_CDECL __rtsan_enable(void);
+
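+// Illustrative usage (hypothetical call site): bracket a region whose
+// real-time violations should not be reported.
+//
+//   __rtsan_disable();
+//   /* ... code whose reports should be suppressed ... */
+//   __rtsan_enable();
+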
+#ifdef __cplusplus
+} // extern "C"
+
+namespace __rtsan {
+#if defined(__has_feature) && __has_feature(realtime_sanitizer)
+
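+// RAII helper: RTSan reporting is disabled for the lifetime of the object.
+// For example (illustrative):
+//
+//   {
+//     __rtsan::ScopedDisabler D;
+//     // ... code whose reports should be suppressed ...
+//   }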
+class ScopedDisabler {
+public:
+  ScopedDisabler() { __rtsan_disable(); }
+  ~ScopedDisabler() { __rtsan_enable(); }
+
+#if __cplusplus >= 201103L
+  ScopedDisabler(const ScopedDisabler &) = delete;
+  ScopedDisabler &operator=(const ScopedDisabler &) = delete;
+  ScopedDisabler(ScopedDisabler &&) = delete;
+  ScopedDisabler &operator=(ScopedDisabler &&) = delete;
+#else
+private:
+  ScopedDisabler(const ScopedDisabler &);
+  ScopedDisabler &operator=(const ScopedDisabler &);
+#endif // __cplusplus >= 201103L
+};
+
+#else
+
+class ScopedDisabler {
+public:
+  ScopedDisabler() {}
+#if __cplusplus >= 201103L
+  ScopedDisabler(const ScopedDisabler &) = delete;
+  ScopedDisabler &operator=(const ScopedDisabler &) = delete;
+  ScopedDisabler(ScopedDisabler &&) = delete;
+  ScopedDisabler &operator=(ScopedDisabler &&) = delete;
+#else
+private:
+  ScopedDisabler(const ScopedDisabler &);
+  ScopedDisabler &operator=(const ScopedDisabler &);
+#endif // __cplusplus >= 201103L
+};
+
+#endif // defined(__has_feature) && __has_feature(realtime_sanitizer)
+} // namespace __rtsan
+#endif // __cplusplus
+
+#endif // SANITIZER_RTSAN_INTERFACE_H
diff --git compiler-rt/lib/asan/CMakeLists.txt compiler-rt/lib/asan/CMakeLists.txt
index 463ea233b37a..fb3d74283a61 100644
--- compiler-rt/lib/asan/CMakeLists.txt
+++ compiler-rt/lib/asan/CMakeLists.txt
@@ -32,6 +32,20 @@ set(ASAN_SOURCES
   asan_win.cpp
   )
 
+if(WIN32)
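+  # Windows-only source lists for the dynamic and static runtime thunk
+  # libraries assembled further below in this file.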
+  set(ASAN_DYNAMIC_RUNTIME_THUNK_SOURCES
+    asan_globals_win.cpp
+    asan_win_common_runtime_thunk.cpp
+    asan_win_dynamic_runtime_thunk.cpp
+    )
+  set(ASAN_STATIC_RUNTIME_THUNK_SOURCES
+    asan_globals_win.cpp
+    asan_malloc_win_thunk.cpp
+    asan_win_common_runtime_thunk.cpp
+    asan_win_static_runtime_thunk.cpp
+    )
+endif()
+
 if (NOT WIN32 AND NOT APPLE)
   list(APPEND ASAN_SOURCES
     asan_interceptors_vfork.S
@@ -83,7 +97,13 @@ SET(ASAN_HEADERS
   )
 
 include_directories(..)
-
+if(MSVC)
+  # ASan on Windows only supports the release DLL version of the runtime, in the
+  # interest of having a single ASan DLL to support and test. Statically linking
+  # ASan with the runtime might be possible, but it multiplies the scenarios to
+  # test. The program USING the sanitizer can use whatever runtime it wants.
+  set(CMAKE_MSVC_RUNTIME_LIBRARY MultiThreadedDLL)
+endif()
 set(ASAN_CFLAGS ${SANITIZER_COMMON_CFLAGS})
 
 append_list_if(MSVC /Zl ASAN_CFLAGS)
@@ -117,7 +137,11 @@ append_list_if(WIN32 INTERCEPTION_DYNAMIC_CRT ASAN_DYNAMIC_DEFINITIONS)
 set(ASAN_DYNAMIC_CFLAGS ${ASAN_CFLAGS})
 append_list_if(COMPILER_RT_HAS_FTLS_MODEL_INITIAL_EXEC
   -ftls-model=initial-exec ASAN_DYNAMIC_CFLAGS)
-append_list_if(MSVC /DEBUG ASAN_DYNAMIC_LINK_FLAGS)
+
+# LLVM turns /OPT:ICF back on when LLVM_ENABLE_PDB is set; we really need to
+# turn it back off for ASan, because the way ASan emulates weak functions from
+# DLLs requires /OPT:NOICF.
+append_list_if(MSVC "/DEBUG;/OPT:NOICF" ASAN_DYNAMIC_LINK_FLAGS)
 
 set(ASAN_DYNAMIC_LIBS
   ${COMPILER_RT_UNWINDER_LINK_LIBS}
@@ -221,46 +245,52 @@ else()
     RTSanitizerCommonSymbolizerInternal
     RTLSanCommon
     RTUbsan)
+  if (NOT WIN32)
+    add_compiler_rt_runtime(clang_rt.asan
+      STATIC
+      ARCHS ${ASAN_SUPPORTED_ARCH}
+      OBJECT_LIBS RTAsan_preinit
+                  RTAsan
+                  ${ASAN_COMMON_RUNTIME_OBJECT_LIBS}
+      CFLAGS ${ASAN_CFLAGS}
+      DEFS ${ASAN_COMMON_DEFINITIONS}
+      PARENT_TARGET asan)
 
-  add_compiler_rt_runtime(clang_rt.asan
-    STATIC
-    ARCHS ${ASAN_SUPPORTED_ARCH}
-    OBJECT_LIBS RTAsan_preinit
-                RTAsan
-                ${ASAN_COMMON_RUNTIME_OBJECT_LIBS}
-    CFLAGS ${ASAN_CFLAGS}
-    DEFS ${ASAN_COMMON_DEFINITIONS}
-    PARENT_TARGET asan)
-
-  add_compiler_rt_runtime(clang_rt.asan_cxx
-    STATIC
-    ARCHS ${ASAN_SUPPORTED_ARCH}
-    OBJECT_LIBS RTAsan_cxx
-                RTUbsan_cxx
-    CFLAGS ${ASAN_CFLAGS}
-    DEFS ${ASAN_COMMON_DEFINITIONS}
-    PARENT_TARGET asan)
+    add_compiler_rt_runtime(clang_rt.asan_cxx
+      STATIC
+      ARCHS ${ASAN_SUPPORTED_ARCH}
+      OBJECT_LIBS RTAsan_cxx
+                  RTUbsan_cxx
+      CFLAGS ${ASAN_CFLAGS}
+      DEFS ${ASAN_COMMON_DEFINITIONS}
+      PARENT_TARGET asan)
 
-  add_compiler_rt_runtime(clang_rt.asan_static
-    STATIC
-    ARCHS ${ASAN_SUPPORTED_ARCH}
-    OBJECT_LIBS RTAsan_static
-    CFLAGS ${ASAN_CFLAGS}
-    DEFS ${ASAN_COMMON_DEFINITIONS}
-    PARENT_TARGET asan)
+    add_compiler_rt_runtime(clang_rt.asan_static
+      STATIC
+      ARCHS ${ASAN_SUPPORTED_ARCH}
+      OBJECT_LIBS RTAsan_static
+      CFLAGS ${ASAN_CFLAGS}
+      DEFS ${ASAN_COMMON_DEFINITIONS}
+      PARENT_TARGET asan)
 
-  add_compiler_rt_runtime(clang_rt.asan-preinit
-    STATIC
-    ARCHS ${ASAN_SUPPORTED_ARCH}
-    OBJECT_LIBS RTAsan_preinit
-    CFLAGS ${ASAN_CFLAGS}
-    DEFS ${ASAN_COMMON_DEFINITIONS}
-    PARENT_TARGET asan)
+    add_compiler_rt_runtime(clang_rt.asan-preinit
+      STATIC
+      ARCHS ${ASAN_SUPPORTED_ARCH}
+      OBJECT_LIBS RTAsan_preinit
+      CFLAGS ${ASAN_CFLAGS}
+      DEFS ${ASAN_COMMON_DEFINITIONS}
+      PARENT_TARGET asan)
+  endif()
 
   foreach(arch ${ASAN_SUPPORTED_ARCH})
     if (COMPILER_RT_HAS_VERSION_SCRIPT)
+      if(WIN32)
+        set(SANITIZER_RT_VERSION_LIST_LIBS clang_rt.asan-${arch})
+      else()
+        set(SANITIZER_RT_VERSION_LIST_LIBS clang_rt.asan-${arch} clang_rt.asan_cxx-${arch})
+      endif()
       add_sanitizer_rt_version_list(clang_rt.asan-dynamic-${arch}
-                                    LIBS clang_rt.asan-${arch} clang_rt.asan_cxx-${arch}
+                                    LIBS ${SANITIZER_RT_VERSION_LIST_LIBS}
                                     EXTRA asan.syms.extra)
       set(VERSION_SCRIPT_FLAG
            -Wl,--version-script,${CMAKE_CURRENT_BINARY_DIR}/clang_rt.asan-dynamic-${arch}.vers)
@@ -278,21 +308,6 @@ else()
     endif()
 
     set(ASAN_DYNAMIC_WEAK_INTERCEPTION)
-    if (WIN32)
-      add_compiler_rt_object_libraries(AsanWeakInterception
-        ${SANITIZER_COMMON_SUPPORTED_OS}
-        ARCHS ${arch}
-        SOURCES
-          asan_win_weak_interception.cpp
-        CFLAGS ${ASAN_CFLAGS} -DSANITIZER_DYNAMIC
-        DEFS ${ASAN_COMMON_DEFINITIONS})
-      set(ASAN_DYNAMIC_WEAK_INTERCEPTION
-          AsanWeakInterception
-          UbsanWeakInterception
-          SancovWeakInterception
-          SanitizerCommonWeakInterception)
-    endif()
-
     add_compiler_rt_runtime(clang_rt.asan
       SHARED
       ARCHS ${arch}
@@ -324,36 +339,12 @@ else()
     endif()
 
     if (WIN32)
-      add_compiler_rt_object_libraries(AsanDllThunk
-        ${SANITIZER_COMMON_SUPPORTED_OS}
-        ARCHS ${arch}
-        SOURCES asan_globals_win.cpp
-                asan_win_dll_thunk.cpp
-        CFLAGS ${ASAN_CFLAGS} -DSANITIZER_DLL_THUNK
-        DEFS ${ASAN_COMMON_DEFINITIONS})
-
-      add_compiler_rt_runtime(clang_rt.asan_dll_thunk
-        STATIC
-        ARCHS ${arch}
-        OBJECT_LIBS AsanDllThunk
-                    UbsanDllThunk
-                    SancovDllThunk
-                    SanitizerCommonDllThunk
-        SOURCES $<TARGET_OBJECTS:RTInterception.${arch}>
-        PARENT_TARGET asan)
-
       set(DYNAMIC_RUNTIME_THUNK_CFLAGS "-DSANITIZER_DYNAMIC_RUNTIME_THUNK")
-      if(MSVC)
-        list(APPEND DYNAMIC_RUNTIME_THUNK_CFLAGS "-Zl")
-      elseif(CMAKE_C_COMPILER_ID MATCHES Clang)
-        list(APPEND DYNAMIC_RUNTIME_THUNK_CFLAGS "-nodefaultlibs")
-      endif()
 
       add_compiler_rt_object_libraries(AsanDynamicRuntimeThunk
         ${SANITIZER_COMMON_SUPPORTED_OS}
         ARCHS ${arch}
-        SOURCES asan_globals_win.cpp
-                asan_win_dynamic_runtime_thunk.cpp
+        SOURCES ${ASAN_DYNAMIC_RUNTIME_THUNK_SOURCES}
         CFLAGS ${ASAN_CFLAGS} ${DYNAMIC_RUNTIME_THUNK_CFLAGS}
         DEFS ${ASAN_COMMON_DEFINITIONS})
 
@@ -361,12 +352,35 @@ else()
         STATIC
         ARCHS ${arch}
         OBJECT_LIBS AsanDynamicRuntimeThunk
-                    UbsanDynamicRuntimeThunk
-                    SancovDynamicRuntimeThunk
-                    SanitizerCommonDynamicRuntimeThunk
+                    UbsanRuntimeThunk
+                    SancovRuntimeThunk
+                    SanitizerRuntimeThunk
         CFLAGS ${ASAN_CFLAGS} ${DYNAMIC_RUNTIME_THUNK_CFLAGS}
         DEFS ${ASAN_COMMON_DEFINITIONS}
         PARENT_TARGET asan)
+
+      # MinGW does not support static linkage of the CRT
+      if(NOT MINGW)
+        set(STATIC_RUNTIME_THUNK_CFLAGS "-DSANITIZER_STATIC_RUNTIME_THUNK")
+
+        add_compiler_rt_object_libraries(AsanStaticRuntimeThunk
+          ${SANITIZER_COMMON_SUPPORTED_OS}
+          ARCHS ${arch}
+          SOURCES ${ASAN_STATIC_RUNTIME_THUNK_SOURCES}
+          CFLAGS ${ASAN_DYNAMIC_CFLAGS} ${STATIC_RUNTIME_THUNK_CFLAGS}
+          DEFS ${ASAN_DYNAMIC_DEFINITIONS})
+
+        add_compiler_rt_runtime(clang_rt.asan_static_runtime_thunk
+          STATIC
+          ARCHS ${arch}
+          OBJECT_LIBS AsanStaticRuntimeThunk
+                      UbsanRuntimeThunk
+                      SancovRuntimeThunk
+                      SanitizerRuntimeThunk
+          CFLAGS ${ASAN_DYNAMIC_CFLAGS} ${STATIC_RUNTIME_THUNK_CFLAGS}
+          DEFS ${ASAN_DYNAMIC_DEFINITIONS}
+          PARENT_TARGET asan)
+      endif()
     endif()
   endforeach()
 endif()
diff --git compiler-rt/lib/asan/asan_flags.cpp compiler-rt/lib/asan/asan_flags.cpp
index 239898433232..56deb1b0d082 100644
--- compiler-rt/lib/asan/asan_flags.cpp
+++ compiler-rt/lib/asan/asan_flags.cpp
@@ -11,14 +11,16 @@
 // ASan flag parsing logic.
 //===----------------------------------------------------------------------===//
 
-#include "asan_activation.h"
 #include "asan_flags.h"
+
+#include "asan_activation.h"
 #include "asan_interface_internal.h"
 #include "asan_stack.h"
 #include "lsan/lsan_common.h"
 #include "sanitizer_common/sanitizer_common.h"
-#include "sanitizer_common/sanitizer_flags.h"
 #include "sanitizer_common/sanitizer_flag_parser.h"
+#include "sanitizer_common/sanitizer_flags.h"
+#include "sanitizer_common/sanitizer_win_interception.h"
 #include "ubsan/ubsan_flags.h"
 #include "ubsan/ubsan_platform.h"
 
@@ -47,7 +49,21 @@ static void RegisterAsanFlags(FlagParser *parser, Flags *f) {
 #undef ASAN_FLAG
 }
 
-void InitializeFlags() {
+static void DisplayHelpMessages(FlagParser *parser) {
+  // TODO(eugenis): dump all flags at verbosity>=2?
+  if (Verbosity()) {
+    ReportUnrecognizedFlags();
+  }
+
+  if (common_flags()->help) {
+    parser->PrintFlagDescriptions();
+  }
+}
+
+static void InitializeDefaultFlags() {
+  Flags *f = flags();
+  FlagParser asan_parser;
+
   // Set the default values and prepare for parsing ASan and common flags.
   SetCommonFlagsDefaults();
   {
@@ -60,10 +76,8 @@ void InitializeFlags() {
     cf.exitcode = 1;
     OverrideCommonFlags(cf);
   }
-  Flags *f = flags();
   f->SetDefaults();
 
-  FlagParser asan_parser;
   RegisterAsanFlags(&asan_parser, f);
   RegisterCommonFlags(&asan_parser);
 
@@ -126,13 +140,12 @@ void InitializeFlags() {
 
   InitializeCommonFlags();
 
-  // TODO(eugenis): dump all flags at verbosity>=2?
-  if (Verbosity()) ReportUnrecognizedFlags();
+  // TODO(samsonov): print all of the flags (ASan, LSan, common).
+  DisplayHelpMessages(&asan_parser);
+}
 
-  if (common_flags()->help) {
-    // TODO(samsonov): print all of the flags (ASan, LSan, common).
-    asan_parser.PrintFlagDescriptions();
-  }
+static void ProcessFlags() {
+  Flags *f = flags();
 
   // Flag validation:
   if (!CAN_SANITIZE_LEAKS && common_flags()->detect_leaks) {
@@ -199,6 +212,67 @@ void InitializeFlags() {
   }
 }
 
+void InitializeFlags() {
+  InitializeDefaultFlags();
+  ProcessFlags();
+
+#if SANITIZER_WINDOWS
+  // On Windows, weak symbols are emulated by having the user program
+  // register which weak functions are defined.
+  // The ASAN DLL will initialize flags prior to user module initialization,
+  // so __asan_default_options will not point to the user definition yet.
+  // We still want to ensure we capture when options are passed via
+  // __asan_default_options, so we add a callback to be run
+  // when it is registered with the runtime.
+
+  // There is theoretically time between the initial ProcessFlags and
+  // registering the weak callback where a weak function could be added and we
+  // would miss it, but in practice, InitializeFlags will always happen under
+  // the loader lock (if built as a DLL) and so will any calls to
+  // __sanitizer_register_weak_function.
+  AddRegisterWeakFunctionCallback(
+      reinterpret_cast<uptr>(__asan_default_options), []() {
+        FlagParser asan_parser;
+
+        RegisterAsanFlags(&asan_parser, flags());
+        RegisterCommonFlags(&asan_parser);
+        asan_parser.ParseString(__asan_default_options());
+
+        DisplayHelpMessages(&asan_parser);
+        ProcessFlags();
+      });
+
+#  if CAN_SANITIZE_UB
+  AddRegisterWeakFunctionCallback(
+      reinterpret_cast<uptr>(__ubsan_default_options), []() {
+        FlagParser ubsan_parser;
+
+        __ubsan::RegisterUbsanFlags(&ubsan_parser, __ubsan::flags());
+        RegisterCommonFlags(&ubsan_parser);
+        ubsan_parser.ParseString(__ubsan_default_options());
+
+        // To match normal behavior, do not print UBSan help.
+        ProcessFlags();
+      });
+#  endif
+
+#  if CAN_SANITIZE_LEAKS
+  AddRegisterWeakFunctionCallback(
+      reinterpret_cast<uptr>(__lsan_default_options), []() {
+        FlagParser lsan_parser;
+
+        __lsan::RegisterLsanFlags(&lsan_parser, __lsan::flags());
+        RegisterCommonFlags(&lsan_parser);
+        lsan_parser.ParseString(__lsan_default_options());
+
+        // To match normal behavior, do not print LSan help.
+        ProcessFlags();
+      });
+#  endif
+
+#endif
+}
+
 }  // namespace __asan
 
 SANITIZER_INTERFACE_WEAK_DEF(const char*, __asan_default_options, void) {
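For context, the hook these registration callbacks wait for is the ordinary user-side override of the weak default shown above; a minimal sketch of what an instrumented program might provide (illustrative, not part of this patch):

extern "C" const char *__asan_default_options() {
  // Parsed by the callback registered above once the module registers its
  // weak functions with the ASan DLL.
  return "detect_stack_use_after_return=1:verbosity=1";
}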
diff --git compiler-rt/lib/asan/asan_globals_win.cpp compiler-rt/lib/asan/asan_globals_win.cpp
index 9442cc35d5ab..2b59595dcd3b 100644
--- compiler-rt/lib/asan/asan_globals_win.cpp
+++ compiler-rt/lib/asan/asan_globals_win.cpp
@@ -28,7 +28,9 @@ static void call_on_globals(void (*hook)(__asan_global *, uptr)) {
   __asan_global *end = &__asan_globals_end;
   uptr bytediff = (uptr)end - (uptr)start;
   if (bytediff % sizeof(__asan_global) != 0) {
-#if defined(SANITIZER_DLL_THUNK) || defined(SANITIZER_DYNAMIC_RUNTIME_THUNK)
+#  if defined(SANITIZER_DLL_THUNK) ||             \
+      defined(SANITIZER_DYNAMIC_RUNTIME_THUNK) || \
+      defined(SANITIZER_STATIC_RUNTIME_THUNK)
     __debugbreak();
 #else
     CHECK("corrupt asan global array");
diff --git compiler-rt/lib/asan/asan_malloc_linux.cpp compiler-rt/lib/asan/asan_malloc_linux.cpp
index 08a63045c4e6..3d6b03fefab7 100644
--- compiler-rt/lib/asan/asan_malloc_linux.cpp
+++ compiler-rt/lib/asan/asan_malloc_linux.cpp
@@ -25,7 +25,6 @@
 #  include "sanitizer_common/sanitizer_allocator_checks.h"
 #  include "sanitizer_common/sanitizer_allocator_dlsym.h"
 #  include "sanitizer_common/sanitizer_errno.h"
-#  include "sanitizer_common/sanitizer_tls_get_addr.h"
 
 // ---------------------- Replacement functions ---------------- {{{1
 using namespace __asan;
@@ -99,9 +98,7 @@ INTERCEPTOR(void*, memalign, uptr boundary, uptr size) {
 
 INTERCEPTOR(void*, __libc_memalign, uptr boundary, uptr size) {
   GET_STACK_TRACE_MALLOC;
-  void *res = asan_memalign(boundary, size, &stack, FROM_MALLOC);
-  DTLS_on_libc_memalign(res, size);
-  return res;
+  return asan_memalign(boundary, size, &stack, FROM_MALLOC);
 }
 #endif // SANITIZER_INTERCEPT_MEMALIGN
 
diff --git compiler-rt/lib/asan/asan_malloc_win.cpp compiler-rt/lib/asan/asan_malloc_win.cpp
index 7e1d04c36dd5..3278f0721987 100644
--- compiler-rt/lib/asan/asan_malloc_win.cpp
+++ compiler-rt/lib/asan/asan_malloc_win.cpp
@@ -58,97 +58,69 @@ using namespace __asan;
 // MD: Memory allocation functions are defined in the CRT .dll,
 // so we have to intercept them before they are called for the first time.
 
-#if ASAN_DYNAMIC
-# define ALLOCATION_FUNCTION_ATTRIBUTE
-#else
-# define ALLOCATION_FUNCTION_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE
-#endif
-
 extern "C" {
-ALLOCATION_FUNCTION_ATTRIBUTE
-size_t _msize(void *ptr) {
+__declspec(noinline) size_t _msize(void *ptr) {
   GET_CURRENT_PC_BP_SP;
   (void)sp;
   return asan_malloc_usable_size(ptr, pc, bp);
 }
 
-ALLOCATION_FUNCTION_ATTRIBUTE
-size_t _msize_base(void *ptr) {
-  return _msize(ptr);
-}
+__declspec(noinline) size_t _msize_base(void *ptr) { return _msize(ptr); }
 
-ALLOCATION_FUNCTION_ATTRIBUTE
-void free(void *ptr) {
+__declspec(noinline) void free(void *ptr) {
   GET_STACK_TRACE_FREE;
   return asan_free(ptr, &stack, FROM_MALLOC);
 }
 
-ALLOCATION_FUNCTION_ATTRIBUTE
-void _free_dbg(void *ptr, int) {
-  free(ptr);
-}
+__declspec(noinline) void _free_dbg(void *ptr, int) { free(ptr); }
 
-ALLOCATION_FUNCTION_ATTRIBUTE
-void _free_base(void *ptr) {
-  free(ptr);
-}
+__declspec(noinline) void _free_base(void *ptr) { free(ptr); }
 
-ALLOCATION_FUNCTION_ATTRIBUTE
-void *malloc(size_t size) {
+__declspec(noinline) void *malloc(size_t size) {
   GET_STACK_TRACE_MALLOC;
   return asan_malloc(size, &stack);
 }
 
-ALLOCATION_FUNCTION_ATTRIBUTE
-void *_malloc_base(size_t size) {
-  return malloc(size);
-}
+__declspec(noinline) void *_malloc_base(size_t size) { return malloc(size); }
 
-ALLOCATION_FUNCTION_ATTRIBUTE
-void *_malloc_dbg(size_t size, int, const char *, int) {
+__declspec(noinline) void *_malloc_dbg(size_t size, int, const char *, int) {
   return malloc(size);
 }
 
-ALLOCATION_FUNCTION_ATTRIBUTE
-void *calloc(size_t nmemb, size_t size) {
+__declspec(noinline) void *calloc(size_t nmemb, size_t size) {
   GET_STACK_TRACE_MALLOC;
   return asan_calloc(nmemb, size, &stack);
 }
 
-ALLOCATION_FUNCTION_ATTRIBUTE
-void *_calloc_base(size_t nmemb, size_t size) {
+__declspec(noinline) void *_calloc_base(size_t nmemb, size_t size) {
   return calloc(nmemb, size);
 }
 
-ALLOCATION_FUNCTION_ATTRIBUTE
-void *_calloc_dbg(size_t nmemb, size_t size, int, const char *, int) {
+__declspec(noinline) void *_calloc_dbg(size_t nmemb, size_t size, int,
+                                       const char *, int) {
   return calloc(nmemb, size);
 }
 
-ALLOCATION_FUNCTION_ATTRIBUTE
-void *_calloc_impl(size_t nmemb, size_t size, int *errno_tmp) {
+__declspec(noinline) void *_calloc_impl(size_t nmemb, size_t size,
+                                        int *errno_tmp) {
   return calloc(nmemb, size);
 }
 
-ALLOCATION_FUNCTION_ATTRIBUTE
-void *realloc(void *ptr, size_t size) {
+__declspec(noinline) void *realloc(void *ptr, size_t size) {
   GET_STACK_TRACE_MALLOC;
   return asan_realloc(ptr, size, &stack);
 }
 
-ALLOCATION_FUNCTION_ATTRIBUTE
-void *_realloc_dbg(void *ptr, size_t size, int) {
+__declspec(noinline) void *_realloc_dbg(void *ptr, size_t size, int) {
   UNREACHABLE("_realloc_dbg should not exist!");
   return 0;
 }
 
-ALLOCATION_FUNCTION_ATTRIBUTE
-void *_realloc_base(void *ptr, size_t size) {
+__declspec(noinline) void *_realloc_base(void *ptr, size_t size) {
   return realloc(ptr, size);
 }
 
-ALLOCATION_FUNCTION_ATTRIBUTE
-void *_recalloc(void *p, size_t n, size_t elem_size) {
+__declspec(noinline) void *_recalloc(void *p, size_t n, size_t elem_size) {
   if (!p)
     return calloc(n, elem_size);
   const size_t size = n * elem_size;
@@ -166,23 +138,41 @@ void *_recalloc(void *p, size_t n, size_t elem_size) {
   return new_alloc;
 }
 
-ALLOCATION_FUNCTION_ATTRIBUTE
-void *_recalloc_base(void *p, size_t n, size_t elem_size) {
+__declspec(noinline) void *_recalloc_base(void *p, size_t n, size_t elem_size) {
   return _recalloc(p, n, elem_size);
 }
 
-ALLOCATION_FUNCTION_ATTRIBUTE
-void *_expand(void *memblock, size_t size) {
+__declspec(noinline) void *_expand(void *memblock, size_t size) {
   // _expand is used in realloc-like functions to resize the buffer if possible.
   // We don't want memory to stand still while resizing buffers, so return 0.
   return 0;
 }
 
-ALLOCATION_FUNCTION_ATTRIBUTE
-void *_expand_dbg(void *memblock, size_t size) {
+__declspec(noinline) void *_expand_dbg(void *memblock, size_t size) {
   return _expand(memblock, size);
 }
 
+__declspec(dllexport) size_t __cdecl __asan_msize(void *ptr) {
+  return _msize(ptr);
+}
+__declspec(dllexport) void __cdecl __asan_free(void *const ptr) { free(ptr); }
+__declspec(dllexport) void *__cdecl __asan_malloc(const size_t size) {
+  return malloc(size);
+}
+__declspec(dllexport) void *__cdecl __asan_calloc(const size_t nmemb,
+                                                  const size_t size) {
+  return calloc(nmemb, size);
+}
+__declspec(dllexport) void *__cdecl __asan_realloc(void *const ptr,
+                                                   const size_t size) {
+  return realloc(ptr, size);
+}
+__declspec(dllexport) void *__cdecl __asan_recalloc(void *const ptr,
+                                                    const size_t nmemb,
+                                                    const size_t size) {
+  return _recalloc(ptr, nmemb, size);
+}
+
 // TODO(timurrrr): Might want to add support for _aligned_* allocation
 // functions to detect a bit more bugs.  Those functions seem to wrap malloc().
 
@@ -487,7 +477,6 @@ static void TryToOverrideFunction(const char *fname, uptr new_func) {
 }
 
 void ReplaceSystemMalloc() {
-#if defined(ASAN_DYNAMIC)
   TryToOverrideFunction("free", (uptr)free);
   TryToOverrideFunction("_free_base", (uptr)free);
   TryToOverrideFunction("malloc", (uptr)malloc);
@@ -543,8 +532,6 @@ void ReplaceSystemMalloc() {
   // allocation API will be directed to ASan's heap. We don't currently
   // intercept all calls to HeapAlloc. If we did, we would have to check on
   // HeapFree whether the pointer came from ASan of from the system.
-
-#endif  // defined(ASAN_DYNAMIC)
 }
 }  // namespace __asan
 
diff --git compiler-rt/lib/asan/asan_malloc_win_thunk.cpp compiler-rt/lib/asan/asan_malloc_win_thunk.cpp
new file mode 100644
index 000000000000..abf515b77c4a
--- /dev/null
+++ compiler-rt/lib/asan/asan_malloc_win_thunk.cpp
@@ -0,0 +1,229 @@
+//===-- asan_malloc_win_thunk.cpp ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// Windows-specific malloc interception.
+// This is included statically for projects statically linking
+// with the C Runtime (/MT, /MTd) in order to provide ASAN-aware
+// versions of the C allocation functions.
+//===----------------------------------------------------------------------===//
+
+#ifdef SANITIZER_STATIC_RUNTIME_THUNK
+#  include "..\sanitizer_common\sanitizer_allocator_interface.h"
+// #include "asan_win_thunk_common.h"
+
+// Preserve stack traces with noinline.
+#  define STATIC_MALLOC_INTERFACE __declspec(noinline)
+
+extern "C" {
+__declspec(dllimport) size_t __cdecl __asan_msize(void *ptr);
+__declspec(dllimport) void __cdecl __asan_free(void *const ptr);
+__declspec(dllimport) void *__cdecl __asan_malloc(const size_t size);
+__declspec(dllimport) void *__cdecl __asan_calloc(const size_t nmemb,
+                                                  const size_t size);
+__declspec(dllimport) void *__cdecl __asan_realloc(void *const ptr,
+                                                   const size_t size);
+__declspec(dllimport) void *__cdecl __asan_recalloc(void *const ptr,
+                                                    const size_t nmemb,
+                                                    const size_t size);
+
+// Avoid tailcall optimization to preserve stack frames.
+#  pragma optimize("", off)
+
+// _msize
+STATIC_MALLOC_INTERFACE size_t _msize(void *ptr) { return __asan_msize(ptr); }
+
+STATIC_MALLOC_INTERFACE size_t _msize_base(void *ptr) {
+  return __asan_msize(ptr);
+}
+
+STATIC_MALLOC_INTERFACE size_t _msize_dbg(void *ptr) {
+  return __asan_msize(ptr);
+}
+
+// free
+STATIC_MALLOC_INTERFACE void free(void *const ptr) { return __asan_free(ptr); }
+
+STATIC_MALLOC_INTERFACE void _free_base(void *const ptr) {
+  return __asan_free(ptr);
+}
+
+STATIC_MALLOC_INTERFACE void _free_dbg(void *const ptr) {
+  return __asan_free(ptr);
+}
+
+// malloc
+STATIC_MALLOC_INTERFACE void *malloc(const size_t size) {
+  return __asan_malloc(size);
+}
+
+STATIC_MALLOC_INTERFACE void *_malloc_base(const size_t size) {
+  return __asan_malloc(size);
+}
+
+STATIC_MALLOC_INTERFACE void *_malloc_dbg(const size_t size) {
+  return __asan_malloc(size);
+}
+
+// calloc
+STATIC_MALLOC_INTERFACE void *calloc(const size_t nmemb, const size_t size) {
+  return __asan_calloc(nmemb, size);
+}
+
+STATIC_MALLOC_INTERFACE void *_calloc_base(const size_t nmemb,
+                                           const size_t size) {
+  return __asan_calloc(nmemb, size);
+}
+
+STATIC_MALLOC_INTERFACE void *_calloc_impl(const size_t nmemb,
+                                           const size_t size,
+                                           int *const errno_tmp) {
+  // Provided by legacy msvcrt.
+  (void)errno_tmp;
+
+  return __asan_calloc(nmemb, size);
+}
+
+STATIC_MALLOC_INTERFACE void *_calloc_dbg(const size_t nmemb, const size_t size,
+                                          int, const char *, int) {
+  return __asan_calloc(nmemb, size);
+}
+
+// realloc
+STATIC_MALLOC_INTERFACE void *realloc(void *const ptr, const size_t size) {
+  return __asan_realloc(ptr, size);
+}
+
+STATIC_MALLOC_INTERFACE void *_realloc_base(void *const ptr,
+                                            const size_t size) {
+  return __asan_realloc(ptr, size);
+}
+
+STATIC_MALLOC_INTERFACE void *_realloc_dbg(void *const ptr, const size_t size,
+                                           int, const char *, int) {
+  return __asan_realloc(ptr, size);
+}
+
+// recalloc
+STATIC_MALLOC_INTERFACE void *_recalloc(void *const ptr, const size_t nmemb,
+                                        const size_t size) {
+  return __asan_recalloc(ptr, nmemb, size);
+}
+
+STATIC_MALLOC_INTERFACE void *_recalloc_base(void *const ptr,
+                                             const size_t nmemb,
+                                             const size_t size) {
+  return __asan_recalloc(ptr, nmemb, size);
+}
+
+STATIC_MALLOC_INTERFACE void *_recalloc_dbg(void *const ptr, const size_t nmemb,
+                                            const size_t size, int,
+                                            const char *, int) {
+  return __asan_recalloc(ptr, nmemb, size);
+}
+
+// expand
+STATIC_MALLOC_INTERFACE void *_expand(void *, size_t) {
+  // _expand is used in realloc-like functions to resize the buffer if possible.
+  // We don't want memory to stand still while resizing buffers, so return 0.
+  return nullptr;
+}
+
+STATIC_MALLOC_INTERFACE void *_expand_dbg(void *, size_t, int, const char *,
+                                          int) {
+  return nullptr;
+}
+
+// We need to provide symbols for all the debug CRT functions if we decide to
+// provide any. Most of these functions make no sense under ASan and so we
+// make them no-ops.
+long _CrtSetBreakAlloc(long const) { return ~0; }
+
+void _CrtSetDbgBlockType(void *const, int const) { return; }
+
+typedef int(__cdecl *CRT_ALLOC_HOOK)(int, void *, size_t, int, long,
+                                     const unsigned char *, int);
+
+CRT_ALLOC_HOOK _CrtGetAllocHook() { return nullptr; }
+
+CRT_ALLOC_HOOK _CrtSetAllocHook(CRT_ALLOC_HOOK const hook) { return hook; }
+
+int _CrtCheckMemory() { return 1; }
+
+int _CrtSetDbgFlag(int const new_bits) { return new_bits; }
+
+typedef void (*CrtDoForAllClientObjectsCallback)(void *, void *);
+
+void _CrtDoForAllClientObjects(CrtDoForAllClientObjectsCallback const,
+                               void *const) {
+  return;
+}
+
+int _CrtIsValidPointer(void const *const p, unsigned int const, int const) {
+  return p != nullptr;
+}
+
+int _CrtIsValidHeapPointer(void const *const block) {
+  if (!block) {
+    return 0;
+  }
+
+  return __sanitizer_get_ownership(block);
+}
+
+int _CrtIsMemoryBlock(void const *const, unsigned const, long *const,
+                      char **const, int *const) {
+  return 0;
+}
+
+int _CrtReportBlockType(void const *const) { return -1; }
+
+typedef void(__cdecl *CRT_DUMP_CLIENT)(void *, size_t);
+
+CRT_DUMP_CLIENT _CrtGetDumpClient() { return nullptr; }
+
+CRT_DUMP_CLIENT _CrtSetDumpClient(CRT_DUMP_CLIENT new_client) {
+  return new_client;
+}
+
+void _CrtMemCheckpoint(void *const) { return; }
+
+int _CrtMemDifference(void *const, void const *const, void const *const) {
+  return 0;
+}
+
+void _CrtMemDumpAllObjectsSince(void const *const) { return; }
+
+int _CrtDumpMemoryLeaks() { return 0; }
+
+void _CrtMemDumpStatistics(void const *const) { return; }
+
+int _crtDbgFlag{0};
+long _crtBreakAlloc{-1};
+CRT_DUMP_CLIENT _pfnDumpClient{nullptr};
+
+int *__p__crtDbgFlag() { return &_crtDbgFlag; }
+
+long *__p__crtBreakAlloc() { return &_crtBreakAlloc; }
+
+// TODO: These were added upstream but conflict with definitions in ucrtbased.
+// int _CrtDbgReport(int, const char *, int, const char *, const char *, ...) {
+//   ShowStatsAndAbort();
+// }
+//
+// int _CrtDbgReportW(int reportType, const wchar_t *, int, const wchar_t *,
+//                    const wchar_t *, ...) {
+//   ShowStatsAndAbort();
+// }
+//
+// int _CrtSetReportMode(int, int) { return 0; }
+
+}  // extern "C"
+#endif  // SANITIZER_STATIC_RUNTIME_THUNK
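A quick way to see what this thunk buys a statically linked CRT is a small smoke test; assuming the program is built with something like clang-cl /MT -fsanitize=address so the static runtime thunk is pulled in, plain CRT heap misuse should surface as ASan reports:

#include <cstdlib>

int main() {
  void *p = std::malloc(16); // routed through the thunk to __asan_malloc
  std::free(p);
  std::free(p); // expected: an AddressSanitizer double-free report
  return 0;
}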
diff --git compiler-rt/lib/asan/asan_win_common_runtime_thunk.cpp compiler-rt/lib/asan/asan_win_common_runtime_thunk.cpp
new file mode 100644
index 000000000000..d2c9e66c3133
--- /dev/null
+++ compiler-rt/lib/asan/asan_win_common_runtime_thunk.cpp
@@ -0,0 +1,112 @@
+//===-- asan_win_common_runtime_thunk.cpp ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// This file defines things that need to be present in the application modules
+// to interact with the ASan DLL runtime correctly and can't be implemented
+// using the default "import library" generated when linking the DLL.
+//
+// This includes:
+//  - Cloning the shadow memory dynamic address from the ASan DLL
+//  - Creating weak aliases to default implementations imported from the ASan DLL
+//  - Forwarding the detect_stack_use_after_return runtime option
+//  - Installing a custom SEH handler
+//
+//===----------------------------------------------------------------------===//
+
+#if defined(SANITIZER_DYNAMIC_RUNTIME_THUNK) || \
+    defined(SANITIZER_STATIC_RUNTIME_THUNK)
+#  define SANITIZER_IMPORT_INTERFACE 1
+#  define WIN32_LEAN_AND_MEAN
+#  include "asan_win_common_runtime_thunk.h"
+
+#  include <windows.h>
+
+#  include "sanitizer_common/sanitizer_win_defs.h"
+#  include "sanitizer_common/sanitizer_win_thunk_interception.h"
+
+// Define weak alias for all weak functions imported from asan dll.
+#  define INTERFACE_FUNCTION(Name)
+#  define INTERFACE_WEAK_FUNCTION(Name) REGISTER_WEAK_FUNCTION(Name)
+#  include "asan_interface.inc"
+
+////////////////////////////////////////////////////////////////////////////////
+// Define a copy of __asan_option_detect_stack_use_after_return that should be
+// used when linking an MD runtime with a set of object files on Windows.
+//
+// The ASan MD runtime dllexports '__asan_option_detect_stack_use_after_return',
+// so normally we would just dllimport it.  Unfortunately, the dllimport
+// attribute adds __imp_ prefix to the symbol name of a variable.
+// Since in general we don't know if a given TU is going to be used
+// with an MT or MD runtime and we don't want to use ugly __imp_ names on Windows
+// just to work around this issue, let's clone the variable, which is constant
+// after initialization anyway.
+
+extern "C" {
+__declspec(dllimport) int __asan_should_detect_stack_use_after_return();
+int __asan_option_detect_stack_use_after_return;
+
+__declspec(dllimport) void *__asan_get_shadow_memory_dynamic_address();
+void *__asan_shadow_memory_dynamic_address;
+
+static void __asan_initialize_cloned_variables() {
+  __asan_option_detect_stack_use_after_return =
+      __asan_should_detect_stack_use_after_return();
+  __asan_shadow_memory_dynamic_address =
+      __asan_get_shadow_memory_dynamic_address();
+}
+}
+
+static int asan_thunk_init() {
+  __asan_initialize_cloned_variables();
+
+#  ifdef SANITIZER_STATIC_RUNTIME_THUNK
+  __asan_initialize_static_thunk();
+#  endif
+
+  return 0;
+}
+
+static void WINAPI asan_thread_init(void *mod, unsigned long reason,
+                                    void *reserved) {
+  if (reason == DLL_PROCESS_ATTACH) {
+    asan_thunk_init();
+  }
+}
+
+// Our cloned variables must be initialized before C/C++ constructors.  If TLS
+// is used, our .CRT$XLAB initializer will run first. If not, our .CRT$XIB
+// initializer is needed as a backup.
+extern "C" __declspec(allocate(".CRT$XIB")) int (*__asan_thunk_init)() =
+    asan_thunk_init;
+WIN_FORCE_LINK(__asan_thunk_init);
+
+extern "C" __declspec(allocate(".CRT$XLAB")) void(WINAPI *__asan_tls_init)(
+    void *, unsigned long, void *) = asan_thread_init;
+WIN_FORCE_LINK(__asan_tls_init);
+
+////////////////////////////////////////////////////////////////////////////////
+// ASan SEH handling.
+// We need to set the ASan-specific SEH handler at the end of CRT initialization
+// of each module (see also asan_win.cpp).
+extern "C" {
+__declspec(dllimport) int __asan_set_seh_filter();
+static int SetSEHFilter() { return __asan_set_seh_filter(); }
+
+// Unfortunately, putting a pointer to __asan_set_seh_filter into
+// __asan_intercept_seh gets optimized out, so we have to use an extra function.
+extern "C" __declspec(allocate(".CRT$XCAB")) int (*__asan_seh_interceptor)() =
+    SetSEHFilter;
+WIN_FORCE_LINK(__asan_seh_interceptor);
+}
+
+WIN_FORCE_LINK(__asan_dso_reg_hook)
+
+#endif  // defined(SANITIZER_DYNAMIC_RUNTIME_THUNK) ||
+        // defined(SANITIZER_STATIC_RUNTIME_THUNK)
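The .CRT$XIB/.CRT$XLAB registrations above follow the standard MSVC CRT-section pattern; a self-contained sketch of that pattern (illustrative only, the names are made up):

// A function pointer placed in a ".CRT$X??" section is invoked by the CRT at
// the corresponding phase of startup; XIB runs during early C initialization,
// before C++ constructors.
#pragma section(".CRT$XIB", long, read)
static int run_early() {
  // Keep this dependency-free; the C runtime is only partially initialized.
  return 0; // CRT initializers must return 0 on success.
}
extern "C" __declspec(allocate(".CRT$XIB")) int (*p_run_early)(void) = run_early;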
diff --git compiler-rt/lib/asan/asan_win_common_runtime_thunk.h compiler-rt/lib/asan/asan_win_common_runtime_thunk.h
new file mode 100644
index 000000000000..66285eb31ae9
--- /dev/null
+++ compiler-rt/lib/asan/asan_win_common_runtime_thunk.h
@@ -0,0 +1,38 @@
+//===-- asan_win_common_runtime_thunk.h -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// This file defines things that need to be present in the application modules
+// to interact with the ASan DLL runtime correctly and can't be implemented
+// using the default "import library" generated when linking the DLL.
+//
+//===----------------------------------------------------------------------===//
+
+#if defined(SANITIZER_STATIC_RUNTIME_THUNK) || \
+    defined(SANITIZER_DYNAMIC_RUNTIME_THUNK)
+#  include "sanitizer_common/sanitizer_win_defs.h"
+
+#  pragma section(".CRT$XIB", long, \
+                  read)  // C initializer (during C init before dyninit)
+#  pragma section(".CRT$XID", long, \
+                  read)  // First C initializer after CRT initializers
+#  pragma section(".CRT$XCAB", long, \
+                  read)  // First C++ initializer after startup initializers
+
+#  pragma section(".CRT$XTW", long, read)  // First ASAN globals terminator
+#  pragma section(".CRT$XTY", long, read)  // Last ASAN globals terminator
+
+#  pragma section(".CRT$XLAB", long, read)  // First TLS initializer
+
+#  ifdef SANITIZER_STATIC_RUNTIME_THUNK
+extern "C" void __asan_initialize_static_thunk();
+#  endif
+
+#endif  // defined(SANITIZER_STATIC_RUNTIME_THUNK) ||
+        // defined(SANITIZER_DYNAMIC_RUNTIME_THUNK)
\ No newline at end of file
diff --git compiler-rt/lib/asan/asan_win_dll_thunk.cpp compiler-rt/lib/asan/asan_win_dll_thunk.cpp
deleted file mode 100644
index 35871a942a7a..000000000000
--- compiler-rt/lib/asan/asan_win_dll_thunk.cpp
+++ /dev/null
@@ -1,165 +0,0 @@
-//===-- asan_win_dll_thunk.cpp --------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file is a part of AddressSanitizer, an address sanity checker.
-//
-// This file defines a family of thunks that should be statically linked into
-// the DLLs that have ASan instrumentation in order to delegate the calls to the
-// shared runtime that lives in the main binary.
-// See https://github.com/google/sanitizers/issues/209 for the details.
-//===----------------------------------------------------------------------===//
-
-#ifdef SANITIZER_DLL_THUNK
-#include "asan_init_version.h"
-#include "interception/interception.h"
-#include "sanitizer_common/sanitizer_win_defs.h"
-#include "sanitizer_common/sanitizer_win_dll_thunk.h"
-#include "sanitizer_common/sanitizer_platform_interceptors.h"
-
-// ASan own interface functions.
-#define INTERFACE_FUNCTION(Name) INTERCEPT_SANITIZER_FUNCTION(Name)
-#define INTERFACE_WEAK_FUNCTION(Name) INTERCEPT_SANITIZER_WEAK_FUNCTION(Name)
-#include "asan_interface.inc"
-
-// Memory allocation functions.
-INTERCEPT_WRAP_V_W(free)
-INTERCEPT_WRAP_V_W(_free_base)
-INTERCEPT_WRAP_V_WW(_free_dbg)
-
-INTERCEPT_WRAP_W_W(malloc)
-INTERCEPT_WRAP_W_W(_malloc_base)
-INTERCEPT_WRAP_W_WWWW(_malloc_dbg)
-
-INTERCEPT_WRAP_W_WW(calloc)
-INTERCEPT_WRAP_W_WW(_calloc_base)
-INTERCEPT_WRAP_W_WWWWW(_calloc_dbg)
-INTERCEPT_WRAP_W_WWW(_calloc_impl)
-
-INTERCEPT_WRAP_W_WW(realloc)
-INTERCEPT_WRAP_W_WW(_realloc_base)
-INTERCEPT_WRAP_W_WWW(_realloc_dbg)
-INTERCEPT_WRAP_W_WWW(_recalloc)
-INTERCEPT_WRAP_W_WWW(_recalloc_base)
-
-INTERCEPT_WRAP_W_W(_msize)
-INTERCEPT_WRAP_W_W(_msize_base)
-INTERCEPT_WRAP_W_W(_expand)
-INTERCEPT_WRAP_W_W(_expand_dbg)
-
-// TODO(timurrrr): Might want to add support for _aligned_* allocation
-// functions to detect a bit more bugs.  Those functions seem to wrap malloc().
-
-// TODO(timurrrr): Do we need to add _Crt* stuff here? (see asan_malloc_win.cpp)
-
-#  if defined(_MSC_VER) && !defined(__clang__)
-// Disable warnings such as: 'void memchr(void)': incorrect number of arguments
-// for intrinsic function, expected '3' arguments.
-#    pragma warning(push)
-#    pragma warning(disable : 4392)
-#  endif
-
-INTERCEPT_LIBRARY_FUNCTION(atoi);
-INTERCEPT_LIBRARY_FUNCTION(atol);
-INTERCEPT_LIBRARY_FUNCTION(atoll);
-INTERCEPT_LIBRARY_FUNCTION(frexp);
-INTERCEPT_LIBRARY_FUNCTION(longjmp);
-#if SANITIZER_INTERCEPT_MEMCHR
-INTERCEPT_LIBRARY_FUNCTION(memchr);
-#endif
-INTERCEPT_LIBRARY_FUNCTION(memcmp);
-INTERCEPT_LIBRARY_FUNCTION(memcpy);
-INTERCEPT_LIBRARY_FUNCTION(memmove);
-INTERCEPT_LIBRARY_FUNCTION(memset);
-INTERCEPT_LIBRARY_FUNCTION(strcat);
-INTERCEPT_LIBRARY_FUNCTION(strchr);
-INTERCEPT_LIBRARY_FUNCTION(strcmp);
-INTERCEPT_LIBRARY_FUNCTION(strcpy);
-INTERCEPT_LIBRARY_FUNCTION(strcspn);
-INTERCEPT_LIBRARY_FUNCTION(_strdup);
-INTERCEPT_LIBRARY_FUNCTION(strlen);
-INTERCEPT_LIBRARY_FUNCTION(strncat);
-INTERCEPT_LIBRARY_FUNCTION(strncmp);
-INTERCEPT_LIBRARY_FUNCTION(strncpy);
-INTERCEPT_LIBRARY_FUNCTION(strnlen);
-INTERCEPT_LIBRARY_FUNCTION(strpbrk);
-INTERCEPT_LIBRARY_FUNCTION(strrchr);
-INTERCEPT_LIBRARY_FUNCTION(strspn);
-INTERCEPT_LIBRARY_FUNCTION(strstr);
-INTERCEPT_LIBRARY_FUNCTION(strtok);
-INTERCEPT_LIBRARY_FUNCTION(strtol);
-INTERCEPT_LIBRARY_FUNCTION(strtoll);
-INTERCEPT_LIBRARY_FUNCTION(wcslen);
-INTERCEPT_LIBRARY_FUNCTION(wcsnlen);
-
-#  if defined(_MSC_VER) && !defined(__clang__)
-#    pragma warning(pop)
-#  endif
-
-#ifdef _WIN64
-INTERCEPT_LIBRARY_FUNCTION(__C_specific_handler);
-#else
-INTERCEPT_LIBRARY_FUNCTION(_except_handler3);
-// _except_handler4 checks -GS cookie which is different for each module, so we
-// can't use INTERCEPT_LIBRARY_FUNCTION(_except_handler4).
-INTERCEPTOR(int, _except_handler4, void *a, void *b, void *c, void *d) {
-  __asan_handle_no_return();
-  return REAL(_except_handler4)(a, b, c, d);
-}
-#endif
-
-// Windows specific functions not included in asan_interface.inc.
-INTERCEPT_WRAP_W_V(__asan_should_detect_stack_use_after_return)
-INTERCEPT_WRAP_W_V(__asan_get_shadow_memory_dynamic_address)
-INTERCEPT_WRAP_W_W(__asan_unhandled_exception_filter)
-
-using namespace __sanitizer;
-
-extern "C" {
-int __asan_option_detect_stack_use_after_return;
-uptr __asan_shadow_memory_dynamic_address;
-} // extern "C"
-
-static int asan_dll_thunk_init() {
-  typedef void (*fntype)();
-  static fntype fn = 0;
-  // asan_dll_thunk_init is expected to be called by only one thread.
-  if (fn) return 0;
-
-  // Ensure all interception was executed.
-  __dll_thunk_init();
-
-  fn = (fntype) dllThunkGetRealAddrOrDie("__asan_init");
-  fn();
-  __asan_option_detect_stack_use_after_return =
-      (__asan_should_detect_stack_use_after_return() != 0);
-  __asan_shadow_memory_dynamic_address =
-      (uptr)__asan_get_shadow_memory_dynamic_address();
-
-#ifndef _WIN64
-  INTERCEPT_FUNCTION(_except_handler4);
-#endif
-  // In DLLs, the callbacks are expected to return 0,
-  // otherwise CRT initialization fails.
-  return 0;
-}
-
-#pragma section(".CRT$XIB", long, read)
-__declspec(allocate(".CRT$XIB")) int (*__asan_preinit)() = asan_dll_thunk_init;
-
-static void WINAPI asan_thread_init(void *mod, unsigned long reason,
-                                    void *reserved) {
-  if (reason == /*DLL_PROCESS_ATTACH=*/1) asan_dll_thunk_init();
-}
-
-#pragma section(".CRT$XLAB", long, read)
-__declspec(allocate(".CRT$XLAB")) void (WINAPI *__asan_tls_init)(void *,
-    unsigned long, void *) = asan_thread_init;
-
-WIN_FORCE_LINK(__asan_dso_reg_hook)
-
-#endif // SANITIZER_DLL_THUNK
diff --git compiler-rt/lib/asan/asan_win_dynamic_runtime_thunk.cpp compiler-rt/lib/asan/asan_win_dynamic_runtime_thunk.cpp
index f0b5ec9eef7f..421fe651b7d9 100644
--- compiler-rt/lib/asan/asan_win_dynamic_runtime_thunk.cpp
+++ compiler-rt/lib/asan/asan_win_dynamic_runtime_thunk.cpp
@@ -8,76 +8,17 @@
 //
 // This file is a part of AddressSanitizer, an address sanity checker.
 //
-// This file defines things that need to be present in the application modules
-// to interact with the ASan DLL runtime correctly and can't be implemented
-// using the default "import library" generated when linking the DLL RTL.
-//
-// This includes:
-//  - creating weak aliases to default implementation imported from asan dll.
-//  - forwarding the detect_stack_use_after_return runtime option
-//  - working around deficiencies of the MD runtime
-//  - installing a custom SEH handler
+// This file defines things that need to be present in application modules
+// that are dynamically linked with the C Runtime.
 //
 //===----------------------------------------------------------------------===//
 
 #ifdef SANITIZER_DYNAMIC_RUNTIME_THUNK
-#define SANITIZER_IMPORT_INTERFACE 1
-#include "sanitizer_common/sanitizer_win_defs.h"
-#define WIN32_LEAN_AND_MEAN
-#include <windows.h>
-
-// Define weak alias for all weak functions imported from asan dll.
-#define INTERFACE_FUNCTION(Name)
-#define INTERFACE_WEAK_FUNCTION(Name) WIN_WEAK_IMPORT_DEF(Name)
-#include "asan_interface.inc"
-
-// First, declare CRT sections we'll be using in this file
-#pragma section(".CRT$XIB", long, read)
-#pragma section(".CRT$XID", long, read)
-#pragma section(".CRT$XCAB", long, read)
-#pragma section(".CRT$XTW", long, read)
-#pragma section(".CRT$XTY", long, read)
-#pragma section(".CRT$XLAB", long, read)
-
-////////////////////////////////////////////////////////////////////////////////
-// Define a copy of __asan_option_detect_stack_use_after_return that should be
-// used when linking an MD runtime with a set of object files on Windows.
-//
-// The ASan MD runtime dllexports '__asan_option_detect_stack_use_after_return',
-// so normally we would just dllimport it.  Unfortunately, the dllimport
-// attribute adds __imp_ prefix to the symbol name of a variable.
-// Since in general we don't know if a given TU is going to be used
-// with a MT or MD runtime and we don't want to use ugly __imp_ names on Windows
-// just to work around this issue, let's clone the variable that is constant
-// after initialization anyways.
-extern "C" {
-__declspec(dllimport) int __asan_should_detect_stack_use_after_return();
-int __asan_option_detect_stack_use_after_return;
-
-__declspec(dllimport) void* __asan_get_shadow_memory_dynamic_address();
-void* __asan_shadow_memory_dynamic_address;
-}
-
-static int InitializeClonedVariables() {
-  __asan_option_detect_stack_use_after_return =
-    __asan_should_detect_stack_use_after_return();
-  __asan_shadow_memory_dynamic_address =
-    __asan_get_shadow_memory_dynamic_address();
-  return 0;
-}
-
-static void NTAPI asan_thread_init(void *mod, unsigned long reason,
-    void *reserved) {
-  if (reason == DLL_PROCESS_ATTACH) InitializeClonedVariables();
-}
+#  define WIN32_LEAN_AND_MEAN
+#  include <windows.h>
 
-// Our cloned variables must be initialized before C/C++ constructors.  If TLS
-// is used, our .CRT$XLAB initializer will run first. If not, our .CRT$XIB
-// initializer is needed as a backup.
-__declspec(allocate(".CRT$XIB")) int (*__asan_initialize_cloned_variables)() =
-    InitializeClonedVariables;
-__declspec(allocate(".CRT$XLAB")) void (NTAPI *__asan_tls_init)(void *,
-    unsigned long, void *) = asan_thread_init;
+#  include "asan_win_common_runtime_thunk.h"
+#  include "sanitizer_common/sanitizer_win_defs.h"
 
 ////////////////////////////////////////////////////////////////////////////////
 // For some reason, the MD CRT doesn't call the C/C++ terminators during DLL
@@ -88,43 +29,26 @@ __declspec(allocate(".CRT$XLAB")) void (NTAPI *__asan_tls_init)(void *,
 // using atexit() that calls a small subset of C terminators
 // where LLVM global_dtors is placed.  Fingers crossed, no other C terminators
 // are there.
-extern "C" int __cdecl atexit(void (__cdecl *f)(void));
+extern "C" int __cdecl atexit(void(__cdecl *f)(void));
 extern "C" void __cdecl _initterm(void *a, void *b);
 
 namespace {
-__declspec(allocate(".CRT$XTW")) void* before_global_dtors = 0;
-__declspec(allocate(".CRT$XTY")) void* after_global_dtors = 0;
+__declspec(allocate(".CRT$XTW")) void *before_global_dtors = 0;
+__declspec(allocate(".CRT$XTY")) void *after_global_dtors = 0;
 
 void UnregisterGlobals() {
   _initterm(&before_global_dtors, &after_global_dtors);
 }
 
-int ScheduleUnregisterGlobals() {
-  return atexit(UnregisterGlobals);
-}
+int ScheduleUnregisterGlobals() { return atexit(UnregisterGlobals); }
 }  // namespace
 
 // We need to call 'atexit(UnregisterGlobals);' as early as possible, but after
 // atexit() is initialized (.CRT$XIC).  As this is executed before C++
 // initializers (think ctors for globals), UnregisterGlobals gets executed after
 // dtors for C++ globals.
-__declspec(allocate(".CRT$XID"))
-int (*__asan_schedule_unregister_globals)() = ScheduleUnregisterGlobals;
-
-////////////////////////////////////////////////////////////////////////////////
-// ASan SEH handling.
-// We need to set the ASan-specific SEH handler at the end of CRT initialization
-// of each module (see also asan_win.cpp).
-extern "C" {
-__declspec(dllimport) int __asan_set_seh_filter();
-static int SetSEHFilter() { return __asan_set_seh_filter(); }
-
-// Unfortunately, putting a pointer to __asan_set_seh_filter into
-// __asan_intercept_seh gets optimized out, so we have to use an extra function.
-__declspec(allocate(".CRT$XCAB")) int (*__asan_seh_interceptor)() =
-    SetSEHFilter;
-}
-
-WIN_FORCE_LINK(__asan_dso_reg_hook)
+extern "C" __declspec(allocate(".CRT$XID")) int (
+    *__asan_schedule_unregister_globals)() = ScheduleUnregisterGlobals;
+WIN_FORCE_LINK(__asan_schedule_unregister_globals)
 
-#endif // SANITIZER_DYNAMIC_RUNTIME_THUNK
+#endif  // SANITIZER_DYNAMIC_RUNTIME_THUNK
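The atexit-from-.CRT$XID trick relied on above can be seen in isolation with a toy MSVC-only program (illustrative, not from the patch): a handler queued before any C++ constructor runs is, by LIFO ordering, invoked after the C++ global destructors at exit.

#include <cstdio>
#include <cstdlib>

#pragma section(".CRT$XID", long, read)
static void late_handler() { std::puts("after C++ global dtors"); }
static int schedule_late_handler() { return std::atexit(late_handler); }
extern "C" __declspec(allocate(".CRT$XID")) int (
    *p_schedule_late_handler)() = schedule_late_handler;

struct Global {
  ~Global() { std::puts("C++ global dtor"); }
} g;

int main() { return 0; }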
diff --git compiler-rt/lib/asan/asan_win_static_runtime_thunk.cpp compiler-rt/lib/asan/asan_win_static_runtime_thunk.cpp
new file mode 100644
index 000000000000..dec50a5e1d4d
--- /dev/null
+++ compiler-rt/lib/asan/asan_win_static_runtime_thunk.cpp
@@ -0,0 +1,110 @@
+//===-- asan_win_static_runtime_thunk.cpp ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// This file defines a family of thunks that should be statically linked into
+// modules that are statically linked with the C Runtime in order to delegate
+// the calls to the ASAN runtime DLL.
+// See https://github.com/google/sanitizers/issues/209 for the details.
+//===----------------------------------------------------------------------===//
+
+#ifdef SANITIZER_STATIC_RUNTIME_THUNK
+#  include "asan_init_version.h"
+#  include "asan_interface_internal.h"
+#  include "asan_win_common_runtime_thunk.h"
+#  include "sanitizer_common/sanitizer_platform_interceptors.h"
+#  include "sanitizer_common/sanitizer_win_defs.h"
+#  include "sanitizer_common/sanitizer_win_thunk_interception.h"
+
+#  if defined(_MSC_VER) && !defined(__clang__)
+// Disable warnings such as: 'void memchr(void)': incorrect number of arguments
+// for intrinsic function, expected '3' arguments.
+#    pragma warning(push)
+#    pragma warning(disable : 4392)
+#  endif
+
+#  define INTERCEPT_LIBRARY_FUNCTION_ASAN(X) \
+    INTERCEPT_LIBRARY_FUNCTION(X, "__asan_wrap_" #X)
+
+INTERCEPT_LIBRARY_FUNCTION_ASAN(atoi);
+INTERCEPT_LIBRARY_FUNCTION_ASAN(atol);
+INTERCEPT_LIBRARY_FUNCTION_ASAN(atoll);
+INTERCEPT_LIBRARY_FUNCTION_ASAN(frexp);
+INTERCEPT_LIBRARY_FUNCTION_ASAN(longjmp);
+#  if SANITIZER_INTERCEPT_MEMCHR
+INTERCEPT_LIBRARY_FUNCTION_ASAN(memchr);
+#  endif
+INTERCEPT_LIBRARY_FUNCTION_ASAN(memcmp);
+INTERCEPT_LIBRARY_FUNCTION_ASAN(memcpy);
+#  ifndef _WIN64
+// memmove and memcpy share an implementation on amd64
+INTERCEPT_LIBRARY_FUNCTION_ASAN(memmove);
+#  endif
+INTERCEPT_LIBRARY_FUNCTION_ASAN(memset);
+INTERCEPT_LIBRARY_FUNCTION_ASAN(strcat);
+INTERCEPT_LIBRARY_FUNCTION_ASAN(strchr);
+INTERCEPT_LIBRARY_FUNCTION_ASAN(strcmp);
+INTERCEPT_LIBRARY_FUNCTION_ASAN(strcpy);
+INTERCEPT_LIBRARY_FUNCTION_ASAN(strcspn);
+INTERCEPT_LIBRARY_FUNCTION_ASAN(_strdup);
+INTERCEPT_LIBRARY_FUNCTION_ASAN(strlen);
+INTERCEPT_LIBRARY_FUNCTION_ASAN(strncat);
+INTERCEPT_LIBRARY_FUNCTION_ASAN(strncmp);
+INTERCEPT_LIBRARY_FUNCTION_ASAN(strncpy);
+INTERCEPT_LIBRARY_FUNCTION_ASAN(strnlen);
+INTERCEPT_LIBRARY_FUNCTION_ASAN(strpbrk);
+// INTERCEPT_LIBRARY_FUNCTION_ASAN(strrchr);
+INTERCEPT_LIBRARY_FUNCTION_ASAN(strspn);
+INTERCEPT_LIBRARY_FUNCTION_ASAN(strstr);
+INTERCEPT_LIBRARY_FUNCTION_ASAN(strtok);
+INTERCEPT_LIBRARY_FUNCTION_ASAN(strtol);
+INTERCEPT_LIBRARY_FUNCTION_ASAN(wcslen);
+INTERCEPT_LIBRARY_FUNCTION_ASAN(wcsnlen);
+
+#  if defined(_MSC_VER) && !defined(__clang__)
+#    pragma warning(pop)
+#  endif
+
+#  ifdef _WIN64
+INTERCEPT_LIBRARY_FUNCTION_ASAN(__C_specific_handler);
+#  else
+extern "C" void abort();
+INTERCEPT_LIBRARY_FUNCTION_ASAN(_except_handler3);
+// _except_handler4 checks the -GS cookie, which is different for each module,
+// so we can't use INTERCEPT_LIBRARY_FUNCTION_ASAN(_except_handler4); the
+// interception has to be applied manually.
+extern "C" int _except_handler4(void *, void *, void *, void *);
+static int (*real_except_handler4)(void *, void *, void *,
+                                   void *) = &_except_handler4;
+static int intercept_except_handler4(void *a, void *b, void *c, void *d) {
+  __asan_handle_no_return();
+  return real_except_handler4(a, b, c, d);
+}
+#  endif
+
+// Windows specific functions not included in asan_interface.inc.
+// INTERCEPT_WRAP_W_V(__asan_should_detect_stack_use_after_return)
+// INTERCEPT_WRAP_W_V(__asan_get_shadow_memory_dynamic_address)
+// INTERCEPT_WRAP_W_W(__asan_unhandled_exception_filter)
+
+extern "C" void __asan_initialize_static_thunk() {
+#  ifndef _WIN64
+  if (real_except_handler4 == &_except_handler4) {
+    // Single threaded, no need for synchronization.
+    if (!__sanitizer_override_function_by_addr(
+            reinterpret_cast<__sanitizer::uptr>(&intercept_except_handler4),
+            reinterpret_cast<__sanitizer::uptr>(&_except_handler4),
+            reinterpret_cast<__sanitizer::uptr*>(&real_except_handler4))) {
+      abort();
+    }
+  }
+#  endif
+}
+
+#endif  // SANITIZER_STATIC_RUNTIME_THUNK
diff --git compiler-rt/lib/asan/asan_win_weak_interception.cpp compiler-rt/lib/asan/asan_win_weak_interception.cpp
deleted file mode 100644
index 62534e12e2a6..000000000000
--- compiler-rt/lib/asan/asan_win_weak_interception.cpp
+++ /dev/null
@@ -1,22 +0,0 @@
-//===-- asan_win_weak_interception.cpp ------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// This module should be included in Address Sanitizer when it is implemented as
-// a shared library on Windows (dll), in order to delegate the calls of weak
-// functions to the implementation in the main executable when a strong
-// definition is provided.
-//===----------------------------------------------------------------------===//
-#ifdef SANITIZER_DYNAMIC
-#include "sanitizer_common/sanitizer_win_weak_interception.h"
-#include "asan_interface_internal.h"
-// Check if strong definitions for weak functions are present in the main
-// executable. If that is the case, override dll functions to point to strong
-// implementations.
-#define INTERFACE_FUNCTION(Name)
-#define INTERFACE_WEAK_FUNCTION(Name) INTERCEPT_SANITIZER_WEAK_FUNCTION(Name)
-#include "asan_interface.inc"
-#endif // SANITIZER_DYNAMIC
diff --git compiler-rt/lib/asan/tests/CMakeLists.txt compiler-rt/lib/asan/tests/CMakeLists.txt
index b489bb99aeff..0c4b0361ec49 100644
--- compiler-rt/lib/asan/tests/CMakeLists.txt
+++ compiler-rt/lib/asan/tests/CMakeLists.txt
@@ -104,6 +104,7 @@ set(ASAN_DYNAMIC_UNITTEST_INSTRUMENTED_LINK_FLAGS
 set(ASAN_UNITTEST_INSTRUMENTED_LIBS)
 
 set(ASAN_UNITTEST_NOINST_LINK_FLAGS ${ASAN_UNITTEST_COMMON_LINK_FLAGS})
+append_list_if(MSVC -Wl,-nodefaultlib:msvcrt ASAN_UNITTEST_NOINST_LINK_FLAGS)
 if(NOT APPLE)
   append_list_if(COMPILER_RT_HAS_LIBM -lm ASAN_UNITTEST_NOINST_LINK_FLAGS)
   append_list_if(COMPILER_RT_HAS_LIBDL -ldl ASAN_UNITTEST_NOINST_LINK_FLAGS)
@@ -203,7 +204,7 @@ function(add_asan_tests arch test_runtime)
         CFLAGS ${ASAN_UNITTEST_INSTRUMENTED_CFLAGS} -D_MT -D_DLL
         SOURCES ${ASAN_INST_TEST_SOURCES}
         LINK_FLAGS ${ASAN_DYNAMIC_UNITTEST_INSTRUMENTED_LINK_FLAGS}
-          -Wl,-nodefaultlib:libcmt,-defaultlib:msvcrt,-defaultlib:oldnames
+          -D_MT -D_DLL -Wl,-nodefaultlib:libcmt,-defaultlib:msvcrt,-defaultlib:oldnames
         )
     else()
       set(DYNAMIC_LINK_FLAGS)
diff --git compiler-rt/lib/builtins/cpu_model/aarch64.c compiler-rt/lib/builtins/cpu_model/aarch64.c
index 0dd397783b67..ea2da23a9527 100644
--- compiler-rt/lib/builtins/cpu_model/aarch64.c
+++ compiler-rt/lib/builtins/cpu_model/aarch64.c
@@ -14,7 +14,7 @@
 
 #include "aarch64.h"
 
-#if !defined(__aarch64__)
+#if !defined(__aarch64__) && !defined(__arm64__) && !defined(_M_ARM64)
 #error This file is intended only for aarch64-based targets
 #endif
 
diff --git compiler-rt/lib/builtins/cpu_model/aarch64.h compiler-rt/lib/builtins/cpu_model/aarch64.h
index f6cbf75d582f..2a734b02b7c9 100644
--- compiler-rt/lib/builtins/cpu_model/aarch64.h
+++ compiler-rt/lib/builtins/cpu_model/aarch64.h
@@ -8,7 +8,7 @@
 
 #include "cpu_model.h"
 
-#if !defined(__aarch64__)
+#if !defined(__aarch64__) && !defined(__arm64__) && !defined(_M_ARM64)
 #error This file is intended only for aarch64-based targets
 #endif
 
diff --git compiler-rt/lib/builtins/divsc3.c compiler-rt/lib/builtins/divsc3.c
index aa4fd8e79e0c..6529651252c5 100644
--- compiler-rt/lib/builtins/divsc3.c
+++ compiler-rt/lib/builtins/divsc3.c
@@ -20,7 +20,7 @@
 COMPILER_RT_ABI Fcomplex __divsc3(float __a, float __b, float __c, float __d) {
   int __ilogbw = 0;
   float __logbw =
-      __compiler_rt_logbf(__compiler_rt_fmaxf(crt_fabsf(__c), crt_fabsf(__d)));
+      __compiler_rt_logbf(__compiler_rt_fmaxX(crt_fabsf(__c), crt_fabsf(__d)));
   if (crt_isfinite(__logbw)) {
     __ilogbw = (int)__logbw;
     __c = __compiler_rt_scalbnf(__c, -__ilogbw);
diff --git compiler-rt/lib/builtins/fp_lib.h compiler-rt/lib/builtins/fp_lib.h
index b2a89506135b..0289cfd10db6 100644
--- compiler-rt/lib/builtins/fp_lib.h
+++ compiler-rt/lib/builtins/fp_lib.h
@@ -346,15 +346,6 @@ static __inline fp_t __compiler_rt_logbf(fp_t x) {
 static __inline fp_t __compiler_rt_scalbnf(fp_t x, int y) {
   return __compiler_rt_scalbnX(x, y);
 }
-static __inline fp_t __compiler_rt_fmaxf(fp_t x, fp_t y) {
-#if defined(__aarch64__)
-  // Use __builtin_fmaxf which turns into an fmaxnm instruction on AArch64.
-  return __builtin_fmaxf(x, y);
-#else
-  // __builtin_fmaxf frequently turns into a libm call, so inline the function.
-  return __compiler_rt_fmaxX(x, y);
-#endif
-}
 
 #elif defined(DOUBLE_PRECISION)
 
diff --git compiler-rt/lib/ctx_profile/CtxInstrContextNode.h compiler-rt/lib/ctx_profile/CtxInstrContextNode.h
index a916f197aa14..5991458c5732 100644
--- compiler-rt/lib/ctx_profile/CtxInstrContextNode.h
+++ compiler-rt/lib/ctx_profile/CtxInstrContextNode.h
@@ -68,18 +68,19 @@ using GUID = uint64_t;
 class ContextNode final {
   const GUID Guid;
   ContextNode *const Next;
-  const uint32_t NrCounters;
-  const uint32_t NrCallsites;
+  const uint32_t NumCounters;
+  const uint32_t NumCallsites;
 
 public:
-  ContextNode(GUID Guid, uint32_t NrCounters, uint32_t NrCallsites,
+  ContextNode(GUID Guid, uint32_t NumCounters, uint32_t NumCallsites,
               ContextNode *Next = nullptr)
-      : Guid(Guid), Next(Next), NrCounters(NrCounters),
-        NrCallsites(NrCallsites) {}
+      : Guid(Guid), Next(Next), NumCounters(NumCounters),
+        NumCallsites(NumCallsites) {}
 
-  static inline size_t getAllocSize(uint32_t NrCounters, uint32_t NrCallsites) {
-    return sizeof(ContextNode) + sizeof(uint64_t) * NrCounters +
-           sizeof(ContextNode *) * NrCallsites;
+  static inline size_t getAllocSize(uint32_t NumCounters,
+                                    uint32_t NumCallsites) {
+    return sizeof(ContextNode) + sizeof(uint64_t) * NumCounters +
+           sizeof(ContextNode *) * NumCallsites;
   }
 
   // The counters vector starts right after the static header.
@@ -88,8 +89,8 @@ public:
     return reinterpret_cast<uint64_t *>(addr_after);
   }
 
-  uint32_t counters_size() const { return NrCounters; }
-  uint32_t callsites_size() const { return NrCallsites; }
+  uint32_t counters_size() const { return NumCounters; }
+  uint32_t callsites_size() const { return NumCallsites; }
 
   const uint64_t *counters() const {
     return const_cast<ContextNode *>(this)->counters();
@@ -97,7 +98,7 @@ public:
 
   // The subcontexts vector starts right after the end of the counters vector.
   ContextNode **subContexts() {
-    return reinterpret_cast<ContextNode **>(&(counters()[NrCounters]));
+    return reinterpret_cast<ContextNode **>(&(counters()[NumCounters]));
   }
 
   ContextNode *const *subContexts() const {
@@ -107,7 +108,7 @@ public:
   GUID guid() const { return Guid; }
   ContextNode *next() const { return Next; }
 
-  size_t size() const { return getAllocSize(NrCounters, NrCallsites); }
+  size_t size() const { return getAllocSize(NumCounters, NumCallsites); }
 
   uint64_t entrycount() const { return counters()[0]; }
 };
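Since getAllocSize() is the contract for the node's single-block layout (fixed header, then NumCounters 64-bit counters, then NumCallsites subcontext pointers), a small usage sketch may help; it assumes this header is on the include path and that ContextNode lives in the llvm::ctx_profile namespace:

#include <cstdio>

#include "CtxInstrContextNode.h"

int main() {
  using llvm::ctx_profile::ContextNode;
  // On a 64-bit target this is sizeof(ContextNode) + 2 * 8 + 3 * 8 bytes:
  // the counters start right after the header, and the subcontext pointers
  // right after the counters.
  std::printf("%zu\n", ContextNode::getAllocSize(/*NumCounters=*/2,
                                                 /*NumCallsites=*/3));
  return 0;
}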
diff --git compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp
index a0a535015bf2..df30986cdfc6 100644
--- compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp
+++ compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp
@@ -92,10 +92,11 @@ bool validate(const ContextRoot *Root) {
 }
 
 inline ContextNode *allocContextNode(char *Place, GUID Guid,
-                                     uint32_t NrCounters, uint32_t NrCallsites,
+                                     uint32_t NumCounters,
+                                     uint32_t NumCallsites,
                                      ContextNode *Next = nullptr) {
   assert(reinterpret_cast<uint64_t>(Place) % ExpectedAlignment == 0);
-  return new (Place) ContextNode(Guid, NrCounters, NrCallsites, Next);
+  return new (Place) ContextNode(Guid, NumCounters, NumCallsites, Next);
 }
 
 void resetContextNode(ContextNode &Node) {
@@ -161,8 +162,8 @@ void Arena::freeArenaList(Arena *&A) {
 // If this is the first time we hit a callsite with this (Guid) particular
 // callee, we need to allocate.
 ContextNode *getCallsiteSlow(GUID Guid, ContextNode **InsertionPoint,
-                             uint32_t NrCounters, uint32_t NrCallsites) {
-  auto AllocSize = ContextNode::getAllocSize(NrCounters, NrCallsites);
+                             uint32_t NumCounters, uint32_t NumCallsites) {
+  auto AllocSize = ContextNode::getAllocSize(NumCounters, NumCallsites);
   auto *Mem = __llvm_ctx_profile_current_context_root->CurrentMem;
   char *AllocPlace = Mem->tryBumpAllocate(AllocSize);
   if (!AllocPlace) {
@@ -175,15 +176,15 @@ ContextNode *getCallsiteSlow(GUID Guid, ContextNode **InsertionPoint,
         Mem->allocateNewArena(getArenaAllocSize(AllocSize), Mem);
     AllocPlace = Mem->tryBumpAllocate(AllocSize);
   }
-  auto *Ret = allocContextNode(AllocPlace, Guid, NrCounters, NrCallsites,
+  auto *Ret = allocContextNode(AllocPlace, Guid, NumCounters, NumCallsites,
                                *InsertionPoint);
   *InsertionPoint = Ret;
   return Ret;
 }
 
 ContextNode *__llvm_ctx_profile_get_context(void *Callee, GUID Guid,
-                                            uint32_t NrCounters,
-                                            uint32_t NrCallsites) {
+                                            uint32_t NumCounters,
+                                            uint32_t NumCallsites) {
   // fast "out" if we're not even doing contextual collection.
   if (!__llvm_ctx_profile_current_context_root)
     return TheScratchContext;
@@ -222,14 +223,14 @@ ContextNode *__llvm_ctx_profile_get_context(void *Callee, GUID Guid,
     Callsite = Callsite->next();
   }
   auto *Ret = Callsite ? Callsite
-                       : getCallsiteSlow(Guid, CallsiteContext, NrCounters,
-                                         NrCallsites);
-  if (Ret->callsites_size() != NrCallsites ||
-      Ret->counters_size() != NrCounters)
+                       : getCallsiteSlow(Guid, CallsiteContext, NumCounters,
+                                         NumCallsites);
+  if (Ret->callsites_size() != NumCallsites ||
+      Ret->counters_size() != NumCounters)
     __sanitizer::Printf("[ctxprof] Returned ctx differs from what's asked: "
                         "Context: %p, Asked: %lu %u %u, Got: %lu %u %u \n",
-                        reinterpret_cast<void *>(Ret), Guid, NrCallsites,
-                        NrCounters, Ret->guid(), Ret->callsites_size(),
+                        reinterpret_cast<void *>(Ret), Guid, NumCallsites,
+                        NumCounters, Ret->guid(), Ret->callsites_size(),
                         Ret->counters_size());
   onContextEnter(*Ret);
   return Ret;
@@ -237,19 +238,19 @@ ContextNode *__llvm_ctx_profile_get_context(void *Callee, GUID Guid,
 
 // This should be called once for a Root. Allocate the first arena, set up the
 // first context.
-void setupContext(ContextRoot *Root, GUID Guid, uint32_t NrCounters,
-                  uint32_t NrCallsites) {
+void setupContext(ContextRoot *Root, GUID Guid, uint32_t NumCounters,
+                  uint32_t NumCallsites) {
   __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
       &AllContextsMutex);
   // Re-check - we got here without having had taken a lock.
   if (Root->FirstMemBlock)
     return;
-  const auto Needed = ContextNode::getAllocSize(NrCounters, NrCallsites);
+  const auto Needed = ContextNode::getAllocSize(NumCounters, NumCallsites);
   auto *M = Arena::allocateNewArena(getArenaAllocSize(Needed));
   Root->FirstMemBlock = M;
   Root->CurrentMem = M;
   Root->FirstNode = allocContextNode(M->tryBumpAllocate(Needed), Guid,
-                                     NrCounters, NrCallsites);
+                                     NumCounters, NumCallsites);
   AllContextRoots.PushBack(Root);
 }
 
@@ -278,7 +279,7 @@ void __llvm_ctx_profile_release_context(ContextRoot *Root)
 }
 
 void __llvm_ctx_profile_start_collection() {
-  size_t NrMemUnits = 0;
+  size_t NumMemUnits = 0;
   __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
       &AllContextsMutex);
   for (uint32_t I = 0; I < AllContextRoots.Size(); ++I) {
@@ -286,11 +287,11 @@ void __llvm_ctx_profile_start_collection() {
     __sanitizer::GenericScopedLock<__sanitizer::StaticSpinMutex> Lock(
         &Root->Taken);
     for (auto *Mem = Root->FirstMemBlock; Mem; Mem = Mem->next())
-      ++NrMemUnits;
+      ++NumMemUnits;
 
     resetContextNode(*Root->FirstNode);
   }
-  __sanitizer::Printf("[ctxprof] Initial NrMemUnits: %zu \n", NrMemUnits);
+  __sanitizer::Printf("[ctxprof] Initial NumMemUnits: %zu \n", NumMemUnits);
 }
 
 bool __llvm_ctx_profile_fetch(void *Data,
diff --git compiler-rt/lib/ctx_profile/CtxInstrProfiling.h compiler-rt/lib/ctx_profile/CtxInstrProfiling.h
index f55068e98dd4..74d346d6e0a0 100644
--- compiler-rt/lib/ctx_profile/CtxInstrProfiling.h
+++ compiler-rt/lib/ctx_profile/CtxInstrProfiling.h
@@ -153,8 +153,8 @@ void __llvm_ctx_profile_release_context(__ctx_profile::ContextRoot *Root);
 /// called for any other function than entry points, in the entry BB of such
 /// function. Same consideration about LSB of returned value as .._start_context
 ContextNode *__llvm_ctx_profile_get_context(void *Callee, GUID Guid,
-                                            uint32_t NrCounters,
-                                            uint32_t NrCallsites);
+                                            uint32_t NumCounters,
+                                            uint32_t NumCallsites);
 
 /// Prepares for collection. Currently this resets counter values but preserves
 /// internal context tree structure.
diff --git compiler-rt/lib/dfsan/dfsan_interceptors.cpp compiler-rt/lib/dfsan/dfsan_interceptors.cpp
index 20e95c23c4bc..198e6ee44f94 100644
--- compiler-rt/lib/dfsan/dfsan_interceptors.cpp
+++ compiler-rt/lib/dfsan/dfsan_interceptors.cpp
@@ -39,10 +39,7 @@ INTERCEPTOR(void *, reallocarray, void *ptr, SIZE_T nmemb, SIZE_T size) {
 }
 
 INTERCEPTOR(void *, __libc_memalign, SIZE_T alignment, SIZE_T size) {
-  void *ptr = dfsan_memalign(alignment, size);
-  if (ptr)
-    DTLS_on_libc_memalign(ptr, size);
-  return ptr;
+  return dfsan_memalign(alignment, size);
 }
 
 INTERCEPTOR(void *, aligned_alloc, SIZE_T alignment, SIZE_T size) {
diff --git compiler-rt/lib/hwasan/hwasan_allocation_functions.cpp compiler-rt/lib/hwasan/hwasan_allocation_functions.cpp
index 9af09e2a4bed..25ca0a3b0b68 100644
--- compiler-rt/lib/hwasan/hwasan_allocation_functions.cpp
+++ compiler-rt/lib/hwasan/hwasan_allocation_functions.cpp
@@ -17,7 +17,6 @@
 #include "sanitizer_common/sanitizer_allocator_dlsym.h"
 #include "sanitizer_common/sanitizer_allocator_interface.h"
 #include "sanitizer_common/sanitizer_mallinfo.h"
-#include "sanitizer_common/sanitizer_tls_get_addr.h"
 
 using namespace __hwasan;
 
@@ -62,10 +61,7 @@ void *__sanitizer_aligned_alloc(uptr alignment, uptr size) {
 SANITIZER_INTERFACE_ATTRIBUTE
 void *__sanitizer___libc_memalign(uptr alignment, uptr size) {
   GET_MALLOC_STACK_TRACE;
-  void *ptr = hwasan_memalign(alignment, size, &stack);
-  if (ptr)
-    DTLS_on_libc_memalign(ptr, size);
-  return ptr;
+  return hwasan_memalign(alignment, size, &stack);
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
diff --git compiler-rt/lib/interception/CMakeLists.txt compiler-rt/lib/interception/CMakeLists.txt
index abe9229340be..fe7fa27fbc78 100644
--- compiler-rt/lib/interception/CMakeLists.txt
+++ compiler-rt/lib/interception/CMakeLists.txt
@@ -14,6 +14,14 @@ set(INTERCEPTION_HEADERS
   interception_win.h
   )
 
+if(MSVC)
+  # ASan on Windows only supports the release DLL version of the runtimes, in the
+  # interest of having only one ASan DLL to support and test. Statically linking ASan
+  # with the runtime might be possible, but it multiplies the number of scenarios to
+  # test. The program USING the sanitizer can use whatever version of the runtime it wants to.
+  set(CMAKE_MSVC_RUNTIME_LIBRARY MultiThreadedDLL)
+endif()
+
 include_directories(..)
 
 set(INTERCEPTION_CFLAGS ${SANITIZER_COMMON_CFLAGS})
diff --git compiler-rt/lib/lsan/lsan_interceptors.cpp compiler-rt/lib/lsan/lsan_interceptors.cpp
index b569c337e976..a8252cddacf2 100644
--- compiler-rt/lib/lsan/lsan_interceptors.cpp
+++ compiler-rt/lib/lsan/lsan_interceptors.cpp
@@ -26,7 +26,6 @@
 #if SANITIZER_POSIX
 #include "sanitizer_common/sanitizer_posix.h"
 #endif
-#include "sanitizer_common/sanitizer_tls_get_addr.h"
 #include "lsan.h"
 #include "lsan_allocator.h"
 #include "lsan_common.h"
@@ -77,6 +76,8 @@ INTERCEPTOR(void*, malloc, uptr size) {
 }
 
 INTERCEPTOR(void, free, void *p) {
+  if (UNLIKELY(!p))
+    return;
   if (DlsymAlloc::PointerIsMine(p))
     return DlsymAlloc::Free(p);
   ENSURE_LSAN_INITED;
@@ -133,9 +134,7 @@ INTERCEPTOR(void*, memalign, uptr alignment, uptr size) {
 INTERCEPTOR(void *, __libc_memalign, uptr alignment, uptr size) {
   ENSURE_LSAN_INITED;
   GET_STACK_TRACE_MALLOC;
-  void *res = lsan_memalign(alignment, size, stack);
-  DTLS_on_libc_memalign(res, size);
-  return res;
+  return lsan_memalign(alignment, size, stack);
 }
 #define LSAN_MAYBE_INTERCEPT___LIBC_MEMALIGN INTERCEPT_FUNCTION(__libc_memalign)
 #else
diff --git compiler-rt/lib/memprof/memprof_malloc_linux.cpp compiler-rt/lib/memprof/memprof_malloc_linux.cpp
index aba6295a4a04..2a028c7d0b48 100644
--- compiler-rt/lib/memprof/memprof_malloc_linux.cpp
+++ compiler-rt/lib/memprof/memprof_malloc_linux.cpp
@@ -90,9 +90,7 @@ INTERCEPTOR(void *, memalign, uptr boundary, uptr size) {
 
 INTERCEPTOR(void *, __libc_memalign, uptr boundary, uptr size) {
   GET_STACK_TRACE_MALLOC;
-  void *res = memprof_memalign(boundary, size, &stack, FROM_MALLOC);
-  DTLS_on_libc_memalign(res, size);
-  return res;
+  return memprof_memalign(boundary, size, &stack, FROM_MALLOC);
 }
 #endif // SANITIZER_INTERCEPT_MEMALIGN
 
diff --git compiler-rt/lib/msan/msan_interceptors.cpp compiler-rt/lib/msan/msan_interceptors.cpp
index c540523e0eae..f05c20618780 100644
--- compiler-rt/lib/msan/msan_interceptors.cpp
+++ compiler-rt/lib/msan/msan_interceptors.cpp
@@ -37,7 +37,6 @@
 #include "sanitizer_common/sanitizer_platform_limits_netbsd.h"
 #include "sanitizer_common/sanitizer_platform_limits_posix.h"
 #include "sanitizer_common/sanitizer_stackdepot.h"
-#include "sanitizer_common/sanitizer_tls_get_addr.h"
 #include "sanitizer_common/sanitizer_vector.h"
 
 #if SANITIZER_NETBSD
@@ -185,10 +184,7 @@ INTERCEPTOR(void *, aligned_alloc, SIZE_T alignment, SIZE_T size) {
 #if !SANITIZER_NETBSD
 INTERCEPTOR(void *, __libc_memalign, SIZE_T alignment, SIZE_T size) {
   GET_MALLOC_STACK_TRACE;
-  void *ptr = msan_memalign(alignment, size, &stack);
-  if (ptr)
-    DTLS_on_libc_memalign(ptr, size);
-  return ptr;
+  return msan_memalign(alignment, size, &stack);
 }
 #define MSAN_MAYBE_INTERCEPT___LIBC_MEMALIGN INTERCEPT_FUNCTION(__libc_memalign)
 #else
diff --git compiler-rt/lib/nsan/nsan.cpp compiler-rt/lib/nsan/nsan.cpp
index 4be9c673bd4e..4679bcd589eb 100644
--- compiler-rt/lib/nsan/nsan.cpp
+++ compiler-rt/lib/nsan/nsan.cpp
@@ -200,7 +200,14 @@ void __sanitizer::BufferedStackTrace::UnwindImpl(uptr pc, uptr bp,
                                                  bool request_fast,
                                                  u32 max_depth) {
   using namespace __nsan;
-  return Unwind(max_depth, pc, bp, context, 0, 0, false);
+  NsanThread *t = GetCurrentThread();
+  if (!t || !StackTrace::WillUseFastUnwind(request_fast))
+    return Unwind(max_depth, pc, bp, context, t ? t->stack_top() : 0,
+                  t ? t->stack_bottom() : 0, false);
+  if (StackTrace::WillUseFastUnwind(request_fast))
+    Unwind(max_depth, pc, bp, nullptr, t->stack_top(), t->stack_bottom(), true);
+  else
+    Unwind(max_depth, pc, 0, context, 0, 0, false);
 }
 
 extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __nsan_print_accumulated_stats() {
diff --git compiler-rt/lib/orc/adt.h compiler-rt/lib/orc/adt.h
index 8884cc8812be..43fa9a8ea678 100644
--- compiler-rt/lib/orc/adt.h
+++ compiler-rt/lib/orc/adt.h
@@ -18,7 +18,7 @@
 #include <ostream>
 #include <string>
 
-namespace __orc_rt {
+namespace orc_rt {
 
 constexpr std::size_t dynamic_extent = std::numeric_limits<std::size_t>::max();
 
@@ -58,6 +58,6 @@ private:
   size_type Size = 0;
 };
 
-} // end namespace __orc_rt
+} // namespace orc_rt
 
 #endif // ORC_RT_ADT_H
diff --git compiler-rt/lib/orc/bitmask_enum.h compiler-rt/lib/orc/bitmask_enum.h
index b9fb776bdf23..77f4ca6b0fd6 100644
--- compiler-rt/lib/orc/bitmask_enum.h
+++ compiler-rt/lib/orc/bitmask_enum.h
@@ -18,7 +18,7 @@
 #include <cassert>
 #include <type_traits>
 
-namespace __orc_rt {
+namespace orc_rt {
 
 /// ORC_RT_MARK_AS_BITMASK_ENUM lets you opt in an individual enum type so you
 /// can perform bitwise operations on it without putting static_cast everywhere.
@@ -146,6 +146,6 @@ E &operator^=(E &LHS, E RHS) {
   return LHS;
 }
 
-} // end namespace __orc_rt
+} // namespace orc_rt
 
 #endif // ORC_RT_BITMASK_ENUM_H
diff --git compiler-rt/lib/orc/coff_platform.cpp compiler-rt/lib/orc/coff_platform.cpp
index 9fe5c0b06289..49b805a0ec7d 100644
--- compiler-rt/lib/orc/coff_platform.cpp
+++ compiler-rt/lib/orc/coff_platform.cpp
@@ -17,6 +17,7 @@
 
 #include "debug.h"
 #include "error.h"
+#include "jit_dispatch.h"
 #include "wrapper_function_utils.h"
 
 #include <array>
@@ -29,9 +30,9 @@
 
 #define DEBUG_TYPE "coff_platform"
 
-using namespace __orc_rt;
+using namespace orc_rt;
 
-namespace __orc_rt {
+namespace orc_rt {
 
 using COFFJITDylibDepInfo = std::vector<ExecutorAddr>;
 using COFFJITDylibDepInfoMap =
@@ -45,7 +46,7 @@ using SPSCOFFJITDylibDepInfo = SPSSequence<SPSExecutorAddr>;
 using SPSCOFFJITDylibDepInfoMap =
     SPSSequence<SPSTuple<SPSExecutorAddr, SPSCOFFJITDylibDepInfo>>;
 
-} // namespace __orc_rt
+} // namespace orc_rt
 
 ORC_RT_JIT_DISPATCH_TAG(__orc_rt_coff_symbol_lookup_tag)
 ORC_RT_JIT_DISPATCH_TAG(__orc_rt_coff_push_initializers_tag)
@@ -315,9 +316,9 @@ Error COFFPlatformRuntimeState::dlopenFull(JITDylibState &JDS) {
   // Call back to the JIT to push the initializers.
   Expected<COFFJITDylibDepInfoMap> DepInfoMap((COFFJITDylibDepInfoMap()));
   if (auto Err = WrapperFunction<SPSExpected<SPSCOFFJITDylibDepInfoMap>(
-          SPSExecutorAddr)>::call(&__orc_rt_coff_push_initializers_tag,
-                                  DepInfoMap,
-                                  ExecutorAddr::fromPtr(JDS.Header)))
+          SPSExecutorAddr)>::
+          call(JITDispatch(&__orc_rt_coff_push_initializers_tag), DepInfoMap,
+               ExecutorAddr::fromPtr(JDS.Header)))
     return Err;
   if (!DepInfoMap)
     return DepInfoMap.takeError();
@@ -445,10 +446,9 @@ COFFPlatformRuntimeState::lookupSymbolInJITDylib(void *header,
                                                  std::string_view Sym) {
   Expected<ExecutorAddr> Result((ExecutorAddr()));
   if (auto Err = WrapperFunction<SPSExpected<SPSExecutorAddr>(
-          SPSExecutorAddr, SPSString)>::call(&__orc_rt_coff_symbol_lookup_tag,
-                                             Result,
-                                             ExecutorAddr::fromPtr(header),
-                                             Sym))
+          SPSExecutorAddr,
+          SPSString)>::call(JITDispatch(&__orc_rt_coff_symbol_lookup_tag),
+                            Result, ExecutorAddr::fromPtr(header), Sym))
     return std::move(Err);
   return Result;
 }
@@ -752,7 +752,7 @@ ORC_RT_INTERFACE int64_t __orc_rt_coff_run_program(const char *JITDylibName,
   using MainTy = int (*)(int, char *[]);
 
   void *H =
-      __orc_rt_coff_jit_dlopen(JITDylibName, __orc_rt::coff::ORC_RT_RTLD_LAZY);
+      __orc_rt_coff_jit_dlopen(JITDylibName, orc_rt::coff::ORC_RT_RTLD_LAZY);
   if (!H) {
     __orc_rt_log_error(__orc_rt_coff_jit_dlerror());
     return -1;
diff --git compiler-rt/lib/orc/coff_platform.h compiler-rt/lib/orc/coff_platform.h
index c84185d40b60..aae57bc6100e 100644
--- compiler-rt/lib/orc/coff_platform.h
+++ compiler-rt/lib/orc/coff_platform.h
@@ -23,7 +23,7 @@ ORC_RT_INTERFACE int __orc_rt_coff_jit_dlclose(void *header);
 ORC_RT_INTERFACE void *__orc_rt_coff_jit_dlsym(void *header,
                                                const char *symbol);
 
-namespace __orc_rt {
+namespace orc_rt {
 namespace coff {
 
 enum dlopen_mode : int {
@@ -33,7 +33,7 @@ enum dlopen_mode : int {
   ORC_RT_RTLD_GLOBAL = 0x8
 };
 
-} // end namespace coff
-} // end namespace __orc_rt
+} // namespace coff
+} // namespace orc_rt
 
 #endif
diff --git compiler-rt/lib/orc/debug.cpp compiler-rt/lib/orc/debug.cpp
index af20fa4e6f4e..341fceaf002c 100644
--- compiler-rt/lib/orc/debug.cpp
+++ compiler-rt/lib/orc/debug.cpp
@@ -18,8 +18,7 @@
 #include <cstdlib>
 #include <cstring>
 
-
-namespace __orc_rt {
+namespace orc_rt {
 
 #ifndef NDEBUG
 
@@ -80,4 +79,4 @@ void printdbg(const char *format, ...) {
 
 #endif // !NDEBUG
 
-} // end namespace __orc_rt
+} // namespace orc_rt
diff --git compiler-rt/lib/orc/debug.h compiler-rt/lib/orc/debug.h
index a0bc653d032e..587994e484af 100644
--- compiler-rt/lib/orc/debug.h
+++ compiler-rt/lib/orc/debug.h
@@ -17,7 +17,7 @@
 
 #ifndef NDEBUG
 
-namespace __orc_rt {
+namespace orc_rt {
 
 extern std::atomic<const char *> DebugTypes;
 extern char DebugTypesAll;
@@ -27,18 +27,16 @@ const char *initializeDebug();
 bool debugTypeEnabled(const char *Type, const char *Types);
 void printdbg(const char *format, ...);
 
-} // namespace __orc_rt
+} // namespace orc_rt
 
 #define ORC_RT_DEBUG_WITH_TYPE(TYPE, X)                                        \
   do {                                                                         \
-    const char *Types =                                                        \
-        ::__orc_rt::DebugTypes.load(std::memory_order_relaxed);                \
+    const char *Types = ::orc_rt::DebugTypes.load(std::memory_order_relaxed);  \
     if (!Types)                                                                \
       Types = initializeDebug();                                               \
     if (Types == &DebugTypesNone)                                              \
       break;                                                                   \
-    if (Types == &DebugTypesAll ||                                             \
-        ::__orc_rt::debugTypeEnabled(TYPE, Types)) {                           \
+    if (Types == &DebugTypesAll || ::orc_rt::debugTypeEnabled(TYPE, Types)) {  \
       X;                                                                       \
     }                                                                          \
   } while (false)
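After the rename, ORC_RT_DEBUG_WITH_TYPE still lazily loads the debug-type list on first use and only evaluates its second argument when the given type (or "all") is enabled. A hedged sketch of direct use in a runtime source file, assuming a debug (non-NDEBUG) build; the component name and helper function are hypothetical, and real callers in this patch typically go through ORC_RT_DEBUG (see macho_platform.cpp below).

```cpp
// Hypothetical helper in an ORC runtime source file: logs a mapped range only
// when the "my_component" debug type is enabled. Unqualified names used by the
// macro body (initializeDebug, DebugTypesAll, ...) resolve via the using-directive.
#include "debug.h"

using namespace orc_rt;

void noteMappedRange(void *Start, void *End) {
  ORC_RT_DEBUG_WITH_TYPE("my_component",
                         printdbg("mapped [%p, %p)\n", Start, End));
}
```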
diff --git compiler-rt/lib/orc/dlfcn_wrapper.cpp compiler-rt/lib/orc/dlfcn_wrapper.cpp
index ece63da2cb48..bbbc79f607f2 100644
--- compiler-rt/lib/orc/dlfcn_wrapper.cpp
+++ compiler-rt/lib/orc/dlfcn_wrapper.cpp
@@ -16,10 +16,11 @@
 
 #include <vector>
 
-using namespace __orc_rt;
+using namespace orc_rt;
 
 extern "C" const char *__orc_rt_jit_dlerror();
 extern "C" void *__orc_rt_jit_dlopen(const char *path, int mode);
+extern "C" int __orc_rt_jit_dlupdate(void *dso_handle, int mode);
 extern "C" int __orc_rt_jit_dlclose(void *dso_handle);
 
 ORC_RT_INTERFACE orc_rt_CWrapperFunctionResult
@@ -41,6 +42,18 @@ __orc_rt_jit_dlopen_wrapper(const char *ArgData, size_t ArgSize) {
       .release();
 }
 
+#ifdef __APPLE__
+ORC_RT_INTERFACE orc_rt_CWrapperFunctionResult
+__orc_rt_jit_dlupdate_wrapper(const char *ArgData, size_t ArgSize) {
+  return WrapperFunction<int32_t(SPSExecutorAddr, int32_t)>::handle(
+             ArgData, ArgSize,
+             [](ExecutorAddr &DSOHandle, int32_t mode) {
+               return __orc_rt_jit_dlupdate(DSOHandle.toPtr<void *>(), mode);
+             })
+      .release();
+}
+#endif
+
 ORC_RT_INTERFACE orc_rt_CWrapperFunctionResult
 __orc_rt_jit_dlclose_wrapper(const char *ArgData, size_t ArgSize) {
   return WrapperFunction<int32_t(SPSExecutorAddr)>::handle(
diff --git compiler-rt/lib/orc/elfnix_platform.cpp compiler-rt/lib/orc/elfnix_platform.cpp
index c087e71038f9..bd76e3ed0ece 100644
--- compiler-rt/lib/orc/elfnix_platform.cpp
+++ compiler-rt/lib/orc/elfnix_platform.cpp
@@ -14,6 +14,7 @@
 #include "common.h"
 #include "compiler.h"
 #include "error.h"
+#include "jit_dispatch.h"
 #include "wrapper_function_utils.h"
 
 #include <algorithm>
@@ -24,8 +25,8 @@
 #include <unordered_map>
 #include <vector>
 
-using namespace __orc_rt;
-using namespace __orc_rt::elfnix;
+using namespace orc_rt;
+using namespace orc_rt::elfnix;
 
 // Declare function tags for functions in the JIT process.
 ORC_RT_JIT_DISPATCH_TAG(__orc_rt_elfnix_get_initializers_tag)
@@ -352,10 +353,9 @@ ELFNixPlatformRuntimeState::lookupSymbolInJITDylib(void *DSOHandle,
                                                    std::string_view Sym) {
   Expected<ExecutorAddr> Result((ExecutorAddr()));
   if (auto Err = WrapperFunction<SPSExpected<SPSExecutorAddr>(
-          SPSExecutorAddr, SPSString)>::call(&__orc_rt_elfnix_symbol_lookup_tag,
-                                             Result,
-                                             ExecutorAddr::fromPtr(DSOHandle),
-                                             Sym))
+          SPSExecutorAddr,
+          SPSString)>::call(JITDispatch(&__orc_rt_elfnix_symbol_lookup_tag),
+                            Result, ExecutorAddr::fromPtr(DSOHandle), Sym))
     return std::move(Err);
   return Result;
 }
@@ -368,8 +368,9 @@ ELFNixPlatformRuntimeState::getJITDylibInitializersByName(
   std::string PathStr(Path.data(), Path.size());
   if (auto Err =
           WrapperFunction<SPSExpected<SPSELFNixJITDylibInitializerSequence>(
-              SPSString)>::call(&__orc_rt_elfnix_get_initializers_tag, Result,
-                                Path))
+              SPSString)>::
+              call(JITDispatch(&__orc_rt_elfnix_get_initializers_tag), Result,
+                   Path))
     return std::move(Err);
   return Result;
 }
@@ -601,7 +602,7 @@ ORC_RT_INTERFACE int64_t __orc_rt_elfnix_run_program(
   using MainTy = int (*)(int, char *[]);
 
   void *H = __orc_rt_elfnix_jit_dlopen(JITDylibName,
-                                       __orc_rt::elfnix::ORC_RT_RTLD_LAZY);
+                                       orc_rt::elfnix::ORC_RT_RTLD_LAZY);
   if (!H) {
     __orc_rt_log_error(__orc_rt_elfnix_jit_dlerror());
     return -1;
diff --git compiler-rt/lib/orc/elfnix_platform.h compiler-rt/lib/orc/elfnix_platform.h
index e0ee9591dfc6..3efac4b2327f 100644
--- compiler-rt/lib/orc/elfnix_platform.h
+++ compiler-rt/lib/orc/elfnix_platform.h
@@ -29,7 +29,7 @@ ORC_RT_INTERFACE int __orc_rt_elfnix_jit_dlclose(void *dso_handle);
 ORC_RT_INTERFACE void *__orc_rt_elfnix_jit_dlsym(void *dso_handle,
                                                  const char *symbol);
 
-namespace __orc_rt {
+namespace orc_rt {
 namespace elfnix {
 
 struct ELFNixPerObjectSectionsToRegister {
@@ -65,7 +65,7 @@ enum dlopen_mode : int {
   ORC_RT_RTLD_GLOBAL = 0x8
 };
 
-} // end namespace elfnix
+} // namespace elfnix
 
 using SPSELFNixPerObjectSectionsToRegister =
     SPSTuple<SPSExecutorAddrRange, SPSExecutorAddrRange>;
@@ -126,6 +126,6 @@ public:
   }
 };
 
-} // end namespace __orc_rt
+} // namespace orc_rt
 
 #endif // ORC_RT_ELFNIX_PLATFORM_H
diff --git compiler-rt/lib/orc/endianness.h compiler-rt/lib/orc/endianness.h
index 4ee5505ce6dd..ac65d83744c7 100644
--- compiler-rt/lib/orc/endianness.h
+++ compiler-rt/lib/orc/endianness.h
@@ -46,7 +46,7 @@
 #endif
 #endif
 
-namespace __orc_rt {
+namespace orc_rt {
 
 /// ByteSwap_16 - This function returns a byte-swapped representation of
 /// the 16-bit argument.
@@ -138,6 +138,6 @@ template <typename T> inline void swapByteOrder(T &Value) {
   Value = getSwappedBytes(Value);
 }
 
-} // end namespace __orc_rt
+} // namespace orc_rt
 
 #endif // ORC_RT_ENDIAN_H
diff --git compiler-rt/lib/orc/error.h compiler-rt/lib/orc/error.h
index b5da0769c637..c833313ee55c 100644
--- compiler-rt/lib/orc/error.h
+++ compiler-rt/lib/orc/error.h
@@ -18,7 +18,7 @@
 #include <string>
 #include <type_traits>
 
-namespace __orc_rt {
+namespace orc_rt {
 
 /// Base class for all errors.
 class ErrorInfoBase : public RTTIExtends<ErrorInfoBase, RTTIRoot> {
@@ -421,6 +421,6 @@ private:
   std::string ErrMsg;
 };
 
-} // end namespace __orc_rt
+} // namespace orc_rt
 
 #endif // ORC_RT_ERROR_H
diff --git compiler-rt/lib/orc/executor_address.h compiler-rt/lib/orc/executor_address.h
index 1542ee96bd92..56a5e88c84a2 100644
--- compiler-rt/lib/orc/executor_address.h
+++ compiler-rt/lib/orc/executor_address.h
@@ -22,7 +22,7 @@
 #include <cassert>
 #include <type_traits>
 
-namespace __orc_rt {
+namespace orc_rt {
 
 using ExecutorAddrDiff = uint64_t;
 
@@ -30,7 +30,7 @@ using ExecutorAddrDiff = uint64_t;
 class ExecutorAddr {
 public:
   /// A wrap/unwrap function that leaves pointers unmodified.
-  template <typename T> using rawPtr = __orc_rt::identity<T *>;
+  template <typename T> using rawPtr = orc_rt::identity<T *>;
 
   /// Default wrap function to use on this host.
   template <typename T> using defaultWrap = rawPtr<T>;
@@ -247,13 +247,13 @@ public:
 
 using SPSExecutorAddrRangeSequence = SPSSequence<SPSExecutorAddrRange>;
 
-} // End namespace __orc_rt
+} // End namespace orc_rt
 
 namespace std {
 
 // Make ExecutorAddr hashable.
-template <> struct hash<__orc_rt::ExecutorAddr> {
-  size_t operator()(const __orc_rt::ExecutorAddr &A) const {
+template <> struct hash<orc_rt::ExecutorAddr> {
+  size_t operator()(const orc_rt::ExecutorAddr &A) const {
     return hash<uint64_t>()(A.getValue());
   }
 };
diff --git compiler-rt/lib/orc/executor_symbol_def.h compiler-rt/lib/orc/executor_symbol_def.h
index 454cefe525cf..78c3c35e50b6 100644
--- compiler-rt/lib/orc/executor_symbol_def.h
+++ compiler-rt/lib/orc/executor_symbol_def.h
@@ -20,7 +20,7 @@
 #include "executor_address.h"
 #include "simple_packed_serialization.h"
 
-namespace __orc_rt {
+namespace orc_rt {
 
 /// Flags for symbols in the JIT.
 class JITSymbolFlags {
@@ -146,6 +146,6 @@ public:
   }
 };
 
-} // End namespace __orc_rt
+} // End namespace orc_rt
 
 #endif // ORC_RT_EXECUTOR_SYMBOL_DEF_H
diff --git compiler-rt/lib/orc/extensible_rtti.cpp compiler-rt/lib/orc/extensible_rtti.cpp
index c6951a449a3d..fce7d044a801 100644
--- compiler-rt/lib/orc/extensible_rtti.cpp
+++ compiler-rt/lib/orc/extensible_rtti.cpp
@@ -16,9 +16,9 @@
 
 #include "extensible_rtti.h"
 
-namespace __orc_rt {
+namespace orc_rt {
 
 char RTTIRoot::ID = 0;
 void RTTIRoot::anchor() {}
 
-} // end namespace __orc_rt
+} // namespace orc_rt
diff --git compiler-rt/lib/orc/extensible_rtti.h compiler-rt/lib/orc/extensible_rtti.h
index 72f68242e7c4..d0363b4f1ee0 100644
--- compiler-rt/lib/orc/extensible_rtti.h
+++ compiler-rt/lib/orc/extensible_rtti.h
@@ -59,7 +59,7 @@
 #ifndef ORC_RT_EXTENSIBLE_RTTI_H
 #define ORC_RT_EXTENSIBLE_RTTI_H
 
-namespace __orc_rt {
+namespace orc_rt {
 
 template <typename ThisT, typename ParentT> class RTTIExtends;
 
@@ -140,6 +140,6 @@ template <typename To, typename From> bool isa(const From &Value) {
   return To::classof(&Value);
 }
 
-} // end namespace __orc_rt
+} // namespace orc_rt
 
 #endif // ORC_RT_EXTENSIBLE_RTTI_H
diff --git compiler-rt/lib/orc/interval_map.h compiler-rt/lib/orc/interval_map.h
index 8c1609d72f57..3db990f1f115 100644
--- compiler-rt/lib/orc/interval_map.h
+++ compiler-rt/lib/orc/interval_map.h
@@ -17,7 +17,7 @@
 #include <cassert>
 #include <map>
 
-namespace __orc_rt {
+namespace orc_rt {
 
 enum class IntervalCoalescing { Enabled, Disabled };
 
@@ -163,6 +163,6 @@ public:
   }
 };
 
-} // End namespace __orc_rt
+} // End namespace orc_rt
 
 #endif // ORC_RT_INTERVAL_MAP_H
diff --git compiler-rt/lib/orc/interval_set.h compiler-rt/lib/orc/interval_set.h
index 20f40f9c7d37..df104a3be642 100644
--- compiler-rt/lib/orc/interval_set.h
+++ compiler-rt/lib/orc/interval_set.h
@@ -15,7 +15,7 @@
 
 #include "interval_map.h"
 
-namespace __orc_rt {
+namespace orc_rt {
 
 /// Implements a coalescing interval set.
 ///
@@ -82,6 +82,6 @@ private:
   ImplMap Map;
 };
 
-} // End namespace __orc_rt
+} // End namespace orc_rt
 
 #endif // ORC_RT_INTERVAL_SET_H
diff --git compiler-rt/lib/orc/jit_dispatch.h compiler-rt/lib/orc/jit_dispatch.h
new file mode 100644
index 000000000000..9b2329fa1e4f
--- /dev/null
+++ compiler-rt/lib/orc/jit_dispatch.h
@@ -0,0 +1,50 @@
+//===------ jit_dispatch.h - Call back to an ORC controller -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of the ORC runtime support library.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ORC_RT_JIT_DISPATCH_H
+#define ORC_RT_JIT_DISPATCH_H
+
+#include "common.h"
+#include "wrapper_function_utils.h"
+
+namespace orc_rt {
+
+class JITDispatch {
+public:
+  JITDispatch(const void *FnTag) : FnTag(FnTag) {}
+
+  WrapperFunctionResult operator()(const char *ArgData, size_t ArgSize) {
+    // On Windows these symbols can never be zero/unresolved, so taking their
+    // addresses always yields non-null values and the checks below would only
+    // produce a compiler warning there.
+#if !defined(_WIN32)
+    if (ORC_RT_UNLIKELY(!&__orc_rt_jit_dispatch_ctx))
+      return WrapperFunctionResult::createOutOfBandError(
+                 "__orc_rt_jit_dispatch_ctx not set")
+          .release();
+    if (ORC_RT_UNLIKELY(!&__orc_rt_jit_dispatch))
+      return WrapperFunctionResult::createOutOfBandError(
+                 "__orc_rt_jit_dispatch not set")
+          .release();
+#endif
+
+    return __orc_rt_jit_dispatch(&__orc_rt_jit_dispatch_ctx, FnTag, ArgData,
+                                 ArgSize);
+  }
+
+private:
+  const void *FnTag;
+};
+
+} // namespace orc_rt
+
+#endif // ORC_RT_JIT_DISPATCH_H
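JITDispatch bundles __orc_rt_jit_dispatch and its context behind a plain callable, which is what the platform code in this patch now hands to WrapperFunction::call. The following sketch models a controller call-back through a dispatch tag, patterned on lookupSymbolInJITDylib in coff_platform.cpp above; the tag and function names are hypothetical.

```cpp
// Sketch: calling back into the ORC controller through JITDispatch. The tag
// __example_lookup_tag is illustrative; real tags are declared per-platform
// with ORC_RT_JIT_DISPATCH_TAG as elsewhere in this patch.
#include "jit_dispatch.h"
#include "wrapper_function_utils.h"

#include <string_view>

using namespace orc_rt;

ORC_RT_JIT_DISPATCH_TAG(__example_lookup_tag) // hypothetical tag

static Expected<ExecutorAddr> lookupInController(ExecutorAddr Header,
                                                 std::string_view Name) {
  Expected<ExecutorAddr> Result((ExecutorAddr()));
  if (auto Err = WrapperFunction<SPSExpected<SPSExecutorAddr>(
          SPSExecutorAddr,
          SPSString)>::call(JITDispatch(&__example_lookup_tag), Result, Header,
                            Name))
    return std::move(Err);
  return Result;
}
```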
diff --git compiler-rt/lib/orc/macho_platform.cpp compiler-rt/lib/orc/macho_platform.cpp
index 9b4f6e5fd417..8cc3594b5d0c 100644
--- compiler-rt/lib/orc/macho_platform.cpp
+++ compiler-rt/lib/orc/macho_platform.cpp
@@ -16,6 +16,7 @@
 #include "debug.h"
 #include "error.h"
 #include "interval_map.h"
+#include "jit_dispatch.h"
 #include "wrapper_function_utils.h"
 
 #include <algorithm>
@@ -30,8 +31,8 @@
 
 #define DEBUG_TYPE "macho_platform"
 
-using namespace __orc_rt;
-using namespace __orc_rt::macho;
+using namespace orc_rt;
+using namespace orc_rt::macho;
 
 // Declare function tags for functions in the JIT process.
 ORC_RT_JIT_DISPATCH_TAG(__orc_rt_macho_push_initializers_tag)
@@ -82,7 +83,7 @@ using MachOJITDylibDepInfoMap =
 
 } // anonymous namespace
 
-namespace __orc_rt {
+namespace orc_rt {
 
 using SPSMachOObjectPlatformSectionsMap =
     SPSSequence<SPSTuple<SPSString, SPSExecutorAddrRange>>;
@@ -139,7 +140,7 @@ public:
   }
 };
 
-} // namespace __orc_rt
+} // namespace orc_rt
 
 namespace {
 struct TLVDescriptor {
@@ -330,6 +331,7 @@ public:
 
   const char *dlerror();
   void *dlopen(std::string_view Name, int Mode);
+  int dlupdate(void *DSOHandle, int Mode);
   int dlclose(void *DSOHandle);
   void *dlsym(void *DSOHandle, const char *Symbol);
 
@@ -379,6 +381,12 @@ private:
   Error dlopenInitialize(std::unique_lock<std::mutex> &JDStatesLock,
                          JITDylibState &JDS, MachOJITDylibDepInfoMap &DepInfo);
 
+  Error dlupdateImpl(void *DSOHandle, int Mode);
+  Error dlupdateFull(std::unique_lock<std::mutex> &JDStatesLock,
+                     JITDylibState &JDS);
+  Error dlupdateInitialize(std::unique_lock<std::mutex> &JDStatesLock,
+                           JITDylibState &JDS);
+
   Error dlcloseImpl(void *DSOHandle);
   Error dlcloseDeinitialize(std::unique_lock<std::mutex> &JDStatesLock,
                             JITDylibState &JDS);
@@ -406,7 +414,7 @@ private:
 
 } // anonymous namespace
 
-namespace __orc_rt {
+namespace orc_rt {
 
 class SPSMachOExecutorSymbolFlags;
 
@@ -441,7 +449,7 @@ public:
   }
 };
 
-} // namespace __orc_rt
+} // namespace orc_rt
 
 namespace {
 
@@ -788,6 +796,20 @@ void *MachOPlatformRuntimeState::dlopen(std::string_view Path, int Mode) {
   }
 }
 
+int MachOPlatformRuntimeState::dlupdate(void *DSOHandle, int Mode) {
+  ORC_RT_DEBUG({
+    std::string S;
+    printdbg("MachOPlatform::dlupdate(%p) (%s)\n", DSOHandle, S.c_str());
+  });
+  std::lock_guard<std::recursive_mutex> Lock(DyldAPIMutex);
+  if (auto Err = dlupdateImpl(DSOHandle, Mode)) {
+    // FIXME: Make dlerror thread safe.
+    DLFcnError = toString(std::move(Err));
+    return -1;
+  }
+  return 0;
+}
+
 int MachOPlatformRuntimeState::dlclose(void *DSOHandle) {
   ORC_RT_DEBUG({
     auto *JDS = getJITDylibStateByHeader(DSOHandle);
@@ -915,7 +937,7 @@ Error MachOPlatformRuntimeState::requestPushSymbols(
   Error OpErr = Error::success();
   if (auto Err = WrapperFunction<SPSError(
           SPSExecutorAddr, SPSSequence<SPSTuple<SPSString, bool>>)>::
-          call(&__orc_rt_macho_push_symbols_tag, OpErr,
+          call(JITDispatch(&__orc_rt_macho_push_symbols_tag), OpErr,
                ExecutorAddr::fromPtr(JDS.Header), Symbols)) {
     cantFail(std::move(OpErr));
     return std::move(Err);
@@ -1145,8 +1167,9 @@ Error MachOPlatformRuntimeState::dlopenFull(
   // Unlock so that we can accept the initializer update.
   JDStatesLock.unlock();
   if (auto Err = WrapperFunction<SPSExpected<SPSMachOJITDylibDepInfoMap>(
-          SPSExecutorAddr)>::call(&__orc_rt_macho_push_initializers_tag,
-                                  DepInfo, ExecutorAddr::fromPtr(JDS.Header)))
+          SPSExecutorAddr)>::
+          call(JITDispatch(&__orc_rt_macho_push_initializers_tag), DepInfo,
+               ExecutorAddr::fromPtr(JDS.Header)))
     return Err;
   JDStatesLock.lock();
 
@@ -1242,6 +1265,67 @@ Error MachOPlatformRuntimeState::dlopenInitialize(
   return Error::success();
 }
 
+Error MachOPlatformRuntimeState::dlupdateImpl(void *DSOHandle, int Mode) {
+  std::unique_lock<std::mutex> Lock(JDStatesMutex);
+
+  // Try to find JITDylib state by DSOHandle.
+  auto *JDS = getJITDylibStateByHeader(DSOHandle);
+
+  if (!JDS) {
+    std::ostringstream ErrStream;
+    ErrStream << "No registered JITDylib for " << DSOHandle;
+    return make_error<StringError>(ErrStream.str());
+  }
+
+  if (!JDS->referenced())
+    return make_error<StringError>("dlupdate failed, JITDylib must be open.");
+
+  if (!JDS->Sealed) {
+    if (auto Err = dlupdateFull(Lock, *JDS))
+      return Err;
+  }
+
+  return Error::success();
+}
+
+Error MachOPlatformRuntimeState::dlupdateFull(
+    std::unique_lock<std::mutex> &JDStatesLock, JITDylibState &JDS) {
+  // Call back to the JIT to push the initializers.
+  Expected<MachOJITDylibDepInfoMap> DepInfo((MachOJITDylibDepInfoMap()));
+  // Unlock so that we can accept the initializer update.
+  JDStatesLock.unlock();
+  if (auto Err = WrapperFunction<SPSExpected<SPSMachOJITDylibDepInfoMap>(
+          SPSExecutorAddr)>::
+          call(JITDispatch(&__orc_rt_macho_push_initializers_tag), DepInfo,
+               ExecutorAddr::fromPtr(JDS.Header)))
+    return Err;
+  JDStatesLock.lock();
+
+  if (!DepInfo)
+    return DepInfo.takeError();
+
+  if (auto Err = dlupdateInitialize(JDStatesLock, JDS))
+    return Err;
+
+  return Error::success();
+}
+
+Error MachOPlatformRuntimeState::dlupdateInitialize(
+    std::unique_lock<std::mutex> &JDStatesLock, JITDylibState &JDS) {
+  ORC_RT_DEBUG({
+    printdbg("MachOPlatformRuntimeState::dlupdateInitialize(\"%s\")\n",
+             JDS.Name.c_str());
+  });
+
+  // Initialize this JITDylib.
+  if (auto Err = registerObjCRegistrationObjects(JDStatesLock, JDS))
+    return Err;
+  if (auto Err = runModInits(JDStatesLock, JDS))
+    return Err;
+
+  return Error::success();
+}
+
 Error MachOPlatformRuntimeState::dlcloseImpl(void *DSOHandle) {
   std::unique_lock<std::mutex> Lock(JDStatesMutex);
 
@@ -1515,6 +1599,10 @@ void *__orc_rt_macho_jit_dlopen(const char *path, int mode) {
   return MachOPlatformRuntimeState::get().dlopen(path, mode);
 }
 
+int __orc_rt_macho_jit_dlupdate(void *dso_handle, int mode) {
+  return MachOPlatformRuntimeState::get().dlupdate(dso_handle, mode);
+}
+
 int __orc_rt_macho_jit_dlclose(void *dso_handle) {
   return MachOPlatformRuntimeState::get().dlclose(dso_handle);
 }
@@ -1532,8 +1620,8 @@ ORC_RT_INTERFACE int64_t __orc_rt_macho_run_program(const char *JITDylibName,
                                                     int argc, char *argv[]) {
   using MainTy = int (*)(int, char *[]);
 
-  void *H = __orc_rt_macho_jit_dlopen(JITDylibName,
-                                      __orc_rt::macho::ORC_RT_RTLD_LAZY);
+  void *H =
+      __orc_rt_macho_jit_dlopen(JITDylibName, orc_rt::macho::ORC_RT_RTLD_LAZY);
   if (!H) {
     __orc_rt_log_error(__orc_rt_macho_jit_dlerror());
     return -1;
diff --git compiler-rt/lib/orc/macho_platform.h compiler-rt/lib/orc/macho_platform.h
index 3b2242ab27ce..ad70c97809d2 100644
--- compiler-rt/lib/orc/macho_platform.h
+++ compiler-rt/lib/orc/macho_platform.h
@@ -24,11 +24,12 @@ ORC_RT_INTERFACE void __orc_rt_macho_cxa_finalize(void *dso_handle);
 // dlfcn functions.
 ORC_RT_INTERFACE const char *__orc_rt_macho_jit_dlerror();
 ORC_RT_INTERFACE void *__orc_rt_macho_jit_dlopen(const char *path, int mode);
+ORC_RT_INTERFACE int __orc_rt_macho_jit_dlupdate(void *dso_handle, int mode);
 ORC_RT_INTERFACE int __orc_rt_macho_jit_dlclose(void *dso_handle);
 ORC_RT_INTERFACE void *__orc_rt_macho_jit_dlsym(void *dso_handle,
                                                 const char *symbol);
 
-namespace __orc_rt {
+namespace orc_rt {
 namespace macho {
 
 enum dlopen_mode : int {
@@ -38,7 +39,7 @@ enum dlopen_mode : int {
   ORC_RT_RTLD_GLOBAL = 0x8
 };
 
-} // end namespace macho
-} // end namespace __orc_rt
+} // namespace macho
+} // namespace orc_rt
 
 #endif // ORC_RT_MACHO_PLATFORM_H
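The new dlupdate entry point re-runs initializers for a JITDylib that is already open (dlupdateFull pushes initializers again and dlupdateInitialize runs the newly registered mod-inits) without a dlclose/dlopen cycle. Below is a hedged usage sketch of the dlfcn-style API, assuming a JITDylib named "main"; the surrounding function is illustrative and error handling mirrors __orc_rt_macho_run_program above.

```cpp
// Hedged sketch of the MachO JIT dlfcn-style API with the new dlupdate call.
#include "macho_platform.h"

#include <cstdio>

static int refreshInitializers() {
  void *H = __orc_rt_macho_jit_dlopen("main", orc_rt::macho::ORC_RT_RTLD_LAZY);
  if (!H) {
    std::fprintf(stderr, "dlopen failed: %s\n", __orc_rt_macho_jit_dlerror());
    return -1;
  }
  // ... controller adds more code/initializers to the JITDylib here ...
  if (__orc_rt_macho_jit_dlupdate(H, orc_rt::macho::ORC_RT_RTLD_LAZY) != 0) {
    std::fprintf(stderr, "dlupdate failed: %s\n", __orc_rt_macho_jit_dlerror());
    return -1;
  }
  return __orc_rt_macho_jit_dlclose(H);
}
```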
diff --git compiler-rt/lib/orc/run_program_wrapper.cpp compiler-rt/lib/orc/run_program_wrapper.cpp
index 24a7b4fc3cbe..c9565e742908 100644
--- compiler-rt/lib/orc/run_program_wrapper.cpp
+++ compiler-rt/lib/orc/run_program_wrapper.cpp
@@ -16,7 +16,7 @@
 
 #include <vector>
 
-using namespace __orc_rt;
+using namespace orc_rt;
 
 extern "C" int64_t __orc_rt_run_program(const char *JITDylibName,
                                         const char *EntrySymbolName, int argc,
diff --git compiler-rt/lib/orc/simple_packed_serialization.h compiler-rt/lib/orc/simple_packed_serialization.h
index 488d2407ddd4..b4726da08e34 100644
--- compiler-rt/lib/orc/simple_packed_serialization.h
+++ compiler-rt/lib/orc/simple_packed_serialization.h
@@ -48,7 +48,7 @@
 #include <utility>
 #include <vector>
 
-namespace __orc_rt {
+namespace orc_rt {
 
 /// Output char buffer with overflow check.
 class SPSOutputBuffer {
@@ -576,7 +576,7 @@ Expected<T> fromSPSSerializable(SPSSerializableExpected<T> BSE) {
     return make_error<StringError>(BSE.ErrMsg);
 }
 
-} // end namespace detail
+} // namespace detail
 
 /// Serialize to a SPSError from a detail::SPSSerializableError.
 template <>
@@ -684,6 +684,6 @@ public:
   }
 };
 
-} // end namespace __orc_rt
+} // namespace orc_rt
 
 #endif // ORC_RT_SIMPLE_PACKED_SERIALIZATION_H
diff --git compiler-rt/lib/orc/stl_extras.h compiler-rt/lib/orc/stl_extras.h
index 80a6cd13ac28..ad7b66102406 100644
--- compiler-rt/lib/orc/stl_extras.h
+++ compiler-rt/lib/orc/stl_extras.h
@@ -17,7 +17,7 @@
 #include <utility>
 #include <tuple>
 
-namespace __orc_rt {
+namespace orc_rt {
 
 /// Substitute for std::identity.
 /// Switch to std::identity once we can use c++20.
@@ -40,6 +40,6 @@ constexpr uint64_t bit_ceil(uint64_t Val) noexcept {
   return Val + 1;
 }
 
-} // namespace __orc_rt
+} // namespace orc_rt
 
 #endif // ORC_RT_STL_EXTRAS
diff --git compiler-rt/lib/orc/string_pool.h compiler-rt/lib/orc/string_pool.h
index c0ba4ea8980e..fb93e4ff7ff0 100644
--- compiler-rt/lib/orc/string_pool.h
+++ compiler-rt/lib/orc/string_pool.h
@@ -22,7 +22,7 @@
 #include <string>
 #include <unordered_map>
 
-namespace __orc_rt {
+namespace orc_rt {
 
 class PooledStringPtr;
 
@@ -156,14 +156,14 @@ inline bool StringPool::empty() const {
   return Pool.empty();
 }
 
-} // end namespace __orc_rt
+} // namespace orc_rt
 
 namespace std {
 
 // Make PooledStringPtrs hashable.
-template <> struct hash<__orc_rt::PooledStringPtr> {
-  size_t operator()(const __orc_rt::PooledStringPtr &A) const {
-    return hash<__orc_rt::PooledStringPtr::PoolEntryPtr>()(A.S);
+template <> struct hash<orc_rt::PooledStringPtr> {
+  size_t operator()(const orc_rt::PooledStringPtr &A) const {
+    return hash<orc_rt::PooledStringPtr::PoolEntryPtr>()(A.S);
   }
 };
 
diff --git compiler-rt/lib/orc/tests/unit/adt_test.cpp compiler-rt/lib/orc/tests/unit/adt_test.cpp
index 6625a590e363..b93978ecab31 100644
--- compiler-rt/lib/orc/tests/unit/adt_test.cpp
+++ compiler-rt/lib/orc/tests/unit/adt_test.cpp
@@ -16,7 +16,7 @@
 #include <sstream>
 #include <string>
 
-using namespace __orc_rt;
+using namespace orc_rt;
 
 TEST(ADTTest, SpanDefaultConstruction) {
   span<int> S;
diff --git compiler-rt/lib/orc/tests/unit/bitmask_enum_test.cpp compiler-rt/lib/orc/tests/unit/bitmask_enum_test.cpp
index 4c27d54fb4a9..8fb92e258876 100644
--- compiler-rt/lib/orc/tests/unit/bitmask_enum_test.cpp
+++ compiler-rt/lib/orc/tests/unit/bitmask_enum_test.cpp
@@ -16,7 +16,7 @@
 #include <sstream>
 #include <string>
 
-using namespace __orc_rt;
+using namespace orc_rt;
 
 namespace {
 
@@ -24,9 +24,9 @@ enum Flags { F0 = 0, F1 = 1, F2 = 2, F3 = 4, F4 = 8 };
 
 } // namespace
 
-namespace __orc_rt {
+namespace orc_rt {
 ORC_RT_DECLARE_ENUM_AS_BITMASK(Flags, F4);
-} // namespace __orc_rt
+} // namespace orc_rt
 
 static_assert(is_bitmask_enum<Flags>::value != 0);
 static_assert(largest_bitmask_enum_bit<Flags>::value == Flags::F4);
diff --git compiler-rt/lib/orc/tests/unit/endian_test.cpp compiler-rt/lib/orc/tests/unit/endian_test.cpp
index 71b677af694c..7bc27a2aaa02 100644
--- compiler-rt/lib/orc/tests/unit/endian_test.cpp
+++ compiler-rt/lib/orc/tests/unit/endian_test.cpp
@@ -15,7 +15,7 @@
 #include "endianness.h"
 #include "gtest/gtest.h"
 
-using namespace __orc_rt;
+using namespace orc_rt;
 
 TEST(Endian, ByteSwap_32) {
   EXPECT_EQ(0x44332211u, ByteSwap_32(0x11223344));
diff --git compiler-rt/lib/orc/tests/unit/error_test.cpp compiler-rt/lib/orc/tests/unit/error_test.cpp
index 5251d788e01b..ac9f7eddedda 100644
--- compiler-rt/lib/orc/tests/unit/error_test.cpp
+++ compiler-rt/lib/orc/tests/unit/error_test.cpp
@@ -17,7 +17,7 @@
 #include "error.h"
 #include "gtest/gtest.h"
 
-using namespace __orc_rt;
+using namespace orc_rt;
 
 namespace {
 
diff --git compiler-rt/lib/orc/tests/unit/executor_address_test.cpp compiler-rt/lib/orc/tests/unit/executor_address_test.cpp
index 05b91f3f8609..1b0626df49cc 100644
--- compiler-rt/lib/orc/tests/unit/executor_address_test.cpp
+++ compiler-rt/lib/orc/tests/unit/executor_address_test.cpp
@@ -17,7 +17,7 @@
 #include "executor_address.h"
 #include "gtest/gtest.h"
 
-using namespace __orc_rt;
+using namespace orc_rt;
 
 TEST(ExecutorAddrTest, DefaultAndNull) {
   // Check that default constructed values and isNull behave as expected.
diff --git compiler-rt/lib/orc/tests/unit/executor_symbol_def_test.cpp compiler-rt/lib/orc/tests/unit/executor_symbol_def_test.cpp
index 181091ca1e60..2f7bae29e3d3 100644
--- compiler-rt/lib/orc/tests/unit/executor_symbol_def_test.cpp
+++ compiler-rt/lib/orc/tests/unit/executor_symbol_def_test.cpp
@@ -10,7 +10,7 @@
 #include "simple_packed_serialization_utils.h"
 #include "gtest/gtest.h"
 
-using namespace __orc_rt;
+using namespace orc_rt;
 
 TEST(ExecutorSymbolDefTest, Serialization) {
   blobSerializationRoundTrip<SPSExecutorSymbolDef>(ExecutorSymbolDef{});
diff --git compiler-rt/lib/orc/tests/unit/extensible_rtti_test.cpp compiler-rt/lib/orc/tests/unit/extensible_rtti_test.cpp
index feca1ec1d18c..8e8cdee02487 100644
--- compiler-rt/lib/orc/tests/unit/extensible_rtti_test.cpp
+++ compiler-rt/lib/orc/tests/unit/extensible_rtti_test.cpp
@@ -17,7 +17,7 @@
 #include "extensible_rtti.h"
 #include "gtest/gtest.h"
 
-using namespace __orc_rt;
+using namespace orc_rt;
 
 namespace {
 
diff --git compiler-rt/lib/orc/tests/unit/interval_map_test.cpp compiler-rt/lib/orc/tests/unit/interval_map_test.cpp
index a1c6958fcd52..272fc2849ee2 100644
--- compiler-rt/lib/orc/tests/unit/interval_map_test.cpp
+++ compiler-rt/lib/orc/tests/unit/interval_map_test.cpp
@@ -13,7 +13,7 @@
 #include "interval_map.h"
 #include "gtest/gtest.h"
 
-using namespace __orc_rt;
+using namespace orc_rt;
 
 TEST(IntervalMapTest, DefaultConstructed) {
   // Check that a default-constructed IntervalMap behaves as expected.
diff --git compiler-rt/lib/orc/tests/unit/interval_set_test.cpp compiler-rt/lib/orc/tests/unit/interval_set_test.cpp
index 7971a55f271f..ecd3915d0ccf 100644
--- compiler-rt/lib/orc/tests/unit/interval_set_test.cpp
+++ compiler-rt/lib/orc/tests/unit/interval_set_test.cpp
@@ -13,7 +13,7 @@
 #include "interval_set.h"
 #include "gtest/gtest.h"
 
-using namespace __orc_rt;
+using namespace orc_rt;
 
 TEST(IntervalSetTest, DefaultConstructed) {
   // Check that a default-constructed IntervalSet behaves as expected.
diff --git compiler-rt/lib/orc/tests/unit/simple_packed_serialization_test.cpp compiler-rt/lib/orc/tests/unit/simple_packed_serialization_test.cpp
index 397114b4017e..50d1e024a256 100644
--- compiler-rt/lib/orc/tests/unit/simple_packed_serialization_test.cpp
+++ compiler-rt/lib/orc/tests/unit/simple_packed_serialization_test.cpp
@@ -14,7 +14,7 @@
 #include "simple_packed_serialization_utils.h"
 #include "gtest/gtest.h"
 
-using namespace __orc_rt;
+using namespace orc_rt;
 
 TEST(SimplePackedSerializationTest, SPSOutputBuffer) {
   constexpr unsigned NumBytes = 8;
diff --git compiler-rt/lib/orc/tests/unit/simple_packed_serialization_utils.h compiler-rt/lib/orc/tests/unit/simple_packed_serialization_utils.h
index 746be43d250b..53b982565d14 100644
--- compiler-rt/lib/orc/tests/unit/simple_packed_serialization_utils.h
+++ compiler-rt/lib/orc/tests/unit/simple_packed_serialization_utils.h
@@ -14,15 +14,15 @@
 
 template <typename SPSTagT, typename T>
 static void blobSerializationRoundTrip(const T &Value) {
-  using BST = __orc_rt::SPSSerializationTraits<SPSTagT, T>;
+  using BST = orc_rt::SPSSerializationTraits<SPSTagT, T>;
 
   size_t Size = BST::size(Value);
   auto Buffer = std::make_unique<char[]>(Size);
-  __orc_rt::SPSOutputBuffer OB(Buffer.get(), Size);
+  orc_rt::SPSOutputBuffer OB(Buffer.get(), Size);
 
   EXPECT_TRUE(BST::serialize(OB, Value));
 
-  __orc_rt::SPSInputBuffer IB(Buffer.get(), Size);
+  orc_rt::SPSInputBuffer IB(Buffer.get(), Size);
 
   T DSValue;
   EXPECT_TRUE(BST::deserialize(IB, DSValue));
diff --git compiler-rt/lib/orc/tests/unit/string_pool_test.cpp compiler-rt/lib/orc/tests/unit/string_pool_test.cpp
index 15ee2ce7d24d..456a1d4d7b29 100644
--- compiler-rt/lib/orc/tests/unit/string_pool_test.cpp
+++ compiler-rt/lib/orc/tests/unit/string_pool_test.cpp
@@ -9,7 +9,7 @@
 #include "string_pool.h"
 #include "gtest/gtest.h"
 
-using namespace __orc_rt;
+using namespace orc_rt;
 
 namespace {
 
diff --git compiler-rt/lib/orc/tests/unit/wrapper_function_utils_test.cpp compiler-rt/lib/orc/tests/unit/wrapper_function_utils_test.cpp
index f10c5093046d..91be84f15188 100644
--- compiler-rt/lib/orc/tests/unit/wrapper_function_utils_test.cpp
+++ compiler-rt/lib/orc/tests/unit/wrapper_function_utils_test.cpp
@@ -10,10 +10,12 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "common.h"
+#include "jit_dispatch.h"
 #include "wrapper_function_utils.h"
 #include "gtest/gtest.h"
 
-using namespace __orc_rt;
+using namespace orc_rt;
 
 namespace {
 constexpr const char *TestString = "test string";
@@ -98,13 +100,14 @@ __orc_rt_jit_dispatch(__orc_rt_Opaque *Ctx, const void *FnTag,
 }
 
 TEST(WrapperFunctionUtilsTest, WrapperFunctionCallVoidNoopAndHandle) {
-  EXPECT_FALSE(!!WrapperFunction<void()>::call((void *)&voidNoopWrapper));
+  EXPECT_FALSE(
+      !!WrapperFunction<void()>::call(JITDispatch((void *)&voidNoopWrapper)));
 }
 
 TEST(WrapperFunctionUtilsTest, WrapperFunctionCallAddWrapperAndHandle) {
   int32_t Result;
   EXPECT_FALSE(!!WrapperFunction<int32_t(int32_t, int32_t)>::call(
-      (void *)&addWrapper, Result, 1, 2));
+      JITDispatch((void *)&addWrapper), Result, 1, 2));
   EXPECT_EQ(Result, (int32_t)3);
 }
 
@@ -128,7 +131,8 @@ TEST(WrapperFunctionUtilsTest, WrapperFunctionMethodCallAndHandleRet) {
   int32_t Result;
   AddClass AddObj(1);
   EXPECT_FALSE(!!WrapperFunction<int32_t(SPSExecutorAddr, int32_t)>::call(
-      (void *)&addMethodWrapper, Result, ExecutorAddr::fromPtr(&AddObj), 2));
+      JITDispatch((void *)&addMethodWrapper), Result,
+      ExecutorAddr::fromPtr(&AddObj), 2));
   EXPECT_EQ(Result, (int32_t)3);
 }
 
diff --git compiler-rt/lib/orc/wrapper_function_utils.h compiler-rt/lib/orc/wrapper_function_utils.h
index 8009438547a3..3e372fe96d2d 100644
--- compiler-rt/lib/orc/wrapper_function_utils.h
+++ compiler-rt/lib/orc/wrapper_function_utils.h
@@ -13,14 +13,13 @@
 #ifndef ORC_RT_WRAPPER_FUNCTION_UTILS_H
 #define ORC_RT_WRAPPER_FUNCTION_UTILS_H
 
-#include "orc_rt/c_api.h"
-#include "common.h"
 #include "error.h"
 #include "executor_address.h"
+#include "orc_rt/c_api.h"
 #include "simple_packed_serialization.h"
 #include <type_traits>
 
-namespace __orc_rt {
+namespace orc_rt {
 
 /// C++ wrapper function result: Same as CWrapperFunctionResult but
 /// auto-releases memory.
@@ -288,30 +287,22 @@ private:
   using ResultSerializer = detail::ResultSerializer<SPSRetTagT, RetT>;
 
 public:
-  template <typename RetT, typename... ArgTs>
-  static Error call(const void *FnTag, RetT &Result, const ArgTs &...Args) {
+  template <typename DispatchFn, typename RetT, typename... ArgTs>
+  static Error call(DispatchFn &&Dispatch, RetT &Result, const ArgTs &...Args) {
 
     // RetT might be an Error or Expected value. Set the checked flag now:
     // we don't want the user to have to check the unused result if this
     // operation fails.
     detail::ResultDeserializer<SPSRetTagT, RetT>::makeSafe(Result);
 
-    // Since the functions cannot be zero/unresolved on Windows, the following
-    // reference taking would always be non-zero, thus generating a compiler
-    // warning otherwise.
-#if !defined(_WIN32)
-    if (ORC_RT_UNLIKELY(!&__orc_rt_jit_dispatch_ctx))
-      return make_error<StringError>("__orc_rt_jit_dispatch_ctx not set");
-    if (ORC_RT_UNLIKELY(!&__orc_rt_jit_dispatch))
-      return make_error<StringError>("__orc_rt_jit_dispatch not set");
-#endif
     auto ArgBuffer =
         WrapperFunctionResult::fromSPSArgs<SPSArgList<SPSTagTs...>>(Args...);
     if (const char *ErrMsg = ArgBuffer.getOutOfBandError())
       return make_error<StringError>(ErrMsg);
 
-    WrapperFunctionResult ResultBuffer = __orc_rt_jit_dispatch(
-        &__orc_rt_jit_dispatch_ctx, FnTag, ArgBuffer.data(), ArgBuffer.size());
+    WrapperFunctionResult ResultBuffer =
+        Dispatch(ArgBuffer.data(), ArgBuffer.size());
+
     if (auto ErrMsg = ResultBuffer.getOutOfBandError())
       return make_error<StringError>(ErrMsg);
 
@@ -347,10 +338,11 @@ template <typename... SPSTagTs>
 class WrapperFunction<void(SPSTagTs...)>
     : private WrapperFunction<SPSEmpty(SPSTagTs...)> {
 public:
-  template <typename... ArgTs>
-  static Error call(const void *FnTag, const ArgTs &...Args) {
+  template <typename DispatchFn, typename... ArgTs>
+  static Error call(DispatchFn &&Dispatch, const ArgTs &...Args) {
     SPSEmpty BE;
-    return WrapperFunction<SPSEmpty(SPSTagTs...)>::call(FnTag, BE, Args...);
+    return WrapperFunction<SPSEmpty(SPSTagTs...)>::call(
+        std::forward<DispatchFn>(Dispatch), BE, Args...);
   }
 
   using WrapperFunction<SPSEmpty(SPSTagTs...)>::handle;
@@ -504,6 +496,6 @@ public:
   }
 };
 
-} // end namespace __orc_rt
+} // namespace orc_rt
 
 #endif // ORC_RT_WRAPPER_FUNCTION_UTILS_H
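With this change, WrapperFunction::call accepts any callable mapping (ArgData, ArgSize) to a WrapperFunctionResult instead of hard-coding __orc_rt_jit_dispatch: JITDispatch is the production dispatcher, while tests or alternative transports can supply their own. A sketch of an in-process dispatcher implemented as a lambda follows; addWrapper and callAddInProcess are illustrative names modeled on the unit-test changes above.

```cpp
// Sketch: supplying a custom dispatch callable to WrapperFunction::call. The
// lambda runs the handler in-process rather than dispatching to a controller.
#include "wrapper_function_utils.h"

#include <cstdint>

using namespace orc_rt;

static orc_rt_CWrapperFunctionResult addWrapper(const char *ArgData,
                                                size_t ArgSize) {
  return WrapperFunction<int32_t(int32_t, int32_t)>::handle(
             ArgData, ArgSize, [](int32_t X, int32_t Y) { return X + Y; })
      .release();
}

static Error callAddInProcess(int32_t &Result) {
  auto InProcessDispatch = [](const char *ArgData,
                              size_t ArgSize) -> WrapperFunctionResult {
    return addWrapper(ArgData, ArgSize);
  };
  return WrapperFunction<int32_t(int32_t, int32_t)>::call(InProcessDispatch,
                                                          Result, 1, 2);
}
```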
diff --git compiler-rt/lib/profile/CMakeLists.txt compiler-rt/lib/profile/CMakeLists.txt
index 45e516489175..ef2349251489 100644
--- compiler-rt/lib/profile/CMakeLists.txt
+++ compiler-rt/lib/profile/CMakeLists.txt
@@ -111,6 +111,12 @@ if(COMPILER_RT_TARGET_HAS_UNAME)
      -DCOMPILER_RT_HAS_UNAME=1)
 endif()
 
+if(MSVC)
+  # The profile runtime has historically only been supported with the static
+  # runtime library on Windows.
+  set(CMAKE_MSVC_RUNTIME_LIBRARY MultiThreaded)
+endif()
+
 # We don't use the C++ Standard Library here, so avoid including it by mistake.
 append_list_if(COMPILER_RT_HAS_NOSTDINCXX_FLAG -nostdinc++ EXTRA_FLAGS)
 # XRay uses C++ standard library headers.
diff --git compiler-rt/lib/rtsan/CMakeLists.txt compiler-rt/lib/rtsan/CMakeLists.txt
index bd7358e86e59..3f146a757a97 100644
--- compiler-rt/lib/rtsan/CMakeLists.txt
+++ compiler-rt/lib/rtsan/CMakeLists.txt
@@ -3,6 +3,7 @@ include_directories(..)
 set(RTSAN_CXX_SOURCES
   rtsan.cpp
   rtsan_context.cpp
+  rtsan_flags.cpp
   rtsan_stack.cpp
   rtsan_interceptors.cpp)
 
@@ -12,7 +13,10 @@ set(RTSAN_PREINIT_SOURCES
 set(RTSAN_HEADERS
   rtsan.h
   rtsan_context.h
-  rtsan_stack.h)
+  rtsan_flags.h
+  rtsan_flags.inc
+  rtsan_stack.h
+  )
 
 set(RTSAN_DEPS)
 
diff --git compiler-rt/lib/rtsan/rtsan.cpp compiler-rt/lib/rtsan/rtsan.cpp
index 1388ce66cbde..f929c9ae81c1 100644
--- compiler-rt/lib/rtsan/rtsan.cpp
+++ compiler-rt/lib/rtsan/rtsan.cpp
@@ -10,9 +10,11 @@
 
 #include <rtsan/rtsan.h>
 #include <rtsan/rtsan_context.h>
+#include <rtsan/rtsan_flags.h>
 #include <rtsan/rtsan_interceptors.h>
 
 #include "sanitizer_common/sanitizer_atomic.h"
+#include "sanitizer_common/sanitizer_common.h"
 #include "sanitizer_common/sanitizer_mutex.h"
 
 using namespace __rtsan;
@@ -29,7 +31,11 @@ extern "C" {
 
 SANITIZER_INTERFACE_ATTRIBUTE void __rtsan_init() {
   CHECK(!__rtsan_is_initialized());
+
+  SanitizerToolName = "RealtimeSanitizer";
+  InitializeFlags();
   InitializeInterceptors();
+
   SetInitialized();
 }
 
diff --git compiler-rt/lib/rtsan/rtsan.h compiler-rt/lib/rtsan/rtsan.h
index ae23609f97d2..ca72d41d1112 100644
--- compiler-rt/lib/rtsan/rtsan.h
+++ compiler-rt/lib/rtsan/rtsan.h
@@ -36,12 +36,10 @@ SANITIZER_INTERFACE_ATTRIBUTE void __rtsan_realtime_enter();
 // intercepted method calls to the real methods.
 SANITIZER_INTERFACE_ATTRIBUTE void __rtsan_realtime_exit();
 
-// Disable all RTSan error reporting.
-// Injected into the code if "nosanitize(realtime)" is on a function.
+// See documentation in rtsan_interface.h.
 SANITIZER_INTERFACE_ATTRIBUTE void __rtsan_disable();
 
-// Re-enable all RTSan error reporting.
-// The counterpart to `__rtsan_disable`.
+// See documentation in rtsan_interface.h.
 SANITIZER_INTERFACE_ATTRIBUTE void __rtsan_enable();
 
 SANITIZER_INTERFACE_ATTRIBUTE void
diff --git compiler-rt/lib/rtsan/rtsan_context.cpp compiler-rt/lib/rtsan/rtsan_context.cpp
index 97f18dfbbcca..8609394fa222 100644
--- compiler-rt/lib/rtsan/rtsan_context.cpp
+++ compiler-rt/lib/rtsan/rtsan_context.cpp
@@ -8,6 +8,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include <rtsan/rtsan.h>
 #include <rtsan/rtsan_context.h>
 
 #include <rtsan/rtsan_stack.h>
@@ -75,6 +76,7 @@ void __rtsan::Context::BypassPop() { bypass_depth_--; }
 
 void __rtsan::ExpectNotRealtime(Context &context,
                                 const char *intercepted_function_name) {
+  CHECK(__rtsan_is_initialized());
   if (context.InRealtimeContext() && !context.IsBypassed()) {
     context.BypassPush();
 
@@ -93,10 +95,11 @@ void __rtsan::PrintDiagnostics(const char *intercepted_function_name, uptr pc,
                                uptr bp) {
   ScopedErrorReportLock l;
 
-  fprintf(stderr,
-          "Real-time violation: intercepted call to real-time unsafe function "
-          "`%s` in real-time context! Stack trace:\n",
-          intercepted_function_name);
+  Report("ERROR: RealtimeSanitizer: unsafe-library-call\n");
+  Printf("Intercepted call to real-time unsafe function "
+         "`%s` in real-time context!\n",
+         intercepted_function_name);
+
   __rtsan::PrintStackTrace(pc, bp);
 }
 
diff --git compiler-rt/lib/rtsan/rtsan_flags.cpp compiler-rt/lib/rtsan/rtsan_flags.cpp
new file mode 100644
index 000000000000..beab2a2fc5d8
--- /dev/null
+++ compiler-rt/lib/rtsan/rtsan_flags.cpp
@@ -0,0 +1,58 @@
+//===--- rtsan_flags.cpp - Realtime Sanitizer -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of RealtimeSanitizer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "rtsan/rtsan_flags.h"
+#include "sanitizer_common/sanitizer_flag_parser.h"
+#include "sanitizer_common/sanitizer_flags.h"
+
########## TRUNCATED ###########