diff --git a/bolt/include/bolt/Utils/Utils.h b/bolt/include/bolt/Utils/Utils.h
index 3886c5f8757c08..9baee7d94066de 100644
--- a/bolt/include/bolt/Utils/Utils.h
+++ b/bolt/include/bolt/Utils/Utils.h
@@ -41,6 +41,11 @@ std::string getEscapedName(const StringRef &Name);
 /// Return the unescaped name
 std::string getUnescapedName(const StringRef &Name);
 
+/// Return a common part for a given \p Name wrt a given \p Suffixes list.
+/// Preserve the suffix if \p KeepSuffix is set, only dropping characters
+/// following it, otherwise drop the suffix as well.
+std::optional<StringRef> getCommonName(const StringRef Name, bool KeepSuffix,
+                                       ArrayRef<StringRef> Suffixes);
+
 /// LTO-generated function names take a form:
 ///
 ///   <function_name>.lto_priv.<...>/...
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 813d825f8b570c..4aeeb1daab1b94 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -88,7 +88,7 @@ MaxSamples("max-samples", cl::cat(AggregatorCategory));
 
 extern cl::opt<ProfileFormatKind> ProfileFormat;
-extern cl::opt<bool> ProfileUsePseudoProbes;
+extern cl::opt<bool> ProfileWritePseudoProbes;
 extern cl::opt<std::string> SaveProfile;
 
 cl::opt<bool> ReadPreAggregated(
@@ -2300,7 +2300,7 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
   yaml::bolt::BinaryProfile BP;
 
   const MCPseudoProbeDecoder *PseudoProbeDecoder =
-      opts::ProfileUsePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr;
+      opts::ProfileWritePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr;
 
   // Fill out the header info.
   BP.Header.Version = 1;
@@ -2427,11 +2427,15 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
           }
         }
       }
-      // Drop blocks without a hash, won't be useful for stale matching.
-      llvm::erase_if(YamlBF.Blocks,
-                     [](const yaml::bolt::BinaryBasicBlockProfile &YamlBB) {
-                       return YamlBB.Hash == (yaml::Hex64)0;
-                     });
+      // Skip printing if there's no profile data
+      llvm::erase_if(
+          YamlBF.Blocks, [](const yaml::bolt::BinaryBasicBlockProfile &YamlBB) {
+            auto HasCount = [](const auto &SI) { return SI.Count; };
+            bool HasAnyCount = YamlBB.ExecCount ||
+                               llvm::any_of(YamlBB.Successors, HasCount) ||
+                               llvm::any_of(YamlBB.CallSites, HasCount);
+            return !HasAnyCount;
+          });
       BP.Functions.emplace_back(YamlBF);
     }
   }
diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index 2ab4bf9a4b3e13..67ed32017667d6 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -49,11 +49,6 @@ llvm::cl::opt<bool>
 llvm::cl::opt<bool> ProfileUseDFS("profile-use-dfs",
                                   cl::desc("use DFS order for YAML profile"),
                                   cl::Hidden, cl::cat(BoltOptCategory));
-
-llvm::cl::opt<bool> ProfileUsePseudoProbes(
-    "profile-use-pseudo-probes",
-    cl::desc("Use pseudo probes for profile generation and matching"),
-    cl::Hidden, cl::cat(BoltOptCategory));
 } // namespace opts
 
 namespace llvm {
diff --git a/bolt/lib/Profile/YAMLProfileWriter.cpp b/bolt/lib/Profile/YAMLProfileWriter.cpp
index f74cf60e076d0a..ffbf2388e912fb 100644
--- a/bolt/lib/Profile/YAMLProfileWriter.cpp
+++ b/bolt/lib/Profile/YAMLProfileWriter.cpp
@@ -13,6 +13,7 @@
 #include "bolt/Profile/DataAggregator.h"
 #include "bolt/Profile/ProfileReaderBase.h"
 #include "bolt/Rewrite/RewriteInstance.h"
+#include "bolt/Utils/CommandLineOpts.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/raw_ostream.h"
@@ -21,8 +22,12 @@
 #define DEBUG_TYPE "bolt-prof"
 
 namespace opts {
-extern llvm::cl::opt<bool> ProfileUseDFS;
-extern llvm::cl::opt<bool> ProfileUsePseudoProbes;
+using namespace llvm;
+extern cl::opt<bool> ProfileUseDFS;
+cl::opt<bool> ProfileWritePseudoProbes(
+    "profile-write-pseudo-probes",
+    cl::desc("Use pseudo probes in profile generation"), cl::Hidden,
+    cl::cat(BoltOptCategory));
 } // namespace opts
 
 namespace llvm {
@@ -59,7 +64,7 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
   yaml::bolt::BinaryFunctionProfile YamlBF;
   const BinaryContext &BC = BF.getBinaryContext();
   const MCPseudoProbeDecoder *PseudoProbeDecoder =
-      opts::ProfileUsePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr;
+      opts::ProfileWritePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr;
 
   const uint16_t LBRProfile = BF.getProfileFlags() & BinaryFunction::PF_LBR;
diff --git a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp
index 4925b4b385d9b1..8647df4b0edf82 100644
--- a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp
+++ b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp
@@ -14,6 +14,7 @@
 #include "bolt/Rewrite/MetadataRewriter.h"
 #include "bolt/Rewrite/MetadataRewriters.h"
 #include "bolt/Utils/CommandLineOpts.h"
+#include "bolt/Utils/Utils.h"
 #include "llvm/IR/Function.h"
 #include "llvm/MC/MCPseudoProbe.h"
 #include "llvm/Support/CommandLine.h"
@@ -49,7 +50,7 @@ static cl::opt<PrintPseudoProbesOptions> PrintPseudoProbes(
     clEnumValN(PPP_All, "all", "enable all debugging printout")),
     cl::Hidden, cl::cat(BoltCategory));
 
-extern cl::opt<bool> ProfileUsePseudoProbes;
+extern cl::opt<bool> ProfileWritePseudoProbes;
 } // namespace opts
 
 namespace {
@@ -71,7 +72,8 @@ class PseudoProbeRewriter final : public MetadataRewriter {
   /// Parse .pseudo_probe_desc section and .pseudo_probe section
   /// Setup Pseudo probe decoder
-  void parsePseudoProbe();
+  /// If \p ProfiledOnly is set, only parse records for functions with profile.
+  void parsePseudoProbe(bool ProfiledOnly = false);
 
   /// PseudoProbe decoder
   std::shared_ptr<MCPseudoProbeDecoder> ProbeDecoderPtr;
@@ -90,21 +92,21 @@
 };
 
 Error PseudoProbeRewriter::preCFGInitializer() {
-  if (opts::ProfileUsePseudoProbes)
-    parsePseudoProbe();
+  if (opts::ProfileWritePseudoProbes)
+    parsePseudoProbe(true);
 
   return Error::success();
 }
 
 Error PseudoProbeRewriter::postEmitFinalizer() {
-  if (!opts::ProfileUsePseudoProbes)
+  if (!opts::ProfileWritePseudoProbes)
     parsePseudoProbe();
   updatePseudoProbes();
 
   return Error::success();
 }
 
-void PseudoProbeRewriter::parsePseudoProbe() {
+void PseudoProbeRewriter::parsePseudoProbe(bool ProfiledOnly) {
   MCPseudoProbeDecoder &ProbeDecoder(*ProbeDecoderPtr);
   PseudoProbeDescSection = BC.getUniqueSectionByName(".pseudo_probe_desc");
   PseudoProbeSection = BC.getUniqueSectionByName(".pseudo_probe");
@@ -133,10 +135,22 @@ void PseudoProbeRewriter::parsePseudoProbe() {
 
   MCPseudoProbeDecoder::Uint64Set GuidFilter;
   MCPseudoProbeDecoder::Uint64Map FuncStartAddrs;
+  SmallVector<StringRef> Suffixes(
+      {".destroy", ".resume", ".llvm.", ".cold", ".warm"});
   for (const BinaryFunction *F : BC.getAllBinaryFunctions()) {
+    bool HasProfile = F->hasProfileAvailable();
     for (const MCSymbol *Sym : F->getSymbols()) {
-      FuncStartAddrs[Function::getGUID(NameResolver::restore(Sym->getName()))] =
-          F->getAddress();
+      StringRef SymName = Sym->getName();
+      for (auto Name : {std::optional(NameResolver::restore(SymName)),
+                        getCommonName(SymName, false, Suffixes)}) {
+        if (!Name)
+          continue;
+        SymName = *Name;
+        uint64_t GUID = Function::getGUID(SymName);
+        FuncStartAddrs[GUID] = F->getAddress();
+        if (ProfiledOnly && HasProfile)
+          GuidFilter.insert(GUID);
+      }
     }
   }
   Contents = PseudoProbeSection->getContents();
@@ -155,13 +169,25 @@ void PseudoProbeRewriter::parsePseudoProbe() {
     ProbeDecoder.printProbesForAllAddresses(outs());
   }
 
-  for (const auto &FuncDesc : ProbeDecoder.getGUID2FuncDescMap()) {
-    uint64_t GUID = FuncDesc.FuncGUID;
-    if (!FuncStartAddrs.contains(GUID))
-      continue;
-    BinaryFunction *BF = BC.getBinaryFunctionAtAddress(FuncStartAddrs[GUID]);
-    assert(BF);
-    BF->setGUID(GUID);
+  const GUIDProbeFunctionMap &GUID2Func = ProbeDecoder.getGUID2FuncDescMap();
+  // Checks GUID in GUID2Func and returns it if it's present or zero otherwise.
+  auto checkGUID = [&](StringRef SymName) -> uint64_t {
+    uint64_t GUID = Function::getGUID(SymName);
+    if (GUID2Func.find(GUID) == GUID2Func.end())
+      return 0;
+    return GUID;
+  };
+  for (BinaryFunction *F : BC.getAllBinaryFunctions()) {
+    for (const MCSymbol *Sym : F->getSymbols()) {
+      StringRef SymName = NameResolver::restore(Sym->getName());
+      uint64_t GUID = checkGUID(SymName);
+      std::optional<StringRef> CommonName =
+          getCommonName(SymName, false, Suffixes);
+      if (!GUID && CommonName)
+        GUID = checkGUID(*CommonName);
+      if (GUID)
+        F->setGUID(GUID);
+    }
   }
 }
diff --git a/bolt/lib/Utils/Utils.cpp b/bolt/lib/Utils/Utils.cpp
index 718e97535fd22a..ecc2f1010a9858 100644
--- a/bolt/lib/Utils/Utils.cpp
+++ b/bolt/lib/Utils/Utils.cpp
@@ -66,15 +66,21 @@ std::string getUnescapedName(const StringRef &Name) {
   return Output;
 }
 
-std::optional<StringRef> getLTOCommonName(const StringRef Name) {
-  for (StringRef Suffix : {".__uniq.", ".lto_priv.", ".constprop.", ".llvm."}) {
+std::optional<StringRef> getCommonName(const StringRef Name, bool KeepSuffix,
+                                       ArrayRef<StringRef> Suffixes) {
+  for (StringRef Suffix : Suffixes) {
     size_t LTOSuffixPos = Name.find(Suffix);
     if (LTOSuffixPos != StringRef::npos)
-      return Name.substr(0, LTOSuffixPos + Suffix.size());
+      return Name.substr(0, LTOSuffixPos + (KeepSuffix ? Suffix.size() : 0));
   }
   return std::nullopt;
 }
 
+std::optional<StringRef> getLTOCommonName(const StringRef Name) {
+  return getCommonName(Name, true,
+                       {".__uniq.", ".lto_priv.", ".constprop.", ".llvm."});
+}
+
 std::optional<uint8_t> readDWARFExpressionTargetReg(StringRef ExprBytes) {
   uint8_t Opcode = ExprBytes[0];
   if (Opcode == dwarf::DW_CFA_def_cfa_expression)
diff --git a/bolt/test/X86/pseudoprobe-decoding-inline.test b/bolt/test/X86/pseudoprobe-decoding-inline.test
index b361551e5711ea..1fdd00c7ef6c4b 100644
--- a/bolt/test/X86/pseudoprobe-decoding-inline.test
+++ b/bolt/test/X86/pseudoprobe-decoding-inline.test
@@ -6,11 +6,11 @@
 # PREAGG: B X:0 #main# 1 0
 ## Check pseudo-probes in regular YAML profile (non-BOLTed binary)
 # RUN: link_fdata %s %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin %t.preagg PREAGG
-# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml -o %t.fdata --profile-use-pseudo-probes
+# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml -o %t.fdata --profile-write-pseudo-probes
 # RUN: FileCheck --input-file %t.yaml %s --check-prefix CHECK-YAML
 ## Check pseudo-probes in BAT YAML profile (BOLTed binary)
 # RUN: link_fdata %s %t.bolt %t.preagg2 PREAGG
-# RUN: perf2bolt %t.bolt -p %t.preagg2 --pa -w %t.yaml2 -o %t.fdata2 --profile-use-pseudo-probes
+# RUN: perf2bolt %t.bolt -p %t.preagg2 --pa -w %t.yaml2 -o %t.fdata2 --profile-write-pseudo-probes
 # RUN: FileCheck --input-file %t.yaml2 %s --check-prefix CHECK-YAML
 # CHECK-YAML: name: bar
 # CHECK-YAML: - bid: 0
@@ -30,7 +30,7 @@
 # CHECK-YAML: guid: 0xDB956436E78DD5FA
 # CHECK-YAML: pseudo_probe_desc_hash: 0x10000FFFFFFFF
 #
-## Check that without --profile-use-pseudo-probes option, no pseudo probes are
+## Check that without --profile-write-pseudo-probes option, no pseudo probes are
 ## generated
 # RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml -o %t.fdata
 # RUN: FileCheck --input-file %t.yaml %s --check-prefix CHECK-NO-OPT
diff --git a/clang-tools-extra/clang-tidy/add_new_check.py b/clang-tools-extra/clang-tidy/add_new_check.py
index d384dbae28abbc..e366f100535357 100755
--- a/clang-tools-extra/clang-tidy/add_new_check.py
+++ b/clang-tools-extra/clang-tidy/add_new_check.py
@@ -17,7 +17,7 @@
 import textwrap
 
 # FIXME Python 3.9: Replace typing.Tuple with builtins.tuple.
-from typing import Optional, Tuple
+from typing import Optional, Tuple, Match
 
 
 # Adapts the module's CMakelist file. Returns 'True' if it could add a new
@@ -511,7 +511,7 @@ def has_auto_fix(check_name: str) -> str:
 
     return ""
 
-    def process_doc(doc_file: Tuple[str, str]) -> Tuple[str, Optional[re.Match[str]]]:
+    def process_doc(doc_file: Tuple[str, str]) -> Tuple[str, Optional[Match[str]]]:
         check_name = doc_file[0] + "-" + doc_file[1].replace(".rst", "")
 
         with io.open(os.path.join(docs_dir, *doc_file), "r", encoding="utf8") as doc:
diff --git a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines/missing-std-forward.cpp b/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines/missing-std-forward.cpp
index 8116db58c937d4..98c592db7ce226 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines/missing-std-forward.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines/missing-std-forward.cpp
@@ -187,14 +187,14 @@ void lambda_value_reference_auxiliary_var(T&& t) {
 namespace deleted_functions {
 
 template <typename T>
-void f(T &&) = delete;
+void f(T &&t) = delete;
 
 struct S {
   template <typename T>
-  S(T &&) = delete;
+  S(T &&t) = delete;
 
   template <typename T>
-  void operator&(T &&) = delete;
+  void operator&(T &&t) = delete;
 };
 
 } // namespace deleted_functions
diff --git a/clang/docs/RealtimeSanitizer.rst b/clang/docs/RealtimeSanitizer.rst
index 799cd43509c6e6..5e281a2a357907 100644
--- a/clang/docs/RealtimeSanitizer.rst
+++ b/clang/docs/RealtimeSanitizer.rst
@@ -83,3 +83,53 @@ non-zero exit code.
     #13 0x00010230dd64 in main main.cpp:9
     #14 0x0001958960dc  (<unknown module>)
     #15 0x2f557ffffffffffc  (<unknown module>)
+
+Disabling
+---------
+
+In some circumstances, you may want to suppress error reporting in a specific scope.
+
+In C++, this is achieved via ``__rtsan::ScopedDisabler``. Within the scope where the ``ScopedDisabler`` object is instantiated, all sanitizer error reports are suppressed. This suppression applies to the current scope as well as all invoked functions, including any functions called transitively.
+
+.. code-block:: c++
+
+    #include <sanitizer/rtsan_interface.h>
+
+    void process(const std::vector<float>& buffer) [[clang::nonblocking]] {
+        {
+            __rtsan::ScopedDisabler d;
+            ...
+        }
+    }
+
+If RealtimeSanitizer is not enabled at compile time (i.e., the code is not compiled with the ``-fsanitize=realtime`` flag), the ``ScopedDisabler`` is compiled as a no-op.
+
+In C, you can use the ``__rtsan_disable()`` and ``__rtsan_enable()`` functions to manually disable and re-enable RealtimeSanitizer checks.
+
+.. code-block:: c++
+
+    #include <sanitizer/rtsan_interface.h>
+
+    int process(const float* buffer) [[clang::nonblocking]]
+    {
+        {
+            __rtsan_disable();
+
+            ...
+
+            __rtsan_enable();
+        }
+    }
+
+Each call to ``__rtsan_disable()`` must be paired with a subsequent call to ``__rtsan_enable()`` to restore normal sanitizer functionality. If a corresponding ``__rtsan_enable()`` call is not made, the behavior is undefined.
+
+Compile-time sanitizer detection
+--------------------------------
+
+Clang provides the pre-processor macro ``__has_feature`` which may be used to detect if RealtimeSanitizer is enabled at compile-time.
+
+.. code-block:: c++
+
+    #if defined(__has_feature) && __has_feature(realtime_sanitizer)
+    ...
+    #endif
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 59ccdf1e15cd81..9860b25f2e7fa6 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -108,6 +108,9 @@ C++ Language Changes
 - Allow single element access of GCC vector/ext_vector_type object to be
   constant expression. Supports the `V.xyzw` syntax and other tidbits
   as seen in OpenCL. Selecting multiple elements is left as a future work.
+- Implement `CWG1815 <https://wg21.link/CWG1815>`_. Support lifetime extension
+  of temporary created by aggregate initialization using a default member
+  initializer.
 - Accept C++26 user-defined ``static_assert`` messages in C++11 as an extension.
 
@@ -457,6 +460,8 @@ LoongArch Support
 RISC-V Support
 ^^^^^^^^^^^^^^
 
+- The option ``-mcmodel=large`` for the large code model is supported.
+
 CUDA/HIP Language Changes
 ^^^^^^^^^^^^^^^^^^^^^^^^^
 
diff --git a/clang/include/clang/Basic/BuiltinsWebAssembly.def b/clang/include/clang/Basic/BuiltinsWebAssembly.def
index 2e80eef2c8b9bc..ad73f031922a0b 100644
--- a/clang/include/clang/Basic/BuiltinsWebAssembly.def
+++ b/clang/include/clang/Basic/BuiltinsWebAssembly.def
@@ -209,8 +209,8 @@ TARGET_BUILTIN(__builtin_wasm_relaxed_dot_bf16x8_add_f32_f32x4, "V4fV8UsV8UsV4f"
 TARGET_BUILTIN(__builtin_wasm_loadf16_f32, "fh*", "nU", "fp16")
 TARGET_BUILTIN(__builtin_wasm_storef16_f32, "vfh*", "n", "fp16")
 TARGET_BUILTIN(__builtin_wasm_splat_f16x8, "V8hf", "nc", "fp16")
-TARGET_BUILTIN(__builtin_wasm_extract_lane_f16x8, "fV8hi", "nc", "fp16")
-TARGET_BUILTIN(__builtin_wasm_replace_lane_f16x8, "V8hV8hif", "nc", "fp16")
+TARGET_BUILTIN(__builtin_wasm_extract_lane_f16x8, "fV8hIi", "nc", "fp16")
+TARGET_BUILTIN(__builtin_wasm_replace_lane_f16x8, "V8hV8hIif", "nc", "fp16")
 
 // Reference Types builtins
 // Some builtins are custom type-checked - see 't' as part of the third argument,
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index b160fee827a750..efdc058edca56d 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -10162,13 +10162,6 @@ def warn_dangling_pointer_assignment : Warning<
   "will be destroyed at the end of the full-expression">,
   InGroup<DanglingAssignment>;
 
-def warn_unsupported_lifetime_extension : Warning<
-  "lifetime extension of "
-  "%select{temporary|backing array of initializer list}0 created "
-  "by aggregate initialization using a default member initializer "
-  "is not yet supported; lifetime of %select{temporary|backing array}0 "
-  "will end at the end of the full-expression">, InGroup<Dangling>;
-
 // For non-floating point, expressions of the form x == x or x != x
 // should result in a warning, since these always evaluate to a constant.
 // Array comparisons have similar warnings
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 68c782a15c6f1b..99eef472223a00 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -6403,6 +6403,9 @@ class Sema final : public SemaBase {
     /// example, in a for-range initializer).
     bool InLifetimeExtendingContext = false;
 
+    /// Whether we should rebuild CXXDefaultArgExpr and CXXDefaultInitExpr.
+    bool RebuildDefaultArgOrDefaultInit = false;
+
     // When evaluating immediate functions in the initializer of a default
     // argument or default member initializer, this is the declaration whose
     // default initializer is being evaluated and the location of the call
@@ -7810,9 +7813,11 @@ class Sema final : public SemaBase {
   }
 
   bool isInLifetimeExtendingContext() const {
-    assert(!ExprEvalContexts.empty() &&
-           "Must be in an expression evaluation context");
-    return ExprEvalContexts.back().InLifetimeExtendingContext;
+    return currentEvaluationContext().InLifetimeExtendingContext;
+  }
+
+  bool needsRebuildOfDefaultArgOrInit() const {
+    return currentEvaluationContext().RebuildDefaultArgOrDefaultInit;
   }
 
   bool isCheckingDefaultArgumentOrInitializer() const {
@@ -7854,18 +7859,6 @@ class Sema final : public SemaBase {
     return Res;
   }
 
-  /// keepInLifetimeExtendingContext - Pull down InLifetimeExtendingContext
-  /// flag from previous context.
-  void keepInLifetimeExtendingContext() {
-    if (ExprEvalContexts.size() > 2 &&
-        parentEvaluationContext().InLifetimeExtendingContext) {
-      auto &LastRecord = ExprEvalContexts.back();
-      auto &PrevRecord = parentEvaluationContext();
-      LastRecord.InLifetimeExtendingContext =
-          PrevRecord.InLifetimeExtendingContext;
-    }
-  }
-
   DefaultedComparisonKind getDefaultedComparisonKind(const FunctionDecl *FD) {
     return getDefaultedFunctionKind(FD).asComparison();
   }
diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h
index 7331bcf249266d..898f4392465fdf 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -1188,15 +1188,6 @@ class ASTReader
   /// once recursing loading has been completed.
   llvm::SmallVector PendingOdrMergeChecks;
 
-  /// Lambdas that need to be loaded right after the function they belong to.
-  /// It is required to have canonical declaration for lambda class from the
-  /// same module as enclosing function. This is required to correctly resolve
-  /// captured variables in the lambda. Without this, due to lazy
-  /// deserialization canonical declarations for the function and lambdas can
-  /// be from different modules and DeclRefExprs may refer to the AST nodes
-  /// that don't exist in the function.
-  SmallVector<GlobalDeclID, 4> PendingLambdas;
-
   using DataPointers =
       std::pair;
 
   using ObjCInterfaceDataPointers =
diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h
index 557f0e547ab4a8..4a343f2872d8d9 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h
@@ -76,7 +76,7 @@ class DependencyScanningService {
   DependencyScanningService(
       ScanningMode Mode, ScanningOutputFormat Format,
       ScanningOptimizations OptimizeArgs = ScanningOptimizations::Default,
-      bool EagerLoadModules = false);
+      bool EagerLoadModules = false, bool TraceVFS = false);
 
   ScanningMode getMode() const { return Mode; }
@@ -86,6 +88,8 @@
 
   bool shouldEagerLoadModules() const { return EagerLoadModules; }
 
+  bool shouldTraceVFS() const { return TraceVFS; }
+
   DependencyScanningFilesystemSharedCache &getSharedCache() {
     return SharedCache;
   }
@@ -97,6 +99,8 @@
   const ScanningOptimizations OptimizeArgs;
   /// Whether to set up command-lines to load PCM files eagerly.
   const bool EagerLoadModules;
+  /// Whether to trace VFS accesses.
+  const bool TraceVFS;
   /// The global file system cache.
   DependencyScanningFilesystemSharedCache SharedCache;
 };
diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h
index cb9476d1550df3..012237e0278f4a 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h
@@ -144,6 +144,8 @@ class DependencyScanningTool {
       StringRef CWD, const llvm::DenseSet<ModuleID> &AlreadySeen,
       LookupModuleOutputCallback LookupModuleOutput);
 
+  llvm::vfs::FileSystem &getWorkerVFS() const { return Worker.getVFS(); }
+
 private:
   DependencyScanningWorker Worker;
 };
diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h
index 0f607862194b31..da6e0401411a34 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h
@@ -104,6 +104,8 @@ class DependencyScanningWorker {
 
   bool shouldEagerLoadModules() const { return EagerLoadModules; }
 
+  llvm::vfs::FileSystem &getVFS() const { return *BaseFS; }
+
 private:
   std::shared_ptr<PCHContainerOperations> PCHContainerOps;
   /// The file system to be used during the scan.
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index 6545912ed160d9..e10142eff8ec47 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -1924,7 +1924,6 @@ bool CastExpr::CastConsistency() const {
   case CK_FixedPointToIntegral:
   case CK_IntegralToFixedPoint:
   case CK_MatrixCast:
-  case CK_HLSLVectorTruncation:
     assert(!getType()->isBooleanType() && "unheralded conversion to bool");
     goto CheckNoBasePath;
 
@@ -1945,6 +1944,7 @@ bool CastExpr::CastConsistency() const {
   case CK_BuiltinFnToFnPtr:
   case CK_FixedPointToBoolean:
   case CK_HLSLArrayRValue:
+  case CK_HLSLVectorTruncation:
   CheckNoBasePath:
     assert(path_empty() && "Cast kind should not have a base path!");
     break;
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 78d25006360042..6387e375dda79c 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -10935,6 +10935,15 @@ bool VectorExprEvaluator::VisitCastExpr(const CastExpr *E) {
     return true;
   }
+  case CK_HLSLVectorTruncation: {
+    APValue Val;
+    SmallVector<APValue, 4> Elements;
+    if (!EvaluateVector(SE, Val, Info))
+      return Error(E);
+    for (unsigned I = 0; I < NElts; I++)
+      Elements.push_back(Val.getVectorElt(I));
+    return Success(Elements, E);
+  }
   default:
     return ExprEvaluatorBaseTy::VisitCastExpr(E);
   }
@@ -14478,7 +14487,6 @@ bool IntExprEvaluator::VisitCastExpr(const CastExpr *E) {
   case CK_FixedPointCast:
   case CK_IntegralToFixedPoint:
   case CK_MatrixCast:
-  case CK_HLSLVectorTruncation:
     llvm_unreachable("invalid cast kind for integral value");
 
   case CK_BitCast:
@@ -14651,6 +14659,12 @@ bool IntExprEvaluator::VisitCastExpr(const CastExpr *E) {
       return false;
     return Success(Value, E);
   }
+  case CK_HLSLVectorTruncation: {
+    APValue Val;
+    if (!EvaluateVector(SubExpr, Val, Info))
+      return Error(E);
+    return Success(Val.getVectorElt(0), E);
+  }
   }
 
   llvm_unreachable("unknown cast resulting in integral value");
@@ -15177,6 +15191,12 @@ bool FloatExprEvaluator::VisitCastExpr(const CastExpr *E) {
     Result = V.getComplexFloatReal();
     return true;
   }
+  case CK_HLSLVectorTruncation: {
+    APValue Val;
+    if (!EvaluateVector(SubExpr, Val, Info))
+      return Error(E);
+    return Success(Val.getVectorElt(0), E);
+  }
   }
 }
diff --git a/clang/lib/Basic/FileManager.cpp b/clang/lib/Basic/FileManager.cpp
index 4509cee1ca0fed..6097b85a03064b 100644
--- a/clang/lib/Basic/FileManager.cpp
+++ b/clang/lib/Basic/FileManager.cpp
@@ -692,5 +692,16 @@ void FileManager::PrintStats() const {
   llvm::errs() << NumFileLookups << " file lookups, "
               << NumFileCacheMisses << " file cache misses.\n";
 
+  getVirtualFileSystem().visit([](llvm::vfs::FileSystem &VFS) {
+    if (auto *T = dyn_cast_or_null<llvm::vfs::TracingFileSystem>(&VFS))
+      llvm::errs() << "\n*** Virtual File System Stats:\n"
+                   << T->NumStatusCalls << " status() calls\n"
+                   << T->NumOpenFileForReadCalls << " openFileForRead() calls\n"
+                   << T->NumDirBeginCalls << " dir_begin() calls\n"
+                   << T->NumGetRealPathCalls << " getRealPath() calls\n"
+                   << T->NumExistsCalls << " exists() calls\n"
+                   << T->NumIsLocalCalls << " isLocal() calls\n";
+  });
+
   //llvm::errs() << PagesMapped << BytesOfPagesMapped << FSLookups;
 }
diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp
index 59c5927717933d..ae14d74f2d9151 100644
--- a/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/clang/lib/CodeGen/CGCUDANV.cpp
@@ -840,8 +840,10 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
     FatBinStr = new llvm::GlobalVariable(
         CGM.getModule(), CGM.Int8Ty,
         /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, nullptr,
-        "__hip_fatbin_" + CGM.getContext().getCUIDHash(), nullptr,
-        llvm::GlobalVariable::NotThreadLocal);
+        "__hip_fatbin" + (CGM.getLangOpts().CUID.empty()
+                              ? ""
+                              : "_" + CGM.getContext().getCUIDHash()),
+        nullptr, llvm::GlobalVariable::NotThreadLocal);
     cast<llvm::GlobalVariable>(FatBinStr)->setSection(FatbinConstantName);
   }
 
@@ -894,8 +896,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
   // thread safety of the loaded program. Therefore we can assume sequential
   // execution of constructor functions here.
   if (IsHIP) {
-    auto Linkage = CudaGpuBinary ? llvm::GlobalValue::InternalLinkage
-                                 : llvm::GlobalValue::ExternalLinkage;
+    auto Linkage = RelocatableDeviceCode ? llvm::GlobalValue::ExternalLinkage
+                                         : llvm::GlobalValue::InternalLinkage;
     llvm::BasicBlock *IfBlock =
         llvm::BasicBlock::Create(Context, "if", ModuleCtorFunc);
     llvm::BasicBlock *ExitBlock =
@@ -905,10 +907,11 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
     GpuBinaryHandle = new llvm::GlobalVariable(
         TheModule, PtrTy, /*isConstant=*/false, Linkage,
         /*Initializer=*/
-        CudaGpuBinary ? llvm::ConstantPointerNull::get(PtrTy) : nullptr,
-        CudaGpuBinary
-            ? "__hip_gpubin_handle"
-            : "__hip_gpubin_handle_" + CGM.getContext().getCUIDHash());
+        !RelocatableDeviceCode ? llvm::ConstantPointerNull::get(PtrTy)
+                               : nullptr,
+        "__hip_gpubin_handle" + (CGM.getLangOpts().CUID.empty()
+                                     ? ""
+                                     : "_" + CGM.getContext().getCUIDHash()));
     GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getAsAlign());
     // Prevent the weak symbol in different shared libraries being merged.
     if (Linkage != llvm::GlobalValue::InternalLinkage)
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 9027bab6b680d4..82caf65ac68d6b 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2709,14 +2709,19 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
     return CGF.CGM.createOpenCLIntToSamplerConversion(E, CGF);
 
   case CK_HLSLVectorTruncation: {
-    assert(DestTy->isVectorType() && "Expected dest type to be vector type");
+    assert((DestTy->isVectorType() || DestTy->isBuiltinType()) &&
+           "Destination type must be a vector or builtin type.");
     Value *Vec = Visit(const_cast<Expr *>(E));
-    SmallVector<int> Mask;
-    unsigned NumElts = DestTy->castAs<VectorType>()->getNumElements();
-    for (unsigned I = 0; I != NumElts; ++I)
-      Mask.push_back(I);
+    if (auto *VecTy = DestTy->getAs<VectorType>()) {
+      SmallVector<int> Mask;
+      unsigned NumElts = VecTy->getNumElements();
+      for (unsigned I = 0; I != NumElts; ++I)
+        Mask.push_back(I);
 
-    return Builder.CreateShuffleVector(Vec, Mask, "trunc");
+      return Builder.CreateShuffleVector(Vec, Mask, "trunc");
+    }
+    llvm::Value *Zero = llvm::Constant::getNullValue(CGF.SizeTy);
+    return Builder.CreateExtractElement(Vec, Zero, "cast.vtrunc");
   }
 
   } // end of switch
diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp
index 2f119feb93aaf3..ec617eec67192c 100644
--- a/clang/lib/CodeGen/Targets/AArch64.cpp
+++ b/clang/lib/CodeGen/Targets/AArch64.cpp
@@ -500,7 +500,7 @@ bool AArch64SwiftABIInfo::isLegalVectorType(CharUnits VectorSize,
 bool AArch64ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
   // For the soft-float ABI variant, no types are considered to be homogeneous
   // aggregates.
-  if (Kind == AArch64ABIKind::AAPCSSoft)
+  if (isSoftFloat())
     return false;
 
   // Homogeneous aggregates for AAPCS64 must have base types of a floating
@@ -555,8 +555,8 @@ RValue AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, QualType Ty,
     BaseTy = ArrTy->getElementType();
     NumRegs = ArrTy->getNumElements();
   }
-  bool IsFPR = Kind != AArch64ABIKind::AAPCSSoft &&
-               (BaseTy->isFloatingPointTy() || BaseTy->isVectorTy());
+  bool IsFPR =
+      !isSoftFloat() && (BaseTy->isFloatingPointTy() || BaseTy->isVectorTy());
 
   // The AArch64 va_list type and handling is specified in the Procedure Call
   // Standard, section B.4:
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 2ce6779f4b43e3..f58b816a9709dd 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -2902,11 +2902,16 @@ void tools::addMCModel(const Driver &D, const llvm::opt::ArgList &Args,
   } else if (Triple.isPPC64() || Triple.isOSAIX()) {
     Ok = CM == "small" || CM == "medium" || CM == "large";
   } else if (Triple.isRISCV()) {
+    // The large code model cannot be used together with PIC.
+    if (CM == "large" && RelocationModel != llvm::Reloc::Static)
+      D.Diag(diag::err_drv_argument_not_allowed_with)
+          << A->getAsString(Args) << "-fpic";
     if (CM == "medlow")
       CM = "small";
     else if (CM == "medany")
       CM = "medium";
-    Ok = CM == "small" || CM == "medium";
+    Ok = CM == "small" || CM == "medium" ||
+         (CM == "large" && Triple.isRISCV64());
   } else if (Triple.getArch() == llvm::Triple::x86_64) {
     Ok = llvm::is_contained({"small", "kernel", "medium", "large", "tiny"},
                             CM);
diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp
index 5843571718b3a2..f29f8796ea9290 100644
--- a/clang/lib/Format/ContinuationIndenter.cpp
+++ b/clang/lib/Format/ContinuationIndenter.cpp
@@ -815,7 +815,7 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
     return Tok.is(tok::l_paren) && Tok.ParameterCount > 0 && Tok.Previous &&
            Tok.Previous->is(tok::identifier);
   };
-  const auto IsInTemplateString = [this](const FormatToken &Tok) {
+  auto IsInTemplateString = [this](const FormatToken &Tok) {
     if (!Style.isJavaScript())
       return false;
     for (const auto *Prev = &Tok; Prev; Prev = Prev->Previous) {
@@ -827,7 +827,10 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
     return false;
  };
  // Identifies simple (no expression) one-argument function calls.
-  const auto IsSimpleFunction = [&](const FormatToken &Tok) {
+  auto StartsSimpleOneArgList = [&](const FormatToken &TokAfterLParen) {
+    assert(TokAfterLParen.isNot(tok::comment) || TokAfterLParen.Next);
+    const auto &Tok =
+        TokAfterLParen.is(tok::comment) ? *TokAfterLParen.Next : TokAfterLParen;
     if (!Tok.FakeLParens.empty() && Tok.FakeLParens.back() > prec::Unknown)
       return false;
     // Nested calls that involve `new` expressions also look like simple
@@ -836,6 +839,11 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
     //   - foo(::new Bar())
     if (Tok.is(tok::kw_new) || Tok.startsSequence(tok::coloncolon, tok::kw_new))
       return true;
+    if (Tok.is(TT_UnaryOperator) ||
+        (Style.isJavaScript() &&
+         Tok.isOneOf(tok::ellipsis, Keywords.kw_await))) {
+      return true;
+    }
     const auto *Previous = Tok.Previous;
     if (!Previous || (!Previous->isOneOf(TT_FunctionDeclarationLParen,
                                          TT_LambdaDefinitionLParen) &&
@@ -861,7 +869,7 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
       //   or
       //   caaaaaaaaaaaaaaaaaaaaal(
       //       new SomethingElseeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee());
-      !IsSimpleFunction(Current)) {
+      !StartsSimpleOneArgList(Current)) {
     CurrentState.NoLineBreak = true;
   }
diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp
index 1364641a9b71e1..5a273474f1d6b6 100644
--- a/clang/lib/Frontend/CompilerInstance.cpp
+++ b/clang/lib/Frontend/CompilerInstance.cpp
@@ -381,6 +381,9 @@ FileManager *CompilerInstance::createFileManager(
           : createVFSFromCompilerInvocation(getInvocation(), getDiagnostics());
   assert(VFS && "FileManager has no VFS?");
+  if (getFrontendOpts().ShowStats)
+    VFS =
+        llvm::makeIntrusiveRefCnt<llvm::vfs::TracingFileSystem>(std::move(VFS));
   FileMgr = new FileManager(getFileSystemOpts(), std::move(VFS));
   return FileMgr.get();
 }
diff --git a/clang/lib/Headers/wasm_simd128.h b/clang/lib/Headers/wasm_simd128.h
index 67d12f6f2cf419..14e36e85da8efa 100644
--- a/clang/lib/Headers/wasm_simd128.h
+++ b/clang/lib/Headers/wasm_simd128.h
@@ -1888,18 +1888,17 @@ static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_splat(float __a) {
   return (v128_t)__builtin_wasm_splat_f16x8(__a);
 }
 
-static __inline__ float __FP16_FN_ATTRS
-wasm_f16x8_extract_lane(v128_t __a, int __i) __REQUIRE_CONSTANT(__i) {
-  return __builtin_wasm_extract_lane_f16x8((__f16x8)__a, __i);
-}
+#ifdef __wasm_fp16__
+// TODO Replace the following macros with regular C functions and use normal
+// target-independent vector code like the other replace/extract instructions.
 
-static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_replace_lane(v128_t __a,
-                                                                 int __i,
-                                                                 float __b)
-    __REQUIRE_CONSTANT(__i) {
-  return (v128_t)__builtin_wasm_replace_lane_f16x8((__f16x8)__a, __i, __b);
-}
+#define wasm_f16x8_extract_lane(__a, __i) \
+  (__builtin_wasm_extract_lane_f16x8((__f16x8)(__a), __i))
+
+#define wasm_f16x8_replace_lane(__a, __i, __b) \
+  ((v128_t)__builtin_wasm_replace_lane_f16x8((__f16x8)(__a), __i, __b))
+
+#endif
 
 static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_abs(v128_t __a) {
   return (v128_t)__builtin_wasm_abs_f16x8((__f16x8)__a);
diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp
index 61a1ca3da6bca0..1f56884be392d6 100644
--- a/clang/lib/Parse/ParseDecl.cpp
+++ b/clang/lib/Parse/ParseDecl.cpp
@@ -2509,8 +2509,9 @@ Parser::DeclGroupPtrTy Parser::ParseDeclGroup(ParsingDeclSpec &DS,
 
   // P2718R0 - Lifetime extension in range-based for loops.
   if (getLangOpts().CPlusPlus23) {
-    auto &LastRecord = Actions.ExprEvalContexts.back();
+    auto &LastRecord = Actions.currentEvaluationContext();
     LastRecord.InLifetimeExtendingContext = true;
+    LastRecord.RebuildDefaultArgOrDefaultInit = true;
   }
 
   if (getLangOpts().OpenMP)
diff --git a/clang/lib/Sema/CheckExprLifetime.cpp b/clang/lib/Sema/CheckExprLifetime.cpp
index c8e703036c132c..77c73f47658fe1 100644
--- a/clang/lib/Sema/CheckExprLifetime.cpp
+++ b/clang/lib/Sema/CheckExprLifetime.cpp
@@ -896,11 +896,6 @@ static void visitLocalsRetainedByInitializer(IndirectLocalPath &Path,
 enum PathLifetimeKind {
   /// Lifetime-extend along this path.
   Extend,
-  /// We should lifetime-extend, but we don't because (due to technical
-  /// limitations) we can't. This happens for default member initializers,
-  /// which we don't clone for every use, so we don't have a unique
-  /// MaterializeTemporaryExpr to update.
-  ShouldExtend,
   /// Do not lifetime extend along this path.
   NoExtend
 };
@@ -912,7 +907,7 @@ shouldLifetimeExtendThroughPath(const IndirectLocalPath &Path) {
   PathLifetimeKind Kind = PathLifetimeKind::Extend;
   for (auto Elem : Path) {
     if (Elem.Kind == IndirectLocalPathEntry::DefaultInit)
-      Kind = PathLifetimeKind::ShouldExtend;
+      return PathLifetimeKind::Extend;
     else if (Elem.Kind != IndirectLocalPathEntry::LambdaCaptureInit)
       return PathLifetimeKind::NoExtend;
   }
@@ -1058,17 +1053,6 @@ static void checkExprLifetimeImpl(Sema &SemaRef,
     // Also visit the temporaries lifetime-extended by this initializer.
     return true;
 
-  case PathLifetimeKind::ShouldExtend:
-    // We're supposed to lifetime-extend the temporary along this path (per
-    // the resolution of DR1815), but we don't support that yet.
-    //
-    // FIXME: Properly handle this situation. Perhaps the easiest approach
-    // would be to clone the initializer expression on each use that would
-    // lifetime extend its temporaries.
-    SemaRef.Diag(DiagLoc, diag::warn_unsupported_lifetime_extension)
-        << RK << DiagRange;
-    break;
-
   case PathLifetimeKind::NoExtend:
     // If the path goes through the initialization of a variable or field,
     // it can't possibly reach a temporary created in this full-expression.
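The CheckExprLifetime.cpp change above, together with the removal of `warn_unsupported_lifetime_extension`, makes Clang actually lifetime-extend through default member initializers instead of warning. A minimal, hypothetical illustration (not taken from the patch) of the behavior this implements:

```c++
// With CWG1815 implemented, the temporary bound to `A::r` through the default
// member initializer is lifetime-extended during aggregate initialization,
// rather than dangling at the end of the full-expression.
struct A {
  const int &r = 42; // default member initializer binds a temporary
};

int main() {
  A a{};      // aggregate initialization uses the default member initializer
  return a.r; // OK with this patch: the temporary lives as long as `a`
}
```
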
diff --git a/clang/lib/Sema/HLSLExternalSemaSource.cpp b/clang/lib/Sema/HLSLExternalSemaSource.cpp
index 071e64fe56d48a..da7bbf8baa74df 100644
--- a/clang/lib/Sema/HLSLExternalSemaSource.cpp
+++ b/clang/lib/Sema/HLSLExternalSemaSource.cpp
@@ -525,6 +525,16 @@ void HLSLExternalSemaSource::defineHLSLTypesWithForwardDeclarations() {
         .addArraySubscriptOperators()
         .completeDefinition();
   });
+
+  Decl = BuiltinTypeDeclBuilder(*SemaPtr, HLSLNamespace, "StructuredBuffer")
+             .addSimpleTemplateParams(*SemaPtr, {"element_type"})
+             .Record;
+  onCompletion(Decl, [this](CXXRecordDecl *Decl) {
+    setupBufferType(Decl, *SemaPtr, ResourceClass::UAV,
+                    ResourceKind::TypedBuffer, /*IsROV=*/false)
+        .addArraySubscriptOperators()
+        .completeDefinition();
+  });
 }
 
 void HLSLExternalSemaSource::onCompletion(CXXRecordDecl *Record,
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 32dac4440fb82a..8f3e15cc9a9bb7 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -5429,6 +5429,8 @@ struct EnsureImmediateInvocationInDefaultArgs
   EnsureImmediateInvocationInDefaultArgs(Sema &SemaRef)
       : TreeTransform(SemaRef) {}
 
+  bool AlwaysRebuild() { return true; }
+
   // Lambda can only have immediate invocations in the default
   // args of their parameters, which is transformed upon calling the closure.
   // The body is not a subexpression, so we have nothing to do.
@@ -5470,7 +5472,7 @@ ExprResult Sema::BuildCXXDefaultArgExpr(SourceLocation CallLoc,
   assert(Param->hasDefaultArg() && "can't build nonexistent default arg");
 
   bool NestedDefaultChecking = isCheckingDefaultArgumentOrInitializer();
-  bool InLifetimeExtendingContext = isInLifetimeExtendingContext();
+  bool NeedRebuild = needsRebuildOfDefaultArgOrInit();
   std::optional<ExpressionEvaluationContextRecord::InitializationContext>
       InitializationContext =
           OutermostDeclarationWithDelayedImmediateInvocations();
@@ -5506,13 +5508,15 @@ ExprResult Sema::BuildCXXDefaultArgExpr(SourceLocation CallLoc,
 
   // Rewrite the call argument that was created from the corresponding
   // parameter's default argument.
-  if (V.HasImmediateCalls || InLifetimeExtendingContext) {
+  if (V.HasImmediateCalls ||
+      (NeedRebuild && isa_and_present<ExprWithCleanups>(Param->getInit()))) {
     if (V.HasImmediateCalls)
       ExprEvalContexts.back().DelayedDefaultInitializationContext = {
           CallLoc, Param, CurContext};
     // Pass down lifetime extending flag, and collect temporaries in
     // CreateMaterializeTemporaryExpr when we rewrite the call argument.
-    keepInLifetimeExtendingContext();
+    currentEvaluationContext().InLifetimeExtendingContext =
+        parentEvaluationContext().InLifetimeExtendingContext;
     EnsureImmediateInvocationInDefaultArgs Immediate(*this);
     ExprResult Res;
     runWithSufficientStackSpace(CallLoc, [&] {
@@ -5558,7 +5562,7 @@ ExprResult Sema::BuildCXXDefaultInitExpr(SourceLocation Loc, FieldDecl *Field) {
   Expr *Init = nullptr;
 
   bool NestedDefaultChecking = isCheckingDefaultArgumentOrInitializer();
-
+  bool NeedRebuild = needsRebuildOfDefaultArgOrInit();
   EnterExpressionEvaluationContext EvalContext(
       *this, ExpressionEvaluationContext::PotentiallyEvaluated, Field);
@@ -5593,12 +5597,27 @@ ExprResult Sema::BuildCXXDefaultInitExpr(SourceLocation Loc, FieldDecl *Field) {
   ImmediateCallVisitor V(getASTContext());
   if (!NestedDefaultChecking)
     V.TraverseDecl(Field);
-  if (V.HasImmediateCalls) {
+
+  // CWG1815
+  // Support lifetime extension of temporary created by aggregate
+  // initialization using a default member initializer. We should rebuild
+  // the initializer in a lifetime extension context if the initializer
+  // expression is an ExprWithCleanups. Then make sure the normal lifetime
+  // extension code recurses into the default initializer and does lifetime
+  // extension when warranted.
+  bool ContainsAnyTemporaries =
+      isa_and_present<ExprWithCleanups>(Field->getInClassInitializer());
+  if (Field->getInClassInitializer() &&
+      !Field->getInClassInitializer()->containsErrors() &&
+      (V.HasImmediateCalls || (NeedRebuild && ContainsAnyTemporaries))) {
     ExprEvalContexts.back().DelayedDefaultInitializationContext = {Loc, Field,
                                                                    CurContext};
     ExprEvalContexts.back().IsCurrentlyCheckingDefaultArgumentOrInitializer =
         NestedDefaultChecking;
-
+    // Pass down lifetime extending flag, and collect temporaries in
+    // CreateMaterializeTemporaryExpr when we rewrite the call argument.
+    currentEvaluationContext().InLifetimeExtendingContext =
+        parentEvaluationContext().InLifetimeExtendingContext;
     EnsureImmediateInvocationInDefaultArgs Immediate(*this);
     ExprResult Res;
     runWithSufficientStackSpace(Loc, [&] {
@@ -17675,11 +17694,10 @@ void Sema::PopExpressionEvaluationContext() {
 
   // Append the collected materialized temporaries into previous context before
   // exit if the previous also is a lifetime extending context.
-  auto &PrevRecord = parentEvaluationContext();
   if (getLangOpts().CPlusPlus23 && Rec.InLifetimeExtendingContext &&
-      PrevRecord.InLifetimeExtendingContext &&
+      parentEvaluationContext().InLifetimeExtendingContext &&
       !Rec.ForRangeLifetimeExtendTemps.empty()) {
-    PrevRecord.ForRangeLifetimeExtendTemps.append(
+    parentEvaluationContext().ForRangeLifetimeExtendTemps.append(
         Rec.ForRangeLifetimeExtendTemps);
   }
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index a14a086731c13d..ac3fe6ab8f9bd0 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -1540,9 +1540,6 @@ Sema::BuildCXXTypeConstructExpr(TypeSourceInfo *TInfo,
                                 bool ListInitialization) {
   QualType Ty = TInfo->getType();
   SourceLocation TyBeginLoc = TInfo->getTypeLoc().getBeginLoc();
-
-  assert((!ListInitialization || Exprs.size() == 1) &&
-         "List initialization must have exactly one expression.");
   SourceRange FullRange = SourceRange(TyBeginLoc, RParenOrBraceLoc);
 
   InitializedEntity Entity =
@@ -4313,8 +4310,10 @@ Sema::PerformImplicitConversion(Expr *From, QualType ToType,
 // from type to the elements of the to type without resizing the vector.
 static QualType adjustVectorType(ASTContext &Context, QualType FromTy,
                                  QualType ToType, QualType *ElTy = nullptr) {
-  auto *ToVec = ToType->castAs<ExtVectorType>();
-  QualType ElType = ToVec->getElementType();
+  QualType ElType = ToType;
+  if (auto *ToVec = ToType->getAs<VectorType>())
+    ElType = ToVec->getElementType();
+
   if (ElTy)
     *ElTy = ElType;
   if (!FromTy->isVectorType())
@@ -4475,7 +4474,7 @@ Sema::PerformImplicitConversion(Expr *From, QualType ToType,
   case ICK_Integral_Conversion: {
     QualType ElTy = ToType;
     QualType StepTy = ToType;
-    if (ToType->isVectorType())
+    if (FromType->isVectorType() || ToType->isVectorType())
       StepTy = adjustVectorType(Context, FromType, ToType, &ElTy);
     if (ElTy->isBooleanType()) {
       assert(FromType->castAs<EnumType>()->getDecl()->isFixed() &&
@@ -4495,7 +4494,7 @@ Sema::PerformImplicitConversion(Expr *From, QualType ToType,
   case ICK_Floating_Promotion:
   case ICK_Floating_Conversion: {
     QualType StepTy = ToType;
-    if (ToType->isVectorType())
+    if (FromType->isVectorType() || ToType->isVectorType())
       StepTy = adjustVectorType(Context, FromType, ToType);
     From = ImpCastExprToType(From, StepTy, CK_FloatingCast, VK_PRValue,
                              /*BasePath=*/nullptr, CCK)
@@ -4527,7 +4526,7 @@ Sema::PerformImplicitConversion(Expr *From, QualType ToType,
   case ICK_Floating_Integral: {
     QualType ElTy = ToType;
     QualType StepTy = ToType;
-    if (ToType->isVectorType())
+    if (FromType->isVectorType() || ToType->isVectorType())
       StepTy = adjustVectorType(Context, FromType, ToType, &ElTy);
     if (ElTy->isRealFloatingType())
       From = ImpCastExprToType(From, StepTy, CK_IntegralToFloating, VK_PRValue,
@@ -4669,11 +4668,11 @@ Sema::PerformImplicitConversion(Expr *From, QualType ToType,
     }
     QualType ElTy = FromType;
     QualType StepTy = ToType;
-    if (FromType->isVectorType()) {
-      if (getLangOpts().HLSL)
-        StepTy = adjustVectorType(Context, FromType, ToType);
+    if (FromType->isVectorType())
       ElTy = FromType->castAs<VectorType>()->getElementType();
-    }
+    if (getLangOpts().HLSL &&
+        (FromType->isVectorType() || ToType->isVectorType()))
+      StepTy = adjustVectorType(Context, FromType, ToType);
 
     From = ImpCastExprToType(From, StepTy, ScalarTypeToBooleanCastKind(ElTy),
                              VK_PRValue,
@@ -4828,8 +4827,8 @@ Sema::PerformImplicitConversion(Expr *From, QualType ToType,
   // TODO: Support HLSL matrices.
   assert((!From->getType()->isMatrixType() && !ToType->isMatrixType()) &&
          "Dimension conversion for matrix types is not implemented yet.");
-  assert(ToType->isVectorType() &&
-         "Dimension conversion is only supported for vector types.");
+  assert((ToType->isVectorType() || ToType->isBuiltinType()) &&
+         "Dimension conversion output must be vector or scalar type.");
   switch (SCS.Dimension) {
   case ICK_HLSL_Vector_Splat: {
     // Vector splat from any arithmetic type to a vector.
@@ -4841,18 +4840,18 @@ Sema::PerformImplicitConversion(Expr *From, QualType ToType,
   }
   case ICK_HLSL_Vector_Truncation: {
     // Note: HLSL built-in vectors are ExtVectors. Since this truncates a
-    // vector to a smaller vector, this can only operate on arguments where
-    // the source and destination types are ExtVectors.
-    assert(From->getType()->isExtVectorType() && ToType->isExtVectorType() &&
-           "HLSL vector truncation should only apply to ExtVectors");
+    // vector to a smaller vector or to a scalar, this can only operate on
+    // arguments where the source type is an ExtVector and the destination
+    // type is either an ExtVectorType or a builtin scalar type.
     auto *FromVec = From->getType()->castAs<ExtVectorType>();
-    auto *ToVec = ToType->castAs<ExtVectorType>();
-    QualType ElType = FromVec->getElementType();
-    QualType TruncTy =
-        Context.getExtVectorType(ElType, ToVec->getNumElements());
+    QualType TruncTy = FromVec->getElementType();
+    if (auto *ToVec = ToType->getAs<ExtVectorType>())
+      TruncTy = Context.getExtVectorType(TruncTy, ToVec->getNumElements());
+
     From = ImpCastExprToType(From, TruncTy, CK_HLSLVectorTruncation,
                              From->getValueKind())
                .get();
+    break;
   }
   case ICK_Identity:
diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp
index 7dc17187524621..d21b8cb8c04e63 100644
--- a/clang/lib/Sema/SemaInit.cpp
+++ b/clang/lib/Sema/SemaInit.cpp
@@ -750,8 +750,21 @@ void InitListChecker::FillInEmptyInitForField(unsigned Init, FieldDecl *Field,
     if (Field->hasInClassInitializer()) {
       if (VerifyOnly)
         return;
-
-      ExprResult DIE = SemaRef.BuildCXXDefaultInitExpr(Loc, Field);
+      ExprResult DIE;
+      {
+        // Enter a default initializer rebuild context, then we can support
+        // lifetime extension of temporary created by aggregate initialization
+        // using a default member initializer.
+        // CWG1815 (https://wg21.link/CWG1815).
+        EnterExpressionEvaluationContext RebuildDefaultInit(
+            SemaRef, Sema::ExpressionEvaluationContext::PotentiallyEvaluated);
+        SemaRef.currentEvaluationContext().RebuildDefaultArgOrDefaultInit =
+            true;
+        SemaRef.currentEvaluationContext().DelayedDefaultInitializationContext =
+            SemaRef.parentEvaluationContext()
+                .DelayedDefaultInitializationContext;
+        DIE = SemaRef.BuildCXXDefaultInitExpr(Loc, Field);
+      }
       if (DIE.isInvalid()) {
         hadError = true;
         return;
@@ -7521,10 +7534,8 @@ Sema::CreateMaterializeTemporaryExpr(QualType T, Expr *Temporary,
   // are done in both CreateMaterializeTemporaryExpr and MaybeBindToTemporary,
   // but there may be a chance to merge them.
   Cleanup.setExprNeedsCleanups(false);
-  if (isInLifetimeExtendingContext()) {
-    auto &Record = ExprEvalContexts.back();
-    Record.ForRangeLifetimeExtendTemps.push_back(MTE);
-  }
+  if (isInLifetimeExtendingContext())
+    currentEvaluationContext().ForRangeLifetimeExtendTemps.push_back(MTE);
 
   return MTE;
 }
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index 861b0a91240b3b..ea72d3f003cbc4 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -2032,26 +2032,42 @@ static bool IsVectorConversion(Sema &S, QualType FromType, QualType ToType,
   if (S.Context.hasSameUnqualifiedType(FromType, ToType))
     return false;
 
+  // HLSL allows implicit truncation of vector types.
+  if (S.getLangOpts().HLSL) {
+    auto *ToExtType = ToType->getAs<ExtVectorType>();
+    auto *FromExtType = FromType->getAs<ExtVectorType>();
+
+    // If both arguments are vectors, handle possible vector truncation and
+    // element conversion.
+    if (ToExtType && FromExtType) {
+      unsigned FromElts = FromExtType->getNumElements();
+      unsigned ToElts = ToExtType->getNumElements();
+      if (FromElts < ToElts)
+        return false;
+      if (FromElts == ToElts)
+        ElConv = ICK_Identity;
+      else
+        ElConv = ICK_HLSL_Vector_Truncation;
+
+      QualType FromElTy = FromExtType->getElementType();
+      QualType ToElTy = ToExtType->getElementType();
+      if (S.Context.hasSameUnqualifiedType(FromElTy, ToElTy))
+        return true;
+      return IsVectorElementConversion(S, FromElTy, ToElTy, ICK, From);
+    }
+    if (FromExtType && !ToExtType) {
+      ElConv = ICK_HLSL_Vector_Truncation;
+      QualType FromElTy = FromExtType->getElementType();
+      if (S.Context.hasSameUnqualifiedType(FromElTy, ToType))
+        return true;
+      return IsVectorElementConversion(S, FromElTy, ToType, ICK, From);
+    }
+    // Fallthrough for the case where ToType is a vector and FromType is not.
+  }
+
   // There are no conversions between extended vector types, only identity.
   if (auto *ToExtType = ToType->getAs<ExtVectorType>()) {
     if (auto *FromExtType = FromType->getAs<ExtVectorType>()) {
-      // HLSL allows implicit truncation of vector types.
-      if (S.getLangOpts().HLSL) {
-        unsigned FromElts = FromExtType->getNumElements();
-        unsigned ToElts = ToExtType->getNumElements();
-        if (FromElts < ToElts)
-          return false;
-        if (FromElts == ToElts)
-          ElConv = ICK_Identity;
-        else
-          ElConv = ICK_HLSL_Vector_Truncation;
-
-        QualType FromElTy = FromExtType->getElementType();
-        QualType ToElTy = ToExtType->getElementType();
-        if (S.Context.hasSameUnqualifiedType(FromElTy, ToElTy))
-          return true;
-        return IsVectorElementConversion(S, FromElTy, ToElTy, ICK, From);
-      }
       // There are no conversions between extended vector types other than the
       // identity conversion.
       return false;
diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
index 6df412cbb09c83..bb311e38409280 100644
--- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -5481,7 +5481,10 @@ void Sema::InstantiateVariableInitializer(
     EnterExpressionEvaluationContext Evaluated(
         *this, Sema::ExpressionEvaluationContext::PotentiallyEvaluated, Var);
 
-    keepInLifetimeExtendingContext();
+    currentEvaluationContext().InLifetimeExtendingContext =
+        parentEvaluationContext().InLifetimeExtendingContext;
+    currentEvaluationContext().RebuildDefaultArgOrDefaultInit =
+        parentEvaluationContext().RebuildDefaultArgOrDefaultInit;
     // Instantiate the initializer.
     ExprResult Init;
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 0daf620b4123e4..4bbc024587915c 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -4254,7 +4254,10 @@ ExprResult TreeTransform<Derived>::TransformInitializer(Expr *Init,
         getSema(), EnterExpressionEvaluationContext::InitList,
         Construct->isListInitialization());
 
-    getSema().keepInLifetimeExtendingContext();
+    getSema().currentEvaluationContext().InLifetimeExtendingContext =
+        getSema().parentEvaluationContext().InLifetimeExtendingContext;
+    getSema().currentEvaluationContext().RebuildDefaultArgOrDefaultInit =
+        getSema().parentEvaluationContext().RebuildDefaultArgOrDefaultInit;
     SmallVector<Expr *, 8> NewArgs;
     bool ArgChanged = false;
     if (getDerived().TransformExprs(Construct->getArgs(),
                                     Construct->getNumArgs(), true, NewArgs,
                                     &ArgChanged))
@@ -8924,8 +8927,9 @@ TreeTransform<Derived>::TransformCXXForRangeStmt(CXXForRangeStmt *S) {
   // P2718R0 - Lifetime extension in range-based for loops.
if (getSema().getLangOpts().CPlusPlus23) { - auto &LastRecord = getSema().ExprEvalContexts.back(); + auto &LastRecord = getSema().currentEvaluationContext(); LastRecord.InLifetimeExtendingContext = true; + LastRecord.RebuildDefaultArgOrDefaultInit = true; } StmtResult Init = S->getInit() ? getDerived().TransformStmt(S->getInit()) : StmtResult(); @@ -14443,6 +14447,13 @@ TreeTransform::TransformCXXTemporaryObjectExpr( if (TransformExprs(E->getArgs(), E->getNumArgs(), true, Args, &ArgumentChanged)) return ExprError(); + + if (E->isListInitialization() && !E->isStdInitListInitialization()) { + ExprResult Res = RebuildInitList(E->getBeginLoc(), Args, E->getEndLoc()); + if (Res.isInvalid()) + return ExprError(); + Args = {Res.get()}; + } } if (!getDerived().AlwaysRebuild() && @@ -14454,12 +14465,9 @@ TreeTransform::TransformCXXTemporaryObjectExpr( return SemaRef.MaybeBindToTemporary(E); } - // FIXME: We should just pass E->isListInitialization(), but we're not - // prepared to handle list-initialization without a child InitListExpr. SourceLocation LParenLoc = T->getTypeLoc().getEndLoc(); return getDerived().RebuildCXXTemporaryObjectExpr( - T, LParenLoc, Args, E->getEndLoc(), - /*ListInitialization=*/LParenLoc.isInvalid()); + T, LParenLoc, Args, E->getEndLoc(), E->isListInitialization()); } template diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 0ee53e43dff39c..e5a1e20a265616 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -9782,8 +9782,7 @@ void ASTReader::finishPendingActions() { !PendingDeducedVarTypes.empty() || !PendingIncompleteDeclChains.empty() || !PendingDeclChains.empty() || !PendingMacroIDs.empty() || !PendingDeclContextInfos.empty() || !PendingUpdateRecords.empty() || - !PendingObjCExtensionIvarRedeclarations.empty() || - !PendingLambdas.empty()) { + !PendingObjCExtensionIvarRedeclarations.empty()) { // If any identifiers with corresponding top-level declarations have // been loaded, load those declarations now. using TopLevelDeclsMap = @@ -9928,11 +9927,6 @@ void ASTReader::finishPendingActions() { } PendingObjCExtensionIvarRedeclarations.pop_back(); } - - // Load any pendiong lambdas. - for (auto ID : PendingLambdas) - GetDecl(ID); - PendingLambdas.clear(); } // At this point, all update records for loaded decls are in place, so any diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp index 20e577404d997d..9272e23c7da3fc 100644 --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -1155,16 +1155,6 @@ void ASTDeclReader::VisitFunctionDecl(FunctionDecl *FD) { for (unsigned I = 0; I != NumParams; ++I) Params.push_back(readDeclAs()); FD->setParams(Reader.getContext(), Params); - - // For the first decl add all lambdas inside for loading them later, - // otherwise skip them. 
- unsigned NumLambdas = Record.readInt(); - if (FD->isFirstDecl()) { - for (unsigned I = 0; I != NumLambdas; ++I) - Reader.PendingLambdas.push_back(Record.readDeclID()); - } else { - Record.skipInts(NumLambdas); - } } void ASTDeclReader::VisitObjCMethodDecl(ObjCMethodDecl *MD) { diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp index 732a6e21f340d6..555f6325da646b 100644 --- a/clang/lib/Serialization/ASTWriterDecl.cpp +++ b/clang/lib/Serialization/ASTWriterDecl.cpp @@ -18,7 +18,6 @@ #include "clang/AST/Expr.h" #include "clang/AST/OpenMPClause.h" #include "clang/AST/PrettyDeclStackTrace.h" -#include "clang/AST/StmtVisitor.h" #include "clang/Basic/SourceManager.h" #include "clang/Serialization/ASTReader.h" #include "clang/Serialization/ASTRecordWriter.h" @@ -626,33 +625,6 @@ void ASTDeclWriter::VisitDeclaratorDecl(DeclaratorDecl *D) { : QualType()); } -static llvm::SmallVector collectLambdas(FunctionDecl *D) { - struct LambdaCollector : public ConstStmtVisitor { - llvm::SmallVectorImpl &Lambdas; - - LambdaCollector(llvm::SmallVectorImpl &Lambdas) - : Lambdas(Lambdas) {} - - void VisitLambdaExpr(const LambdaExpr *E) { - VisitStmt(E); - Lambdas.push_back(E->getLambdaClass()); - } - - void VisitStmt(const Stmt *S) { - if (!S) - return; - for (const Stmt *Child : S->children()) - if (Child) - Visit(Child); - } - }; - - llvm::SmallVector Lambdas; - if (D->hasBody()) - LambdaCollector(Lambdas).VisitStmt(D->getBody()); - return Lambdas; -} - void ASTDeclWriter::VisitFunctionDecl(FunctionDecl *D) { static_assert(DeclContext::NumFunctionDeclBits == 44, "You need to update the serializer after you change the " @@ -792,19 +764,6 @@ void ASTDeclWriter::VisitFunctionDecl(FunctionDecl *D) { Record.push_back(D->param_size()); for (auto *P : D->parameters()) Record.AddDeclRef(P); - - // Store references to all lambda decls inside function to load them - // immediately after loading the function to make sure that canonical - // decls for lambdas will be from the same module. - if (D->isCanonicalDecl()) { - llvm::SmallVector Lambdas = collectLambdas(D); - Record.push_back(Lambdas.size()); - for (const auto *L : Lambdas) - Record.AddDeclRef(L); - } else { - Record.push_back(0); - } - Code = serialization::DECL_FUNCTION; } @@ -2280,7 +2239,6 @@ getFunctionDeclAbbrev(serialization::DeclCode Code) { // // This is: // NumParams and Params[] from FunctionDecl, and - // NumLambdas, Lambdas[] from FunctionDecl, and // NumOverriddenMethods, OverriddenMethods[] from CXXMethodDecl. // // Add an AbbrevOp for 'size then elements' and use it here. diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp index 49bbff1942167b..f48b2fd9dca71b 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp @@ -143,6 +143,16 @@ bool isReturnValueRefCounted(const clang::FunctionDecl *F) { return false; } +std::optional isUncounted(const QualType T) { + if (auto *Subst = dyn_cast(T)) { + if (auto *Decl = Subst->getAssociatedDecl()) { + if (isRefType(safeGetName(Decl))) + return false; + } + } + return isUncounted(T->getAsCXXRecordDecl()); +} + std::optional isUncounted(const CXXRecordDecl* Class) { // Keep isRefCounted first as it's cheaper. 
@@ -231,11 +241,9 @@ bool isSingleton(const FunctionDecl *F) { if (!MethodDecl->isStatic()) return false; } - const auto &Name = safeGetName(F); - std::string SingletonStr = "singleton"; - auto index = Name.find(SingletonStr); - return index != std::string::npos && - index == Name.size() - SingletonStr.size(); + const auto &NameStr = safeGetName(F); + StringRef Name = NameStr; // FIXME: Make safeGetName return StringRef. + return Name == "singleton" || Name.ends_with("Singleton"); } // We only care about statements so let's use the simple @@ -397,6 +405,7 @@ class TrivialFunctionAnalysisVisitor return true; if (Name == "WTFCrashWithInfo" || Name == "WTFBreakpointTrap" || + Name == "WTFReportBacktrace" || Name == "WTFCrashWithSecurityImplication" || Name == "WTFCrash" || Name == "WTFReportAssertionFailure" || Name == "isMainThread" || Name == "isMainThreadOrGCThread" || Name == "isMainRunLoop" || diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h index ec1db1cc335807..2932e62ad06e4b 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h @@ -20,6 +20,7 @@ class CXXMethodDecl; class CXXRecordDecl; class Decl; class FunctionDecl; +class QualType; class Stmt; class Type; @@ -42,6 +43,10 @@ std::optional isRefCountable(const clang::CXXRecordDecl* Class); /// \returns true if \p Class is ref-counted, false if not. bool isRefCounted(const clang::CXXRecordDecl *Class); +/// \returns true if \p Class is ref-countable AND not ref-counted, false if +/// not, std::nullopt if inconclusive. +std::optional isUncounted(const clang::QualType T); + /// \returns true if \p Class is ref-countable AND not ref-counted, false if /// not, std::nullopt if inconclusive. std::optional isUncounted(const clang::CXXRecordDecl* Class); diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp index 704c082a4d1d63..81c2434ce64775 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp @@ -87,8 +87,7 @@ class UncountedCallArgsChecker } auto *E = MemberCallExpr->getImplicitObjectArgument(); QualType ArgType = MemberCallExpr->getObjectType(); - std::optional IsUncounted = - isUncounted(ArgType->getAsCXXRecordDecl()); + std::optional IsUncounted = isUncounted(ArgType); if (IsUncounted && *IsUncounted && !isPtrOriginSafe(E)) reportBugOnThis(E); } diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningService.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningService.cpp index 7458ef484b16c4..4fb5977580497c 100644 --- a/clang/lib/Tooling/DependencyScanning/DependencyScanningService.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningService.cpp @@ -15,9 +15,9 @@ using namespace dependencies; DependencyScanningService::DependencyScanningService( ScanningMode Mode, ScanningOutputFormat Format, - ScanningOptimizations OptimizeArgs, bool EagerLoadModules) + ScanningOptimizations OptimizeArgs, bool EagerLoadModules, bool TraceVFS) : Mode(Mode), Format(Format), OptimizeArgs(OptimizeArgs), - EagerLoadModules(EagerLoadModules) { + EagerLoadModules(EagerLoadModules), TraceVFS(TraceVFS) { // Initialize targets for object file support. 
  llvm::InitializeAllTargets();
  llvm::InitializeAllTargetMCs();
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
index 09ad5ebc7954cf..d77187bfb1f2b8 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
@@ -501,6 +501,9 @@ DependencyScanningWorker::DependencyScanningWorker(
   // The scanner itself writes only raw ast files.
   PCHContainerOps->registerWriter(std::make_unique());
+  if (Service.shouldTraceVFS())
+    FS = llvm::makeIntrusiveRefCnt(std::move(FS));
+
   switch (Service.getMode()) {
   case ScanningMode::DependencyDirectivesScan:
     DepFS =
diff --git a/clang/test/AST/HLSL/StructuredBuffer-AST.hlsl b/clang/test/AST/HLSL/StructuredBuffer-AST.hlsl
new file mode 100644
index 00000000000000..11d84ac7b85db2
--- /dev/null
+++ b/clang/test/AST/HLSL/StructuredBuffer-AST.hlsl
@@ -0,0 +1,62 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library -x hlsl -ast-dump -DEMPTY %s | FileCheck -check-prefix=EMPTY %s
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library -x hlsl -ast-dump %s | FileCheck %s
+
+
+// This test covers two different AST generations. The "EMPTY" test mode verifies
+// the AST generated by forward declaration of the HLSL types, which happens on
+// initializing the HLSL external AST with an AST Context.
+
+// The non-empty mode has a use that requires the StructuredBuffer type be complete,
+// which results in the AST being populated by the external AST source. That
+// case covers the full implementation of the template declaration and the
+// instantiated specialization.
+
+// EMPTY: ClassTemplateDecl 0x{{[0-9A-Fa-f]+}} <> implicit StructuredBuffer
+// EMPTY-NEXT: TemplateTypeParmDecl 0x{{[0-9A-Fa-f]+}} <> class depth 0 index 0 element_type
+// EMPTY-NEXT: CXXRecordDecl 0x{{[0-9A-Fa-f]+}} <> implicit class StructuredBuffer
+// EMPTY-NEXT: FinalAttr 0x{{[0-9A-Fa-f]+}} <> Implicit final
+
+// There should be no more occurrences of StructuredBuffer
+// EMPTY-NOT: StructuredBuffer
+
+#ifndef EMPTY
+
+StructuredBuffer Buffer;
+
+#endif
+
+// CHECK: ClassTemplateDecl 0x{{[0-9A-Fa-f]+}} <> implicit StructuredBuffer
+// CHECK-NEXT: TemplateTypeParmDecl 0x{{[0-9A-Fa-f]+}} <> class depth 0 index 0 element_type
+// CHECK-NEXT: CXXRecordDecl 0x{{[0-9A-Fa-f]+}} <> implicit class StructuredBuffer definition
+
+// CHECK: FinalAttr 0x{{[0-9A-Fa-f]+}} <> Implicit final
+// CHECK-NEXT: FieldDecl 0x{{[0-9A-Fa-f]+}} <> implicit h 'element_type * {{\[\[}}hlsl::resource_class(UAV)]]':'element_type *'
+// CHECK-NEXT: HLSLResourceAttr 0x{{[0-9A-Fa-f]+}} <> Implicit TypedBuffer
+
+// CHECK: CXXMethodDecl 0x{{[0-9A-Fa-f]+}} <> operator[] 'element_type &const (unsigned int) const'
+// CHECK-NEXT: ParmVarDecl 0x{{[0-9A-Fa-f]+}} <> Idx 'unsigned int'
+// CHECK-NEXT: CompoundStmt 0x{{[0-9A-Fa-f]+}} <>
+// CHECK-NEXT: ReturnStmt 0x{{[0-9A-Fa-f]+}} <>
+// CHECK-NEXT: ArraySubscriptExpr 0x{{[0-9A-Fa-f]+}} <> 'element_type' lvalue
+// CHECK-NEXT: MemberExpr 0x{{[0-9A-Fa-f]+}} <> 'element_type * {{\[\[}}hlsl::resource_class(UAV)]]':'element_type *' lvalue .h 0x{{[0-9A-Fa-f]+}}
+// CHECK-NEXT: CXXThisExpr 0x{{[0-9A-Fa-f]+}} <> 'const StructuredBuffer' lvalue implicit this
+// CHECK-NEXT: DeclRefExpr 0x{{[0-9A-Fa-f]+}} <> 'unsigned int' ParmVar 0x{{[0-9A-Fa-f]+}} 'Idx' 'unsigned int'
+// CHECK-NEXT: AlwaysInlineAttr 0x{{[0-9A-Fa-f]+}} <> Implicit always_inline
+
+// CHECK-NEXT: CXXMethodDecl 0x{{[0-9A-Fa-f]+}} <>
operator[] 'element_type &(unsigned int)' +// CHECK-NEXT: ParmVarDecl 0x{{[0-9A-Fa-f]+}} <> Idx 'unsigned int' +// CHECK-NEXT: CompoundStmt 0x{{[0-9A-Fa-f]+}} <> +// CHECK-NEXT: ReturnStmt 0x{{[0-9A-Fa-f]+}} <> +// CHECK-NEXT: ArraySubscriptExpr 0x{{[0-9A-Fa-f]+}} <> 'element_type' lvalue +// CHECK-NEXT: MemberExpr 0x{{[0-9A-Fa-f]+}} <> 'element_type * {{\[\[}}hlsl::resource_class(UAV)]]':'element_type *' lvalue .h 0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT: CXXThisExpr 0x{{[0-9A-Fa-f]+}} <> 'StructuredBuffer' lvalue implicit this +// CHECK-NEXT: DeclRefExpr 0x{{[0-9A-Fa-f]+}} <> 'unsigned int' ParmVar 0x{{[0-9A-Fa-f]+}} 'Idx' 'unsigned int' +// CHECK-NEXT: AlwaysInlineAttr 0x{{[0-9A-Fa-f]+}} <> Implicit always_inline + +// CHECK: ClassTemplateSpecializationDecl 0x{{[0-9A-Fa-f]+}} <> class StructuredBuffer definition + +// CHECK: TemplateArgument type 'float' +// CHECK-NEXT: BuiltinType 0x{{[0-9A-Fa-f]+}} 'float' +// CHECK-NEXT: FinalAttr 0x{{[0-9A-Fa-f]+}} <> Implicit final +// CHECK-NEXT: FieldDecl 0x{{[0-9A-Fa-f]+}} <> implicit referenced h 'float * {{\[\[}}hlsl::resource_class(UAV)]]':'float *' +// CHECK-NEXT: HLSLResourceAttr 0x{{[0-9A-Fa-f]+}} <> Implicit TypedBuffer diff --git a/clang/test/AST/ast-dump-default-init-json.cpp b/clang/test/AST/ast-dump-default-init-json.cpp index 1058b4e3ea4d93..f4949a9c9eedf4 100644 --- a/clang/test/AST/ast-dump-default-init-json.cpp +++ b/clang/test/AST/ast-dump-default-init-json.cpp @@ -789,10 +789,10 @@ void test() { // CHECK-NEXT: "valueCategory": "lvalue", // CHECK-NEXT: "extendingDecl": { // CHECK-NEXT: "id": "0x{{.*}}", -// CHECK-NEXT: "kind": "FieldDecl", -// CHECK-NEXT: "name": "a", +// CHECK-NEXT: "kind": "VarDecl", +// CHECK-NEXT: "name": "b", // CHECK-NEXT: "type": { -// CHECK-NEXT: "qualType": "const A &" +// CHECK-NEXT: "qualType": "B" // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "storageDuration": "automatic", diff --git a/clang/test/AST/ast-dump-default-init.cpp b/clang/test/AST/ast-dump-default-init.cpp index 15b29f04bf21bf..26864fbf15424d 100644 --- a/clang/test/AST/ast-dump-default-init.cpp +++ b/clang/test/AST/ast-dump-default-init.cpp @@ -13,7 +13,7 @@ void test() { } // CHECK: -CXXDefaultInitExpr 0x{{[^ ]*}} <{{.*}}> 'const A' lvalue has rewritten init // CHECK-NEXT: `-ExprWithCleanups 0x{{[^ ]*}} <{{.*}}> 'const A' lvalue -// CHECK-NEXT: `-MaterializeTemporaryExpr 0x{{[^ ]*}} <{{.*}}> 'const A' lvalue extended by Field 0x{{[^ ]*}} 'a' 'const A &' +// CHECK-NEXT: `-MaterializeTemporaryExpr 0x{{[^ ]*}} <{{.*}}> 'const A' lvalue extended by Var 0x{{[^ ]*}} 'b' 'B' // CHECK-NEXT: `-ImplicitCastExpr 0x{{[^ ]*}} <{{.*}}> 'const A' // CHECK-NEXT: `-CXXFunctionalCastExpr 0x{{[^ ]*}} <{{.*}}> 'A' functional cast to A // CHECK-NEXT: `-InitListExpr 0x{{[^ ]*}} <{{.*}}> 'A' diff --git a/clang/test/Analysis/Checkers/WebKit/uncounted-obj-arg.cpp b/clang/test/Analysis/Checkers/WebKit/uncounted-obj-arg.cpp index a98c6eb9c84d97..97efb354f0371d 100644 --- a/clang/test/Analysis/Checkers/WebKit/uncounted-obj-arg.cpp +++ b/clang/test/Analysis/Checkers/WebKit/uncounted-obj-arg.cpp @@ -6,6 +6,7 @@ void WTFBreakpointTrap(); void WTFCrashWithInfo(int, const char*, const char*, int); void WTFReportAssertionFailure(const char* file, int line, const char* function, const char* assertion); +void WTFReportBacktrace(void); void WTFCrash(void); void WTFCrashWithSecurityImplication(void); @@ -334,6 +335,7 @@ class RefCounted { } unsigned trivial60() { return ObjectWithNonTrivialDestructor { 5 }.value(); } unsigned trivial61() { return DerivedNumber('7').value(); } 
+  void trivial62() { WTFReportBacktrace(); }
  static RefCounted& singleton() {
    static RefCounted s_RefCounted;
@@ -341,6 +343,12 @@ class RefCounted {
    return s_RefCounted;
  }
+  static RefCounted& otherSingleton() {
+    static RefCounted s_RefCounted;
+    s_RefCounted.ref();
+    return s_RefCounted;
+  }
+
  Number nonTrivial1() { return Number(3) + Number(4); }
  Number nonTrivial2() { return Number { 0.3 }; }
  int nonTrivial3() { return v ? otherFunction() : 0; }
@@ -506,9 +514,12 @@ class UnrelatedClass {
    getFieldTrivial().trivial59(); // no-warning
    getFieldTrivial().trivial60(); // no-warning
    getFieldTrivial().trivial61(); // no-warning
+    getFieldTrivial().trivial62(); // no-warning
    RefCounted::singleton().trivial18(); // no-warning
    RefCounted::singleton().someFunction(); // no-warning
+    RefCounted::otherSingleton().trivial18(); // no-warning
+    RefCounted::otherSingleton().someFunction(); // no-warning
    getFieldTrivial().recursiveTrivialFunction(7); // no-warning
    getFieldTrivial().recursiveComplexFunction(9);
diff --git a/clang/test/Analysis/Checkers/WebKit/uncounted-obj-arg.mm b/clang/test/Analysis/Checkers/WebKit/uncounted-obj-arg.mm
new file mode 100644
index 00000000000000..db0c5b19eec5bb
--- /dev/null
+++ b/clang/test/Analysis/Checkers/WebKit/uncounted-obj-arg.mm
@@ -0,0 +1,26 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=alpha.webkit.UncountedCallArgsChecker -verify %s
+// expected-no-diagnostics
+
+#import "mock-types.h"
+#import "mock-system-header.h"
+#import "../../Inputs/system-header-simulator-for-objc-dealloc.h"
+
+@interface Foo : NSObject
+
+@property (nonatomic, readonly) RefPtr countable;
+
+- (void)execute;
+- (RefPtr)_protectedRefCountable;
+@end
+
+@implementation Foo
+
+- (void)execute {
+  self._protectedRefCountable->method();
+}
+
+- (RefPtr)_protectedRefCountable {
+  return _countable;
+}
+
+@end
diff --git a/clang/test/Analysis/lifetime-extended-regions.cpp b/clang/test/Analysis/lifetime-extended-regions.cpp
index 4e98bd4b0403eb..4458ad294af7cb 100644
--- a/clang/test/Analysis/lifetime-extended-regions.cpp
+++ b/clang/test/Analysis/lifetime-extended-regions.cpp
@@ -120,10 +120,11 @@ void aggregateWithReferences() {
  clang_analyzer_dump(viaReference);    // expected-warning-re {{&lifetime_extended_object{RefAggregate, viaReference, S{{[0-9]+}}} }}
  clang_analyzer_dump(viaReference.rx); // expected-warning-re {{&lifetime_extended_object{int, viaReference, S{{[0-9]+}}} }}
  clang_analyzer_dump(viaReference.ry); // expected-warning-re {{&lifetime_extended_object{Composite, viaReference, S{{[0-9]+}}} }}
-
-  // clang does not currently implement extending lifetime of object bound to reference members of aggregates,
-  // that are created from default member initializer (see `warn_unsupported_lifetime_extension` from `-Wdangling`)
-  RefAggregate defaultInitExtended{i}; // clang-bug does not extend `Composite`
+
+  // FIXME: clang now supports extending the lifetime of objects bound to reference members of aggregates
+  // that are created from a default member initializer, but the CFG and ExprEngine still need to be updated
+  // to account for this change.
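+  // Until then, the analyzer keeps modeling the temporary as ending with the full-expression, which is why the dump below is still Unknown rather than a lifetime-extended region.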
+  // Once those are updated, the following should produce the warning: {{&lifetime_extended_object{Composite, defaultInitExtended, S{{[0-9]+}}} }}
+  RefAggregate defaultInitExtended{i};
  clang_analyzer_dump(defaultInitExtended.ry); // expected-warning {{Unknown }}
}
diff --git a/clang/test/CXX/drs/cwg16xx.cpp b/clang/test/CXX/drs/cwg16xx.cpp
index cf6b45ceabf2cc..95e241f0d03e9b 100644
--- a/clang/test/CXX/drs/cwg16xx.cpp
+++ b/clang/test/CXX/drs/cwg16xx.cpp
@@ -449,6 +449,27 @@ namespace cwg1696 { // cwg1696: 7
  // since-cxx14-note@-2 {{default member initializer declared here}}
  };
  A a{a, a};
+
+  struct A1 {
+    A1() : v(42) {}
+    // since-cxx14-error@-1 {{reference member 'v' binds to a temporary object whose lifetime would be shorter than the lifetime of the constructed object}}
+    // since-cxx14-note@#cwg1696-A1 {{reference member declared here}}
+    const int &v; // #cwg1696-A1
+  };
+
+  struct A2 {
+    A2() = default;
+    // since-cxx14-error@-1 {{reference member 'v' binds to a temporary object whose lifetime would be shorter than the lifetime of the constructed object}}
+    // since-cxx14-note-re@#cwg1696-A2-b {{in defaulted default constructor for {{.*}} first required here}}
+    // since-cxx14-note@#cwg1696-A2-a {{initializing field 'v' with default member initializer}}
+    A2(int v) : v(v) {}
+    // since-cxx14-warning@-1 {{binding reference member 'v' to stack allocated parameter 'v'}}
+    // since-cxx14-note@#cwg1696-A2-a {{reference member declared here}}
+    const int &v = 42; // #cwg1696-A2-a
+  };
+  A2 a1; // #cwg1696-A2-b
+
+  A2 a2(1); // OK, unfortunately
#endif
}
@@ -483,8 +504,6 @@ namespace cwg1696 { // cwg1696: 7
    const A &a = A(); // #cwg1696-D1-a
  };
  D1 d1 = {}; // #cwg1696-d1
-  // since-cxx14-warning@-1 {{lifetime extension of temporary created by aggregate initialization using a default member initializer is not yet supported; lifetime of temporary will end at the end of the full-expression}}
-  // since-cxx14-note@#cwg1696-D1-a {{initializing field 'a' with default member initializer}}
  struct D2 {
    const A &a = A(); // #cwg1696-D2-a
diff --git a/clang/test/CXX/drs/cwg18xx.cpp b/clang/test/CXX/drs/cwg18xx.cpp
index 61b7faa96a9fbb..7f0fb8cf589d48 100644
--- a/clang/test/CXX/drs/cwg18xx.cpp
+++ b/clang/test/CXX/drs/cwg18xx.cpp
@@ -206,19 +206,28 @@ namespace cwg1814 { // cwg1814: yes
#endif
}
-namespace cwg1815 { // cwg1815: no
+namespace cwg1815 { // cwg1815: 20
#if __cplusplus >= 201402L
-  // FIXME: needs codegen test
-  struct A { int &&r = 0; }; // #cwg1815-A
+  struct A { int &&r = 0; };
  A a = {};
-  // since-cxx14-warning@-1 {{lifetime extension of temporary created by aggregate initialization using a default member initializer is not yet supported; lifetime of temporary will end at the end of the full-expression}} FIXME
-  // since-cxx14-note@#cwg1815-A {{initializing field 'r' with default member initializer}}
  struct B { int &&r = 0; }; // #cwg1815-B
  // since-cxx14-error@-1 {{reference member 'r' binds to a temporary object whose lifetime would be shorter than the lifetime of the constructed object}}
  // since-cxx14-note@#cwg1815-B {{initializing field 'r' with default member initializer}}
  // since-cxx14-note@#cwg1815-b {{in implicit default constructor for 'cwg1815::B' first required here}}
  B b; // #cwg1815-b
+
+#if __cplusplus >= 201703L
+  struct C { const int &r = 0; };
+  constexpr C c = {}; // OK, since cwg1815
+  static_assert(c.r == 0);
+
+  constexpr int f() {
+    A a = {}; // OK, since cwg1815
+    return a.r;
+  }
+  static_assert(f() == 0);
+#endif
#endif
}
diff --git a/clang/test/CXX/special/class.temporary/p6.cpp b/clang/test/CXX/special/class.temporary/p6.cpp
index 5554363cc69abb..a6d2adfd1fd2c5 100644
--- a/clang/test/CXX/special/class.temporary/p6.cpp
+++ b/clang/test/CXX/special/class.temporary/p6.cpp
@@ -269,6 +269,40 @@ void init_capture_init_list() {
  // CHECK: }
}
+void check_dr1815() { // dr1815: yes
+#if __cplusplus >= 201402L
+
+  struct A {
+    int &&r = 0;
+    ~A() {}
+  };
+
+  struct B {
+    A &&a = A{};
+    ~B() {}
+  };
+  B a = {};
+
+  // CHECK: call {{.*}}block_scope_begin_function
+  extern void block_scope_begin_function();
+  extern void block_scope_end_function();
+  block_scope_begin_function();
+  {
+    // CHECK: call void @_ZZ12check_dr1815vEN1BD1Ev
+    // CHECK: call void @_ZZ12check_dr1815vEN1AD1Ev
+    B b = {};
+  }
+  // CHECK: call {{.*}}block_scope_end_function
+  block_scope_end_function();
+
+  // CHECK: call {{.*}}some_other_function
+  extern void some_other_function();
+  some_other_function();
+  // CHECK: call void @_ZZ12check_dr1815vEN1BD1Ev
+  // CHECK: call void @_ZZ12check_dr1815vEN1AD1Ev
+#endif
+}
+
 namespace P2718R0 {
 namespace basic {
 template using T2 = std::list;
diff --git a/clang/test/ClangScanDeps/verbose.test b/clang/test/ClangScanDeps/verbose.test
new file mode 100644
index 00000000000000..99c5214c762018
--- /dev/null
+++ b/clang/test/ClangScanDeps/verbose.test
@@ -0,0 +1,28 @@
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: sed -e "s|DIR|%/t|g" %t/cdb.json.in > %t/cdb.json
+
+// RUN: clang-scan-deps -compilation-database %t/cdb.json -v -o %t/result.json 2>&1 | FileCheck %s
+// CHECK: *** Virtual File System Stats:
+// CHECK-NEXT: {{[[:digit:]]+}} status() calls
+// CHECK-NEXT: {{[[:digit:]]+}} openFileForRead() calls
+// CHECK-NEXT: {{[[:digit:]]+}} dir_begin() calls
+// CHECK-NEXT: {{[[:digit:]]+}} getRealPath() calls
+// CHECK-NEXT: {{[[:digit:]]+}} exists() calls
+// CHECK-NEXT: {{[[:digit:]]+}} isLocal() calls
+
+//--- tu.c
+
+//--- cdb.json.in
+[
+  {
+    "file": "DIR/tu.c",
+    "directory": "DIR",
+    "command": "clang -c DIR/tu.c -o DIR/tu.o"
+  },
+  {
+    "file": "DIR/tu.c",
+    "directory": "DIR",
+    "command": "clang -c DIR/tu.c -o DIR/tu.o"
+  }
+]
diff --git a/clang/test/CodeGen/builtins-wasm.c b/clang/test/CodeGen/builtins-wasm.c
index 3010b8954f1c2e..8943a92faad044 100644
--- a/clang/test/CodeGen/builtins-wasm.c
+++ b/clang/test/CodeGen/builtins-wasm.c
@@ -834,16 +834,16 @@ f16x8 splat_f16x8(float a) {
  return __builtin_wasm_splat_f16x8(a);
}

-float extract_lane_f16x8(f16x8 a, int i) {
-  // WEBASSEMBLY: %0 = tail call float @llvm.wasm.extract.lane.f16x8(<8 x half> %a, i32 %i)
+float extract_lane_f16x8(f16x8 a) {
+  // WEBASSEMBLY: %0 = tail call float @llvm.wasm.extract.lane.f16x8(<8 x half> %a, i32 7)
  // WEBASSEMBLY-NEXT: ret float %0
-  return __builtin_wasm_extract_lane_f16x8(a, i);
+  return __builtin_wasm_extract_lane_f16x8(a, 7);
}

-f16x8 replace_lane_f16x8(f16x8 a, int i, float v) {
-  // WEBASSEMBLY: %0 = tail call <8 x half> @llvm.wasm.replace.lane.f16x8(<8 x half> %a, i32 %i, float %v)
+f16x8 replace_lane_f16x8(f16x8 a, float v) {
+  // WEBASSEMBLY: %0 = tail call <8 x half> @llvm.wasm.replace.lane.f16x8(<8 x half> %a, i32 7, float %v)
  // WEBASSEMBLY-NEXT: ret <8 x half> %0
-  return __builtin_wasm_replace_lane_f16x8(a, i, v);
+  return __builtin_wasm_replace_lane_f16x8(a, 7, v);
}

f16x8 min_f16x8(f16x8 a, f16x8 b) {
diff --git a/clang/test/CodeGenCUDA/device-stub.cu b/clang/test/CodeGenCUDA/device-stub.cu
index 60304647bd4c54..8695433f6df10c 100644
--- a/clang/test/CodeGenCUDA/device-stub.cu
+++ b/clang/test/CodeGenCUDA/device-stub.cu
@@ -175,7 +175,7 @@ __device__
void device_use() { // HIP-SAME: section ".hipFatBinSegment" // * variable to save GPU binary handle after initialization // CUDANORDC: @__[[PREFIX]]_gpubin_handle = internal global ptr null -// HIPNEF: @__[[PREFIX]]_gpubin_handle_{{[0-9a-f]+}} = external hidden global ptr, align 8 +// HIPNEF: @__[[PREFIX]]_gpubin_handle_{{[0-9a-f]+}} = internal global ptr null, align 8 // * constant unnamed string with NVModuleID // CUDARDC: [[MODULE_ID_GLOBAL:@.*]] = private constant // CUDARDC-SAME: c"[[MODULE_ID:.+]]\00", section "__nv_module_id", align 32 diff --git a/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl index 5d751be6dae066..6478ea67e32a0d 100644 --- a/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl +++ b/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl @@ -117,3 +117,27 @@ void d4_to_b2() { vector d4 = 9.0; vector b2 = d4; } + +// CHECK-LABEL: d4_to_d1 +// CHECK: [[d4:%.*]] = alloca <4 x double> +// CHECK: [[d1:%.*]] = alloca <1 x double> +// CHECK: store <4 x double> , ptr [[d4]] +// CHECK: [[vecd4:%.*]] = load <4 x double>, ptr [[d4]] +// CHECK: [[vecd1:%.*]] = shufflevector <4 x double> [[vecd4]], <4 x double> poison, <1 x i32> zeroinitializer +// CHECK: store <1 x double> [[vecd1]], ptr [[d1:%.*]], align 8 +void d4_to_d1() { + vector d4 = 9.0; + vector d1 = d4; +} + +// CHECK-LABEL: d4_to_dScalar +// CHECK: [[d4:%.*]] = alloca <4 x double> +// CHECK: [[d:%.*]] = alloca double +// CHECK: store <4 x double> , ptr [[d4]] +// CHECK: [[vecd4:%.*]] = load <4 x double>, ptr [[d4]] +// CHECK: [[d4x:%.*]] = extractelement <4 x double> [[vecd4]], i32 0 +// CHECK: store double [[d4x]], ptr [[d]] +void d4_to_dScalar() { + vector d4 = 9.0; + double d = d4; +} diff --git a/clang/test/CodeGenHLSL/builtins/StructuredBuffer-annotations.hlsl b/clang/test/CodeGenHLSL/builtins/StructuredBuffer-annotations.hlsl new file mode 100644 index 00000000000000..16b7295c985f77 --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/StructuredBuffer-annotations.hlsl @@ -0,0 +1,22 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s + +StructuredBuffer Buffer1; +StructuredBuffer > BufferArray[4]; + +StructuredBuffer Buffer2 : register(u3); +StructuredBuffer > BufferArray2[4] : register(u4); + +StructuredBuffer Buffer3 : register(u3, space1); +StructuredBuffer > BufferArray3[4] : register(u4, space1); + +[numthreads(1,1,1)] +void main() { +} + +// CHECK: !hlsl.uavs = !{![[Single:[0-9]+]], ![[Array:[0-9]+]], ![[SingleAllocated:[0-9]+]], ![[ArrayAllocated:[0-9]+]], ![[SingleSpace:[0-9]+]], ![[ArraySpace:[0-9]+]]} +// CHECK-DAG: ![[Single]] = !{ptr @"?Buffer1@@3V?$StructuredBuffer@M@hlsl@@A", i32 10, i32 9, i1 false, i32 -1, i32 0} +// CHECK-DAG: ![[Array]] = !{ptr @"?BufferArray@@3PAV?$StructuredBuffer@T?$__vector@M$03@__clang@@@hlsl@@A", i32 10, i32 9, i1 false, i32 -1, i32 0} +// CHECK-DAG: ![[SingleAllocated]] = !{ptr @"?Buffer2@@3V?$StructuredBuffer@M@hlsl@@A", i32 10, i32 9, i1 false, i32 3, i32 0} +// CHECK-DAG: ![[ArrayAllocated]] = !{ptr @"?BufferArray2@@3PAV?$StructuredBuffer@T?$__vector@M$03@__clang@@@hlsl@@A", i32 10, i32 9, i1 false, i32 4, i32 0} +// CHECK-DAG: ![[SingleSpace]] = !{ptr @"?Buffer3@@3V?$StructuredBuffer@M@hlsl@@A", i32 10, i32 9, i1 false, i32 3, i32 1} +// CHECK-DAG: ![[ArraySpace]] = !{ptr @"?BufferArray3@@3PAV?$StructuredBuffer@T?$__vector@M$03@__clang@@@hlsl@@A", i32 10, i32 9, 
i1 false, i32 4, i32 1} diff --git a/clang/test/CodeGenHLSL/builtins/StructuredBuffer-constructor.hlsl b/clang/test/CodeGenHLSL/builtins/StructuredBuffer-constructor.hlsl new file mode 100644 index 00000000000000..34019e5b186931 --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/StructuredBuffer-constructor.hlsl @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 -triple spirv-vulkan-library -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s --check-prefix=CHECK-SPIRV + +StructuredBuffer Buf; + +// CHECK: define linkonce_odr noundef ptr @"??0?$StructuredBuffer@M@hlsl@@QAA@XZ" +// CHECK-NEXT: entry: + +// CHECK: %[[HandleRes:[0-9]+]] = call ptr @llvm.dx.create.handle(i8 1) +// CHECK: store ptr %[[HandleRes]], ptr %h, align 4 + +// CHECK-SPIRV: %[[HandleRes:[0-9]+]] = call ptr @llvm.spv.create.handle(i8 1) +// CHECK-SPIRV: store ptr %[[HandleRes]], ptr %h, align 8 diff --git a/clang/test/CodeGenHLSL/builtins/StructuredBuffer-elementtype.hlsl b/clang/test/CodeGenHLSL/builtins/StructuredBuffer-elementtype.hlsl new file mode 100644 index 00000000000000..8ddf8a6004403e --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/StructuredBuffer-elementtype.hlsl @@ -0,0 +1,52 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.2-compute -finclude-default-header -fnative-half-type -emit-llvm -o - %s | FileCheck %s + +StructuredBuffer BufI16; +StructuredBuffer BufU16; +StructuredBuffer BufI32; +StructuredBuffer BufU32; +StructuredBuffer BufI64; +StructuredBuffer BufU64; +StructuredBuffer BufF16; +StructuredBuffer BufF32; +StructuredBuffer BufF64; +StructuredBuffer< vector > BufI16x4; +StructuredBuffer< vector > BufU32x3; +StructuredBuffer BufF16x2; +StructuredBuffer BufF32x3; +// TODO: StructuredBuffer BufSNormF16; -> 11 +// TODO: StructuredBuffer BufUNormF16; -> 12 +// TODO: StructuredBuffer BufSNormF32; -> 13 +// TODO: StructuredBuffer BufUNormF32; -> 14 +// TODO: StructuredBuffer BufSNormF64; -> 15 +// TODO: StructuredBuffer BufUNormF64; -> 16 + +[numthreads(1,1,1)] +void main(int GI : SV_GroupIndex) { + BufI16[GI] = 0; + BufU16[GI] = 0; + BufI32[GI] = 0; + BufU32[GI] = 0; + BufI64[GI] = 0; + BufU64[GI] = 0; + BufF16[GI] = 0; + BufF32[GI] = 0; + BufF64[GI] = 0; + BufI16x4[GI] = 0; + BufU32x3[GI] = 0; + BufF16x2[GI] = 0; + BufF32x3[GI] = 0; +} + +// CHECK: !{{[0-9]+}} = !{ptr @"?BufI16@@3V?$StructuredBuffer@F@hlsl@@A", i32 10, i32 2, +// CHECK: !{{[0-9]+}} = !{ptr @"?BufU16@@3V?$StructuredBuffer@G@hlsl@@A", i32 10, i32 3, +// CHECK: !{{[0-9]+}} = !{ptr @"?BufI32@@3V?$StructuredBuffer@H@hlsl@@A", i32 10, i32 4, +// CHECK: !{{[0-9]+}} = !{ptr @"?BufU32@@3V?$StructuredBuffer@I@hlsl@@A", i32 10, i32 5, +// CHECK: !{{[0-9]+}} = !{ptr @"?BufI64@@3V?$StructuredBuffer@J@hlsl@@A", i32 10, i32 6, +// CHECK: !{{[0-9]+}} = !{ptr @"?BufU64@@3V?$StructuredBuffer@K@hlsl@@A", i32 10, i32 7, +// CHECK: !{{[0-9]+}} = !{ptr @"?BufF16@@3V?$StructuredBuffer@$f16@@hlsl@@A", i32 10, i32 8, +// CHECK: !{{[0-9]+}} = !{ptr @"?BufF32@@3V?$StructuredBuffer@M@hlsl@@A", i32 10, i32 9, +// CHECK: !{{[0-9]+}} = !{ptr @"?BufF64@@3V?$StructuredBuffer@N@hlsl@@A", i32 10, i32 10, +// CHECK: !{{[0-9]+}} = !{ptr @"?BufI16x4@@3V?$StructuredBuffer@T?$__vector@F$03@__clang@@@hlsl@@A", i32 10, i32 2, +// CHECK: !{{[0-9]+}} = !{ptr @"?BufU32x3@@3V?$StructuredBuffer@T?$__vector@I$02@__clang@@@hlsl@@A", i32 10, i32 5, +// CHECK: !{{[0-9]+}} = !{ptr @"?BufF16x2@@3V?$StructuredBuffer@T?$__vector@$f16@$01@__clang@@@hlsl@@A", i32 10, i32 8, +// CHECK: !{{[0-9]+}} = !{ptr @"?BufF32x3@@3V?$StructuredBuffer@T?$__vector@M$02@__clang@@@hlsl@@A", i32 10, i32 
9, diff --git a/clang/test/CodeGenHLSL/builtins/StructuredBuffer-subscript.hlsl b/clang/test/CodeGenHLSL/builtins/StructuredBuffer-subscript.hlsl new file mode 100644 index 00000000000000..9bd885d94d7e75 --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/StructuredBuffer-subscript.hlsl @@ -0,0 +1,16 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -o - -O0 %s | FileCheck %s + +StructuredBuffer In; +StructuredBuffer Out; + +[numthreads(1,1,1)] +void main(unsigned GI : SV_GroupIndex) { + Out[GI] = In[GI]; +} + +// Even at -O0 the subscript operators get inlined. The -O0 IR is a bit messy +// and confusing to follow so the match here is pretty weak. + +// CHECK: define internal void @"?main@@YAXI@Z" +// CHECK-NOT: call +// CHECK: ret void diff --git a/clang/test/CodeGenHLSL/builtins/dot.hlsl b/clang/test/CodeGenHLSL/builtins/dot.hlsl index 2b76fae61147b4..3f6be04a595e23 100644 --- a/clang/test/CodeGenHLSL/builtins/dot.hlsl +++ b/clang/test/CodeGenHLSL/builtins/dot.hlsl @@ -155,18 +155,6 @@ float test_dot_float3(float3 p0, float3 p1) { return dot(p0, p1); } // CHECK: ret float %hlsl.dot float test_dot_float4(float4 p0, float4 p1) { return dot(p0, p1); } -// CHECK: %hlsl.dot = call float @llvm.[[ICF]].fdot.v2f32(<2 x float> %splat.splat, <2 x float> -// CHECK: ret float %hlsl.dot -float test_dot_float2_splat(float p0, float2 p1) { return dot(p0, p1); } - -// CHECK: %hlsl.dot = call float @llvm.[[ICF]].fdot.v3f32(<3 x float> %splat.splat, <3 x float> -// CHECK: ret float %hlsl.dot -float test_dot_float3_splat(float p0, float3 p1) { return dot(p0, p1); } - -// CHECK: %hlsl.dot = call float @llvm.[[ICF]].fdot.v4f32(<4 x float> %splat.splat, <4 x float> -// CHECK: ret float %hlsl.dot -float test_dot_float4_splat(float p0, float4 p1) { return dot(p0, p1); } - // CHECK: %hlsl.dot = fmul double // CHECK: ret double %hlsl.dot double test_dot_double(double p0, double p1) { return dot(p0, p1); } diff --git a/clang/test/CodeGenHLSL/builtins/lerp.hlsl b/clang/test/CodeGenHLSL/builtins/lerp.hlsl index 298d157da00a35..b11046894bd889 100644 --- a/clang/test/CodeGenHLSL/builtins/lerp.hlsl +++ b/clang/test/CodeGenHLSL/builtins/lerp.hlsl @@ -56,21 +56,3 @@ float3 test_lerp_float3(float3 p0) { return lerp(p0, p0, p0); } // CHECK: %hlsl.lerp = call <4 x float> @llvm.[[TARGET]].lerp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) // CHECK: ret <4 x float> %hlsl.lerp float4 test_lerp_float4(float4 p0) { return lerp(p0, p0, p0); } - -// CHECK: %[[b:.*]] = load <2 x float>, ptr %p1.addr, align 8 -// CHECK: %[[c:.*]] = load <2 x float>, ptr %p1.addr, align 8 -// CHECK: %hlsl.lerp = call <2 x float> @llvm.[[TARGET]].lerp.v2f32(<2 x float> %splat.splat, <2 x float> %[[b]], <2 x float> %[[c]]) -// CHECK: ret <2 x float> %hlsl.lerp -float2 test_lerp_float2_splat(float p0, float2 p1) { return lerp(p0, p1, p1); } - -// CHECK: %[[b:.*]] = load <3 x float>, ptr %p1.addr, align 16 -// CHECK: %[[c:.*]] = load <3 x float>, ptr %p1.addr, align 16 -// CHECK: %hlsl.lerp = call <3 x float> @llvm.[[TARGET]].lerp.v3f32(<3 x float> %splat.splat, <3 x float> %[[b]], <3 x float> %[[c]]) -// CHECK: ret <3 x float> %hlsl.lerp -float3 test_lerp_float3_splat(float p0, float3 p1) { return lerp(p0, p1, p1); } - -// CHECK: %[[b:.*]] = load <4 x float>, ptr %p1.addr, align 16 -// CHECK: %[[c:.*]] = load <4 x float>, ptr %p1.addr, align 16 -// CHECK: %hlsl.lerp = call <4 x float> @llvm.[[TARGET]].lerp.v4f32(<4 x float> %splat.splat, <4 x float> %[[b]], <4 x float> %[[c]]) -// CHECK: ret <4 x float> 
%hlsl.lerp -float4 test_lerp_float4_splat(float p0, float4 p1) { return lerp(p0, p1, p1); } diff --git a/clang/test/CodeGenHLSL/builtins/mad.hlsl b/clang/test/CodeGenHLSL/builtins/mad.hlsl index 449a793caf93b7..265a2552c80fb4 100644 --- a/clang/test/CodeGenHLSL/builtins/mad.hlsl +++ b/clang/test/CodeGenHLSL/builtins/mad.hlsl @@ -263,21 +263,3 @@ uint64_t3 test_mad_uint64_t3(uint64_t3 p0, uint64_t3 p1, uint64_t3 p2) { return // SPIR_CHECK: mul nuw <4 x i64> %{{.*}}, %{{.*}} // SPIR_CHECK: add nuw <4 x i64> %{{.*}}, %{{.*}} uint64_t4 test_mad_uint64_t4(uint64_t4 p0, uint64_t4 p1, uint64_t4 p2) { return mad(p0, p1, p2); } - -// CHECK: %[[p1:.*]] = load <2 x float>, ptr %p1.addr, align 8 -// CHECK: %[[p2:.*]] = load <2 x float>, ptr %p2.addr, align 8 -// CHECK: %hlsl.fmad = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %splat.splat, <2 x float> %[[p1]], <2 x float> %[[p2]]) -// CHECK: ret <2 x float> %hlsl.fmad -float2 test_mad_float2_splat(float p0, float2 p1, float2 p2) { return mad(p0, p1, p2); } - -// CHECK: %[[p1:.*]] = load <3 x float>, ptr %p1.addr, align 16 -// CHECK: %[[p2:.*]] = load <3 x float>, ptr %p2.addr, align 16 -// CHECK: %hlsl.fmad = call <3 x float> @llvm.fmuladd.v3f32(<3 x float> %splat.splat, <3 x float> %[[p1]], <3 x float> %[[p2]]) -// CHECK: ret <3 x float> %hlsl.fmad -float3 test_mad_float3_splat(float p0, float3 p1, float3 p2) { return mad(p0, p1, p2); } - -// CHECK: %[[p1:.*]] = load <4 x float>, ptr %p1.addr, align 16 -// CHECK: %[[p2:.*]] = load <4 x float>, ptr %p2.addr, align 16 -// CHECK: %hlsl.fmad = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %splat.splat, <4 x float> %[[p1]], <4 x float> %[[p2]]) -// CHECK: ret <4 x float> %hlsl.fmad -float4 test_mad_float4_splat(float p0, float4 p1, float4 p2) { return mad(p0, p1, p2); } diff --git a/clang/test/CodeGenHLSL/loops/unroll.hlsl b/clang/test/CodeGenHLSL/loops/unroll.hlsl index 7389f21dd3472b..efca0747805d4b 100644 --- a/clang/test/CodeGenHLSL/loops/unroll.hlsl +++ b/clang/test/CodeGenHLSL/loops/unroll.hlsl @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \ +// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-library -disable-llvm-passes %s -emit-llvm -o - | FileCheck %s /*** for ***/ @@ -35,8 +35,8 @@ void for_nested_one_unroll_enable() for( int j = 0; j < 10; ++j) s += i + j; } -// CHECK: br label %{{.*}}, !llvm.loop ![[FOR_NESTED_ENABLE:.*]] -// CHECK-NOT: br label %{{.*}}, !llvm.loop ![[FOR_NESTED_1_ENABLE:.*]] +// CHECK: br label %{{.*}}, !llvm.loop ![[FOR_NESTED_ENABLE_INNER:.*]] +// CHECK: br label %{{.*}}, !llvm.loop ![[FOR_NESTED_ENABLE_OUTER:.*]] } void for_nested_two_unroll_enable() @@ -111,20 +111,26 @@ void do_enable() } -// CHECK: ![[FOR_DISTINCT]] = distinct !{![[FOR_DISTINCT]], ![[FOR_COUNT:.*]]} -// CHECK: ![[FOR_COUNT]] = !{!"llvm.loop.unroll.count", i32 8} -// CHECK: ![[FOR_DISABLE]] = distinct !{![[FOR_DISABLE]], ![[DISABLE:.*]]} -// CHECK: ![[DISABLE]] = !{!"llvm.loop.unroll.disable"} -// CHECK: ![[FOR_ENABLE]] = distinct !{![[FOR_ENABLE]], ![[ENABLE:.*]]} -// CHECK: ![[ENABLE]] = !{!"llvm.loop.unroll.enable"} -// CHECK: ![[FOR_NESTED_ENABLE]] = distinct !{![[FOR_NESTED_ENABLE]], ![[ENABLE]]} -// CHECK: ![[FOR_NESTED2_ENABLE]] = distinct !{![[FOR_NESTED2_ENABLE]], ![[ENABLE]]} -// CHECK: ![[FOR_NESTED2_1_ENABLE]] = distinct !{![[FOR_NESTED2_1_ENABLE]], ![[ENABLE]]} -// CHECK: ![[WHILE_DISTINCT]] = distinct !{![[WHILE_DISTINCT]], ![[WHILE_COUNT:.*]]} -// CHECK: ![[WHILE_COUNT]] = 
!{!"llvm.loop.unroll.count", i32 4} -// CHECK: ![[WHILE_DISABLE]] = distinct !{![[WHILE_DISABLE]], ![[DISABLE]]} -// CHECK: ![[WHILE_ENABLE]] = distinct !{![[WHILE_ENABLE]], ![[ENABLE]]} -// CHECK: ![[DO_DISTINCT]] = distinct !{![[DO_DISTINCT]], ![[DO_COUNT:.*]]} -// CHECK: ![[DO_COUNT]] = !{!"llvm.loop.unroll.count", i32 16} -// CHECK: ![[DO_DISABLE]] = distinct !{![[DO_DISABLE]], ![[DISABLE]]} -// CHECK: ![[DO_ENABLE]] = distinct !{![[DO_ENABLE]], ![[ENABLE]]} +// CHECK-DAG: [[MUST_PROGRESS:.*]] = !{!"llvm.loop.mustprogress"} +// CHECK-DAG: [[DISABLE:.*]] = !{!"llvm.loop.unroll.disable"} +// CHECK-DAG: [[FOR_COUNT:.*]] = !{!"llvm.loop.unroll.count", i32 8} +// CHECK-DAG: [[ENABLE:.*]] = !{!"llvm.loop.unroll.enable"} +// CHECK-DAG: [[WHILE_COUNT:.*]] = !{!"llvm.loop.unroll.count", i32 4} +// CHECK-DAG: [[DO_COUNT:.*]] = !{!"llvm.loop.unroll.count", i32 16} + +// CHECK-DAG: ![[FOR_DISTINCT]] = distinct !{![[FOR_DISTINCT]], [[MUST_PROGRESS]], [[FOR_COUNT]]} +// CHECK-DAG: ![[FOR_DISABLE]] = distinct !{![[FOR_DISABLE]], [[MUST_PROGRESS]], [[DISABLE]]} +// CHECK-DAG: ![[FOR_ENABLE]] = distinct !{![[FOR_ENABLE]], [[MUST_PROGRESS]], [[ENABLE]]} + +// CHECK-DAG: ![[FOR_NESTED_ENABLE_INNER]] = distinct !{![[FOR_NESTED_ENABLE_INNER]], [[MUST_PROGRESS]]} +// CHECK-DAG: ![[FOR_NESTED_ENABLE_OUTER]] = distinct !{![[FOR_NESTED_ENABLE_OUTER]], [[MUST_PROGRESS]], [[ENABLE]]} +// CHECK-DAG: ![[FOR_NESTED2_ENABLE]] = distinct !{![[FOR_NESTED2_ENABLE]], [[MUST_PROGRESS]], [[ENABLE]]} +// CHECK-DAG: ![[FOR_NESTED2_1_ENABLE]] = distinct !{![[FOR_NESTED2_1_ENABLE]], [[MUST_PROGRESS]], [[ENABLE]]} +// CHECK-DAG: ![[WHILE_DISTINCT]] = distinct !{![[WHILE_DISTINCT]], [[MUST_PROGRESS]], [[WHILE_COUNT]]} + +// CHECK-DAG: ![[WHILE_DISABLE]] = distinct !{![[WHILE_DISABLE]], [[MUST_PROGRESS]], [[DISABLE]]} +// CHECK-DAG: ![[WHILE_ENABLE]] = distinct !{![[WHILE_ENABLE]], [[MUST_PROGRESS]], [[ENABLE]]} +// CHECK-DAG: ![[DO_DISTINCT]] = distinct !{![[DO_DISTINCT]], [[MUST_PROGRESS]], [[DO_COUNT]]} + +// CHECK-DAG: ![[DO_DISABLE]] = distinct !{![[DO_DISABLE]], [[MUST_PROGRESS]], [[DISABLE]]} +// CHECK-DAG: ![[DO_ENABLE]] = distinct !{![[DO_ENABLE]], [[MUST_PROGRESS]], [[ENABLE]]} diff --git a/clang/test/Driver/riscv-mcmodel.c b/clang/test/Driver/riscv-mcmodel.c index 4f5fa95f59b666..c27d7c63a75a4f 100644 --- a/clang/test/Driver/riscv-mcmodel.c +++ b/clang/test/Driver/riscv-mcmodel.c @@ -10,5 +10,14 @@ // RUN: %clang --target=riscv32 -### -c -mcmodel=medany %s 2>&1 | FileCheck --check-prefix=MEDIUM %s // RUN: %clang --target=riscv64 -### -c -mcmodel=medany %s 2>&1 | FileCheck --check-prefix=MEDIUM %s +// RUN: not %clang --target=riscv32 -### -c -mcmodel=large %s 2>&1 | FileCheck --check-prefix=ERR-LARGE %s +// RUN: %clang --target=riscv64 -### -c -mcmodel=large %s 2>&1 | FileCheck --check-prefix=LARGE %s + +// RUN: not %clang --target=riscv64 -### -c -mcmodel=large -fpic %s 2>&1 | FileCheck --check-prefix=LARGE %s + // SMALL: "-mcmodel=small" // MEDIUM: "-mcmodel=medium" +// LARGE: "-mcmodel=large" + +// ERR-LARGE: error: unsupported argument 'large' to option '-mcmodel=' for target 'riscv32' +// ERR-PIC-LARGE: error: invalid argument '-mcmodel=large' not allowed with '-fpic' diff --git a/clang/test/Headers/crash-instantiated-in-scope-cxx-modules.cpp b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules.cpp deleted file mode 100644 index 80844a58ad825a..00000000000000 --- a/clang/test/Headers/crash-instantiated-in-scope-cxx-modules.cpp +++ /dev/null @@ -1,76 +0,0 @@ -// RUN: rm -fR %t -// RUN: split-file %s %t -// 
RUN: cd %t -// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header -Werror=uninitialized folly-conv.h -// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header -Werror=uninitialized thrift_cpp2_base.h -// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header -Werror=uninitialized -fmodule-file=folly-conv.pcm -fmodule-file=thrift_cpp2_base.pcm logger_base.h - -//--- Conv.h -#pragma once - -template -_Up __declval(int); - -template -auto declval() noexcept -> decltype(__declval<_Tp>(0)); - -namespace folly { - -template -struct Expected { - template - auto thenOrThrow() -> decltype(declval()) { - return 1; - } -}; - -struct ExpectedHelper { - template - static constexpr Expected return_(T) { - return Expected(); - } - - template - static auto then_(This&&, Fn&&) - -> decltype(T::template return_((declval()(true), 0))) { - return Expected(); - } -}; - -template -inline Expected tryTo() { - Tgt result = 0; - // In build with asserts: - // clang/lib/Sema/SemaTemplateInstantiate.cpp: llvm::PointerUnion *clang::LocalInstantiationScope::findInstantiationOf(const Decl *): Assertion `isa(D) && "declaration not instantiated in this scope"' failed. - // In release build compilation error on the line below inside lambda: - // error: variable 'result' is uninitialized when used here [-Werror,-Wuninitialized] - ExpectedHelper::then_(Expected(), [&](bool) { return result; }); - return {}; -} - -} // namespace folly - -inline void bar() { - folly::tryTo(); -} -// expected-no-diagnostics - -//--- folly-conv.h -#pragma once -#include "Conv.h" -// expected-no-diagnostics - -//--- thrift_cpp2_base.h -#pragma once -#include "Conv.h" -// expected-no-diagnostics - -//--- logger_base.h -#pragma once -import "folly-conv.h"; -import "thrift_cpp2_base.h"; - -inline void foo() { - folly::tryTo(); -} -// expected-no-diagnostics diff --git a/clang/test/Headers/crash-instantiated-in-scope-cxx-modules2.cpp b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules2.cpp deleted file mode 100644 index 5b1a904e928a68..00000000000000 --- a/clang/test/Headers/crash-instantiated-in-scope-cxx-modules2.cpp +++ /dev/null @@ -1,30 +0,0 @@ -// RUN: rm -fR %t -// RUN: split-file %s %t -// RUN: cd %t -// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header header.h -// RUN: %clang_cc1 -std=c++20 -fmodule-file=header.pcm main.cpp - -//--- header.h -template -void f(T) {} - -class A { - virtual ~A(); -}; - -inline A::~A() { - f([](){}); -} - -struct B { - void g() { - f([](){ - [](){}; - }); - } -}; -// expected-no-diagnostics - -//--- main.cpp -import "header.h"; -// expected-no-diagnostics diff --git a/clang/test/Misc/print-stats-vfs.test b/clang/test/Misc/print-stats-vfs.test new file mode 100644 index 00000000000000..65446cb7a5077d --- /dev/null +++ b/clang/test/Misc/print-stats-vfs.test @@ -0,0 +1,17 @@ +// RUN: rm -rf %t +// RUN: split-file %s %t + +// RUN: %clang_cc1 -fsyntax-only %t/tu.c -I %t/dir1 -I %t/dir2 -print-stats 2>&1 | FileCheck %s + +//--- tu.c +#include "header.h" +//--- dir1/other.h +//--- dir2/header.h + +// CHECK: *** Virtual File System Stats: +// CHECK-NEXT: {{[[:digit:]]+}} status() calls +// CHECK-NEXT: {{[[:digit:]]+}} openFileForRead() calls +// CHECK-NEXT: {{[[:digit:]]+}} dir_begin() calls +// CHECK-NEXT: {{[[:digit:]]+}} getRealPath() calls +// CHECK-NEXT: {{[[:digit:]]+}} exists() calls +// CHECK-NEXT: {{[[:digit:]]+}} isLocal() calls diff --git a/clang/test/Sema/builtins-elementwise-math.c b/clang/test/Sema/builtins-elementwise-math.c index 
2673f1f519af69..628274380ae5f2 100644
--- a/clang/test/Sema/builtins-elementwise-math.c
+++ b/clang/test/Sema/builtins-elementwise-math.c
@@ -275,8 +275,8 @@ void test_builtin_elementwise_min(int i, short s, double d, float4 v, int3 iv, u
void test_builtin_elementwise_bitreverse(int i, float f, double d, float4 v, int3 iv, unsigned u, unsigned4 uv) {
-  struct Foo s = __builtin_elementwise_ceil(f);
-  // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'float'}}
+  struct Foo s = __builtin_elementwise_bitreverse(i);
+  // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}}

  i = __builtin_elementwise_bitreverse();
  // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
diff --git a/clang/test/SemaCXX/PR97308.cpp b/clang/test/SemaCXX/PR97308.cpp
new file mode 100644
index 00000000000000..7f550bc15d741c
--- /dev/null
+++ b/clang/test/SemaCXX/PR97308.cpp
@@ -0,0 +1,21 @@
+// RUN: %clang_cc1 -o - -emit-llvm -triple x86_64-linux-gnu %s
+
+// Check that there is no crash in the CodeGen action.
+// https://github.com/llvm/llvm-project/pull/97308
+struct a {
+} constexpr b;
+class c {
+public:
+  c(a);
+};
+class B {
+public:
+  using d = int;
+  struct e {
+    enum { f } g;
+    int h;
+    c i;
+    d j{};
+  };
+};
+B::e k{B::e::f, int(), b};
diff --git a/clang/test/SemaCXX/constexpr-default-arg.cpp b/clang/test/SemaCXX/constexpr-default-arg.cpp
index ec9b2927880bdf..901123bfb359ff 100644
--- a/clang/test/SemaCXX/constexpr-default-arg.cpp
+++ b/clang/test/SemaCXX/constexpr-default-arg.cpp
@@ -32,8 +32,8 @@ void test_default_arg2() {
}

// Check that multiple CXXDefaultInitExprs don't cause an assertion failure.
-struct A { int &&r = 0; }; // expected-note 2{{default member initializer}}
+struct A { int &&r = 0; };
struct B { A x, y; };
-B b = {}; // expected-warning 2{{lifetime extension of temporary created by aggregate initialization using a default member initializer is not yet supported}}
+B b = {}; // expected-no-diagnostics
}
diff --git a/clang/test/SemaCXX/cxx11-default-member-initializers.cpp b/clang/test/SemaCXX/cxx11-default-member-initializers.cpp
index dd8e9c6b7fc11f..5e26c3a3b82cd5 100644
--- a/clang/test/SemaCXX/cxx11-default-member-initializers.cpp
+++ b/clang/test/SemaCXX/cxx11-default-member-initializers.cpp
@@ -27,6 +27,103 @@ class MemInit {
  C m = s;
};
+namespace std {
+typedef decltype(sizeof(int)) size_t;
+
+// libc++'s implementation
+template class initializer_list {
+  const _E *__begin_;
+  size_t __size_;
+
+  initializer_list(const _E *__b, size_t __s) : __begin_(__b), __size_(__s) {}
+
+public:
+  typedef _E value_type;
+  typedef const _E &reference;
+  typedef const _E &const_reference;
+  typedef size_t size_type;
+
+  typedef const _E *iterator;
+  typedef const _E *const_iterator;
+
+  initializer_list() : __begin_(nullptr), __size_(0) {}
+
+  size_t size() const { return __size_; }
+  const _E *begin() const { return __begin_; }
+  const _E *end() const { return __begin_ + __size_; }
+};
+} // namespace std
+
+#if __cplusplus >= 201703L
+
+// Test CXXDefaultInitExpr rebuild issue in
+// https://github.com/llvm/llvm-project/pull/87933
+namespace test_rebuild {
+template class C {
+public:
+  C(std::initializer_list);
+};
+
+template using Ptr = __remove_pointer(T) *;
+template C(T) -> C, sizeof(T)>;
+
+class A {
+public:
+  template T1 *some_func(T2 &&);
+};
+
+struct B : A {
+  int *ar = some_func(C{some_func(0)});
+  B() {}
+};
+
+int TestBody_got;
+template class Vector {
+public:
+
Vector(std::initializer_list); +}; +template Vector(Ts...) -> Vector; +class ProgramBuilder { +public: + template int *create(ARGS); +}; + +struct TypeTest : ProgramBuilder { + int *str_f16 = create(Vector{0}); + TypeTest() {} +}; +class TypeTest_Element_Test : TypeTest { + void TestBody(); +}; +void TypeTest_Element_Test::TestBody() { + int *expect = str_f16; + &TestBody_got != expect; // expected-warning {{inequality comparison result unused}} +} +} // namespace test_rebuild + +// Test CXXDefaultInitExpr rebuild issue in +// https://github.com/llvm/llvm-project/pull/92527 +namespace test_rebuild2 { +struct F { + int g; +}; +struct H {}; +struct I { + I(const F &); + I(H); +}; +struct L { + I i = I({.g = 0}); +}; +struct N : L {}; + +void f() { + delete new L; // Ok + delete new N; // Ok +} +} // namespace test_rebuild2 +#endif // __cplusplus >= 201703L + #if __cplusplus >= 202002L // This test ensures cleanup expressions are correctly produced // in the presence of default member initializers. diff --git a/clang/test/SemaCXX/eval-crashes.cpp b/clang/test/SemaCXX/eval-crashes.cpp index 0865dafe4bf92a..21e05f19be0caa 100644 --- a/clang/test/SemaCXX/eval-crashes.cpp +++ b/clang/test/SemaCXX/eval-crashes.cpp @@ -25,11 +25,9 @@ namespace pr33140_0b { } namespace pr33140_2 { - // FIXME: The declaration of 'b' below should lifetime-extend two int - // temporaries. - struct A { int &&r = 0; }; // expected-note 2{{initializing field 'r' with default member initializer}} + struct A { int &&r = 0; }; struct B { A x, y; }; - B b = {}; // expected-warning 2{{lifetime extension of temporary created by aggregate initialization using a default member initializer is not yet supported}} + B b = {}; } namespace pr33140_3 { diff --git a/clang/test/SemaHLSL/BuiltIns/StructuredBuffers.hlsl b/clang/test/SemaHLSL/BuiltIns/StructuredBuffers.hlsl new file mode 100644 index 00000000000000..2450941f5d9b46 --- /dev/null +++ b/clang/test/SemaHLSL/BuiltIns/StructuredBuffers.hlsl @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -fsyntax-only -verify %s + +typedef vector float3; + +StructuredBuffer Buffer; + +// expected-error@+2 {{class template 'StructuredBuffer' requires template arguments}} +// expected-note@*:* {{template declaration from hidden source: template class StructuredBuffer}} +StructuredBuffer BufferErr1; + +// expected-error@+2 {{too few template arguments for class template 'StructuredBuffer'}} +// expected-note@*:* {{template declaration from hidden source: template class StructuredBuffer}} +StructuredBuffer<> BufferErr2; + +[numthreads(1,1,1)] +void main() { + (void)Buffer.h; // expected-error {{'h' is a private member of 'hlsl::StructuredBuffer >'}} + // expected-note@* {{implicitly declared private here}} +} diff --git a/clang/test/SemaHLSL/TruncationOverloadResolution.hlsl b/clang/test/SemaHLSL/TruncationOverloadResolution.hlsl index 2bcb367c5669a3..0192c27860f140 100644 --- a/clang/test/SemaHLSL/TruncationOverloadResolution.hlsl +++ b/clang/test/SemaHLSL/TruncationOverloadResolution.hlsl @@ -24,6 +24,42 @@ void Case2(float4 F) { Half2Double2(F); // expected-warning{{implicit conversion truncates vector: 'float4' (aka 'vector') to 'vector' (vector of 2 'double' values)}} } +// Case 3: Allow truncation down to vector or T. 
+void Half(half H); +void Float(float F); +void Double(double D); + +void Half1(half1 H); +void Float1(float1 F); +void Double1(double1 D); + +void Case3(half3 H, float3 F, double3 D) { + Half(H); // expected-warning{{implicit conversion turns vector to scalar: 'half3' (aka 'vector') to 'half'}} + Half(F); // expected-warning{{implicit conversion turns vector to scalar: 'float3' (aka 'vector') to 'half'}} + Half(D); // expected-warning{{implicit conversion turns vector to scalar: 'double3' (aka 'vector') to 'half'}} + + Float(H); // expected-warning{{implicit conversion turns vector to scalar: 'half3' (aka 'vector') to 'float'}} + Float(F); // expected-warning{{implicit conversion turns vector to scalar: 'float3' (aka 'vector') to 'float'}} + Float(D); // expected-warning{{implicit conversion turns vector to scalar: 'double3' (aka 'vector') to 'float'}} + + Double(H); // expected-warning{{implicit conversion turns vector to scalar: 'half3' (aka 'vector') to 'double'}} + Double(F); // expected-warning{{implicit conversion turns vector to scalar: 'float3' (aka 'vector') to 'double'}} + Double(D); // expected-warning{{implicit conversion turns vector to scalar: 'double3' (aka 'vector') to 'double'}} + + Half1(H); // expected-warning{{implicit conversion truncates vector: 'half3' (aka 'vector') to 'vector' (vector of 1 'half' value)}} + Half1(F); // expected-warning{{implicit conversion truncates vector: 'float3' (aka 'vector') to 'vector' (vector of 1 'half' value)}} expected-warning{{implicit conversion loses floating-point precision: 'float3' (aka 'vector') to 'vector' (vector of 1 'half' value)}} + Half1(D); // expected-warning{{implicit conversion truncates vector: 'double3' (aka 'vector') to 'vector' (vector of 1 'half' value)}} expected-warning{{implicit conversion loses floating-point precision: 'double3' (aka 'vector') to 'vector' (vector of 1 'half' value)}} + + Float1(H); // expected-warning{{implicit conversion truncates vector: 'half3' (aka 'vector') to 'vector' (vector of 1 'float' value)}} + Float1(F); // expected-warning{{implicit conversion truncates vector: 'float3' (aka 'vector') to 'vector' (vector of 1 'float' value)}} + Float1(D); // expected-warning{{implicit conversion truncates vector: 'double3' (aka 'vector') to 'vector' (vector of 1 'float' value)}} expected-warning{{implicit conversion loses floating-point precision: 'double3' (aka 'vector') to 'vector' (vector of 1 'float' value)}} + + Double1(H); // expected-warning{{implicit conversion truncates vector: 'half3' (aka 'vector') to 'vector' (vector of 1 'double' value)}} + Double1(F); // expected-warning{{implicit conversion truncates vector: 'float3' (aka 'vector') to 'vector' (vector of 1 'double' value)}} + Double1(D); // expected-warning{{implicit conversion truncates vector: 'double3' (aka 'vector') to 'vector' (vector of 1 'double' value)}} +} + + #if ERROR // Case 3: Two promotions or two conversions are ambiguous. 
void Float2Double2(double2 D); // expected-note{{candidate function}} diff --git a/clang/test/SemaHLSL/Types/BuiltinVector/ScalarSwizzleErrors.hlsl b/clang/test/SemaHLSL/Types/BuiltinVector/ScalarSwizzleErrors.hlsl index 5088991f2e28ac..b1c75acbc16c6f 100644 --- a/clang/test/SemaHLSL/Types/BuiltinVector/ScalarSwizzleErrors.hlsl +++ b/clang/test/SemaHLSL/Types/BuiltinVector/ScalarSwizzleErrors.hlsl @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library -x hlsl -finclude-default-header -verify %s +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library -finclude-default-header -verify %s int2 ToTwoInts(int V) { return V.xy; // expected-error{{vector component access exceeds type 'vector' (vector of 1 'int' value)}} @@ -16,6 +16,10 @@ float2 WhatIsHappening(float V) { return V.; // expected-error{{expected unqualified-id}} } +float ScalarLValue(float2 V) { + (float)V = 4.0; // expected-error{{assignment to cast is illegal, lvalue casts are not supported}} +} + // These cases produce no error. float2 HowManyFloats(float V) { diff --git a/clang/test/SemaHLSL/Types/BuiltinVector/TruncationConstantExpr.hlsl b/clang/test/SemaHLSL/Types/BuiltinVector/TruncationConstantExpr.hlsl new file mode 100644 index 00000000000000..918daa03d80322 --- /dev/null +++ b/clang/test/SemaHLSL/Types/BuiltinVector/TruncationConstantExpr.hlsl @@ -0,0 +1,20 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library -finclude-default-header -std=hlsl202x -verify %s + +// expected-no-diagnostics + +// Note: these tests are a bit awkward because at time of writing we don't have a +// good way to constexpr `any` for bool vector conditions, and the condition for +// _Static_assert must be an integral constant. +export void fn() { + // This compiling successfully verifies that the vector constant expression + // gets truncated to an integer at compile time for instantiation. + _Static_assert(((int)1.xxxx) + 0 == 1, "Woo!"); + + // This compiling successfully verifies that the vector constant expression + // gets truncated to a float at compile time for instantiation. + _Static_assert(((float)1.0.xxxx) + 0.0 == 1.0, "Woo!"); + + // This compiling successfully verifies that a vector can be truncated to a + // smaller vector, then truncated to a float as a constant expression. + _Static_assert(((float2)float4(6, 5, 4, 3)).x == 6, "Woo!"); +} diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp index a8f6150dd3493d..259058c798e5d1 100644 --- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp +++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp @@ -915,6 +915,13 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) { if (Format == ScanningOutputFormat::Full) FD.emplace(ModuleName.empty() ? 
Inputs.size() : 0); + std::atomic NumStatusCalls = 0; + std::atomic NumOpenFileForReadCalls = 0; + std::atomic NumDirBeginCalls = 0; + std::atomic NumGetRealPathCalls = 0; + std::atomic NumExistsCalls = 0; + std::atomic NumIsLocalCalls = 0; + auto ScanningTask = [&](DependencyScanningService &Service) { DependencyScanningTool WorkerTool(Service); @@ -999,10 +1006,21 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) { HadErrors = true; } } + + WorkerTool.getWorkerVFS().visit([&](llvm::vfs::FileSystem &VFS) { + if (auto *T = dyn_cast_or_null(&VFS)) { + NumStatusCalls += T->NumStatusCalls; + NumOpenFileForReadCalls += T->NumOpenFileForReadCalls; + NumDirBeginCalls += T->NumDirBeginCalls; + NumGetRealPathCalls += T->NumGetRealPathCalls; + NumExistsCalls += T->NumExistsCalls; + NumIsLocalCalls += T->NumIsLocalCalls; + } + }); }; DependencyScanningService Service(ScanMode, Format, OptimizeArgs, - EagerLoadModules); + EagerLoadModules, /*TraceVFS=*/Verbose); llvm::Timer T; T.startTimer(); @@ -1025,6 +1043,16 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) { } T.stopTimer(); + + if (Verbose) + llvm::errs() << "\n*** Virtual File System Stats:\n" + << NumStatusCalls << " status() calls\n" + << NumOpenFileForReadCalls << " openFileForRead() calls\n" + << NumDirBeginCalls << " dir_begin() calls\n" + << NumGetRealPathCalls << " getRealPath() calls\n" + << NumExistsCalls << " exists() calls\n" + << NumIsLocalCalls << " isLocal() calls\n"; + if (PrintTiming) llvm::errs() << llvm::format( "clang-scan-deps timing: %0.2fs wall, %0.2fs process\n", diff --git a/clang/unittests/Format/FormatTestJS.cpp b/clang/unittests/Format/FormatTestJS.cpp index 4b29ba720f6823..c25228a69a748f 100644 --- a/clang/unittests/Format/FormatTestJS.cpp +++ b/clang/unittests/Format/FormatTestJS.cpp @@ -2850,5 +2850,22 @@ TEST_F(FormatTestJS, DontBreakFieldsAsGoToLabels) { "};"); } +TEST_F(FormatTestJS, BreakAfterOpenBracket) { + auto Style = getGoogleStyle(FormatStyle::LK_JavaScript); + EXPECT_EQ(Style.AlignAfterOpenBracket, FormatStyle::BAS_AlwaysBreak); + verifyFormat("ctrl.onCopy(/** @type {!WizEvent}*/ (\n" + " {event, targetElement: {el: () => selectedElement}}));", + Style); + verifyFormat("failedUserIds.push(...subscriptioxxxxxxxxxxxxnSubset.map(\n" + " subscxxxxxxxxxxxxription => subscription.getUserId()));", + Style); + verifyFormat("failedUserIds.push(!subscriptioxxxxxxxxxxxxnSubset.map(\n" + " subscxxxxxxxxxxxxription => subscription.getUserId()));", + Style); + verifyFormat("failedUserIds.push(await subscriptioxxxxxxxxxxxxnSubset.map(\n" + " subscxxxxxxxxxxxxription => subscription.getUserId()));", + Style); +} + } // namespace format } // end namespace clang diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index 36a6db9283893e..5c28e3a4ea5a1f 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -2050,7 +2050,7 @@ TEST_F(TokenAnnotatorTest, UnderstandsFunctionDeclarationNames) { EXPECT_TOKEN(Tokens[4], tok::l_paren, TT_FunctionTypeLParen); Tokens = annotate("void instanceof();"); - ASSERT_EQ(Tokens.size(), 6u); + ASSERT_EQ(Tokens.size(), 6u) << Tokens; EXPECT_TOKEN(Tokens[1], tok::identifier, TT_FunctionDeclarationName); EXPECT_TOKEN(Tokens[2], tok::l_paren, TT_FunctionDeclarationLParen); @@ -3365,55 +3365,55 @@ TEST_F(TokenAnnotatorTest, SwitchExpression) { TEST_F(TokenAnnotatorTest, CppAltOperatorKeywords) { auto Tokens = annotate("a = b and 
c;"); - ASSERT_EQ(Tokens.size(), 7u); + ASSERT_EQ(Tokens.size(), 7u) << Tokens; EXPECT_TOKEN(Tokens[3], tok::ampamp, TT_BinaryOperator); Tokens = annotate("a = b and_eq c;"); - ASSERT_EQ(Tokens.size(), 7u); + ASSERT_EQ(Tokens.size(), 7u) << Tokens; EXPECT_TOKEN(Tokens[3], tok::ampequal, TT_BinaryOperator); Tokens = annotate("a = b bitand c;"); - ASSERT_EQ(Tokens.size(), 7u); + ASSERT_EQ(Tokens.size(), 7u) << Tokens; EXPECT_TOKEN(Tokens[3], tok::amp, TT_BinaryOperator); Tokens = annotate("a = b bitor c;"); - ASSERT_EQ(Tokens.size(), 7u); + ASSERT_EQ(Tokens.size(), 7u) << Tokens; EXPECT_TOKEN(Tokens[3], tok::pipe, TT_BinaryOperator); Tokens = annotate("a = b compl c;"); - ASSERT_EQ(Tokens.size(), 7u); + ASSERT_EQ(Tokens.size(), 7u) << Tokens; EXPECT_TOKEN(Tokens[3], tok::tilde, TT_UnaryOperator); Tokens = annotate("a = b not c;"); - ASSERT_EQ(Tokens.size(), 7u); + ASSERT_EQ(Tokens.size(), 7u) << Tokens; EXPECT_TOKEN(Tokens[3], tok::exclaim, TT_UnaryOperator); Tokens = annotate("a = b not_eq c;"); - ASSERT_EQ(Tokens.size(), 7u); + ASSERT_EQ(Tokens.size(), 7u) << Tokens; EXPECT_TOKEN(Tokens[3], tok::exclaimequal, TT_BinaryOperator); Tokens = annotate("a = b or c;"); - ASSERT_EQ(Tokens.size(), 7u); + ASSERT_EQ(Tokens.size(), 7u) << Tokens; EXPECT_TOKEN(Tokens[3], tok::pipepipe, TT_BinaryOperator); Tokens = annotate("a = b or_eq c;"); - ASSERT_EQ(Tokens.size(), 7u); + ASSERT_EQ(Tokens.size(), 7u) << Tokens; EXPECT_TOKEN(Tokens[3], tok::pipeequal, TT_BinaryOperator); Tokens = annotate("a = b xor c;"); - ASSERT_EQ(Tokens.size(), 7u); + ASSERT_EQ(Tokens.size(), 7u) << Tokens; EXPECT_TOKEN(Tokens[3], tok::caret, TT_BinaryOperator); Tokens = annotate("a = b xor_eq c;"); - ASSERT_EQ(Tokens.size(), 7u); + ASSERT_EQ(Tokens.size(), 7u) << Tokens; EXPECT_TOKEN(Tokens[3], tok::caretequal, TT_BinaryOperator); Tokens = annotate("xor = foo;"); - ASSERT_EQ(Tokens.size(), 5u); + ASSERT_EQ(Tokens.size(), 5u) << Tokens; EXPECT_TOKEN(Tokens[0], tok::identifier, TT_Unknown); Tokens = annotate("int xor = foo;"); - ASSERT_EQ(Tokens.size(), 6u); + ASSERT_EQ(Tokens.size(), 6u) << Tokens; EXPECT_TOKEN(Tokens[1], tok::identifier, TT_StartOfName); } @@ -3423,7 +3423,7 @@ TEST_F(TokenAnnotatorTest, FunctionTryBlock) { " : foo{[] -> std::string { return {}; }(), x}, bar{y} {\n" "} catch (...) 
{\n" "}"); - ASSERT_EQ(Tokens.size(), 45u); + ASSERT_EQ(Tokens.size(), 45u) << Tokens; EXPECT_TOKEN(Tokens[2], tok::identifier, TT_CtorDtorDeclName); EXPECT_TOKEN(Tokens[3], tok::l_paren, TT_FunctionDeclarationLParen); EXPECT_TOKEN(Tokens[11], tok::colon, TT_CtorInitializerColon); @@ -3439,7 +3439,7 @@ TEST_F(TokenAnnotatorTest, TypenameMacro) { Style.TypenameMacros.push_back("STRUCT"); auto Tokens = annotate("STRUCT(T, B) { int i; };", Style); - ASSERT_EQ(Tokens.size(), 13u); + ASSERT_EQ(Tokens.size(), 13u) << Tokens; EXPECT_TOKEN(Tokens[0], tok::identifier, TT_TypenameMacro); EXPECT_TOKEN(Tokens[1], tok::l_paren, TT_TypeDeclarationParen); EXPECT_TOKEN(Tokens[5], tok::r_paren, TT_TypeDeclarationParen); @@ -3451,7 +3451,7 @@ TEST_F(TokenAnnotatorTest, GNULanguageStandard) { EXPECT_EQ(Style.Standard, FormatStyle::LS_Latest); auto Tokens = annotate("return 1 <=> 2;", Style); - ASSERT_EQ(Tokens.size(), 6u); + ASSERT_EQ(Tokens.size(), 6u) << Tokens; EXPECT_TOKEN(Tokens[2], tok::spaceship, TT_BinaryOperator); } diff --git a/clang/utils/TableGen/ClangASTPropertiesEmitter.cpp b/clang/utils/TableGen/ClangASTPropertiesEmitter.cpp index de8dda60681ff8..70005da28559d3 100644 --- a/clang/utils/TableGen/ClangASTPropertiesEmitter.cpp +++ b/clang/utils/TableGen/ClangASTPropertiesEmitter.cpp @@ -88,99 +88,98 @@ struct CasedTypeInfo { }; class ASTPropsEmitter { - raw_ostream &Out; - RecordKeeper &Records; - std::map NodeInfos; + raw_ostream &Out; + RecordKeeper &Records; + std::map NodeInfos; std::vector AllPropertyTypes; std::map CasedTypeInfos; public: - ASTPropsEmitter(RecordKeeper &records, raw_ostream &out) - : Out(out), Records(records) { - - // Find all the properties. - for (Property property : - records.getAllDerivedDefinitions(PropertyClassName)) { - HasProperties node = property.getClass(); - NodeInfos[node].Properties.push_back(property); - } + ASTPropsEmitter(RecordKeeper &records, raw_ostream &out) + : Out(out), Records(records) { + + // Find all the properties. + for (Property property : + records.getAllDerivedDefinitions(PropertyClassName)) { + HasProperties node = property.getClass(); + NodeInfos[node].Properties.push_back(property); + } // Find all the creation rules. for (CreationRule creationRule : - records.getAllDerivedDefinitions(CreationRuleClassName)) { + records.getAllDerivedDefinitions(CreationRuleClassName)) { HasProperties node = creationRule.getClass(); auto &info = NodeInfos[node]; if (info.Creator) { - PrintFatalError(creationRule.getLoc(), - "multiple creator rules for \"" + node.getName() - + "\""); + PrintFatalError(creationRule.getLoc(), "multiple creator rules for \"" + + node.getName() + "\""); } info.Creator = creationRule; } // Find all the override rules. for (OverrideRule overrideRule : - records.getAllDerivedDefinitions(OverrideRuleClassName)) { + records.getAllDerivedDefinitions(OverrideRuleClassName)) { HasProperties node = overrideRule.getClass(); auto &info = NodeInfos[node]; if (info.Override) { PrintFatalError(overrideRule.getLoc(), - "multiple override rules for \"" + node.getName() - + "\""); + "multiple override rules for \"" + node.getName() + + "\""); } info.Override = overrideRule; } // Find all the write helper rules. 
for (ReadHelperRule helperRule : - records.getAllDerivedDefinitions(ReadHelperRuleClassName)) { + records.getAllDerivedDefinitions(ReadHelperRuleClassName)) { HasProperties node = helperRule.getClass(); auto &info = NodeInfos[node]; if (info.ReadHelper) { PrintFatalError(helperRule.getLoc(), - "multiple write helper rules for \"" + node.getName() - + "\""); + "multiple write helper rules for \"" + node.getName() + + "\""); } info.ReadHelper = helperRule; } // Find all the concrete property types. for (PropertyType type : - records.getAllDerivedDefinitions(PropertyTypeClassName)) { + records.getAllDerivedDefinitions(PropertyTypeClassName)) { // Ignore generic specializations; they're generally not useful when // emitting basic emitters etc. - if (type.isGenericSpecialization()) continue; + if (type.isGenericSpecialization()) + continue; AllPropertyTypes.push_back(type); } // Find all the type kind rules. for (TypeKindRule kindRule : - records.getAllDerivedDefinitions(TypeKindClassName)) { + records.getAllDerivedDefinitions(TypeKindClassName)) { PropertyType type = kindRule.getParentType(); auto &info = CasedTypeInfos[type]; if (info.KindRule) { - PrintFatalError(kindRule.getLoc(), - "multiple kind rules for \"" - + type.getCXXTypeName() + "\""); + PrintFatalError(kindRule.getLoc(), "multiple kind rules for \"" + + type.getCXXTypeName() + "\""); } info.KindRule = kindRule; } // Find all the type cases. for (TypeCase typeCase : - records.getAllDerivedDefinitions(TypeCaseClassName)) { + records.getAllDerivedDefinitions(TypeCaseClassName)) { CasedTypeInfos[typeCase.getParentType()].Cases.push_back(typeCase); } Validator(*this).validate(); - } + } void visitAllProperties(HasProperties derived, const NodeInfo &derivedInfo, - function_ref visit) { + function_ref visit) { std::set ignoredProperties; auto overrideRule = derivedInfo.Override; @@ -195,20 +194,19 @@ class ASTPropsEmitter { visitAllNodesWithInfo(derived, derivedInfo, [&](HasProperties node, const NodeInfo &info) { - for (Property prop : info.Properties) { - if (ignoredProperties.count(prop.getName())) - continue; + for (Property prop : info.Properties) { + if (ignoredProperties.count(prop.getName())) + continue; - visit(prop); - } - }); + visit(prop); + } + }); } - void visitAllNodesWithInfo(HasProperties derivedNode, - const NodeInfo &derivedNodeInfo, - llvm::function_ref - visit) { + void visitAllNodesWithInfo( + HasProperties derivedNode, const NodeInfo &derivedNodeInfo, + llvm::function_ref + visit) { visit(derivedNode, derivedNodeInfo); // Also walk the bases if appropriate. @@ -217,7 +215,8 @@ class ASTPropsEmitter { auto it = NodeInfos.find(base); // Ignore intermediate nodes that don't add interesting properties. 
- if (it == NodeInfos.end()) continue; + if (it == NodeInfos.end()) + continue; auto &baseInfo = it->second; visit(base, baseInfo); @@ -225,14 +224,12 @@ class ASTPropsEmitter { } } - template - void emitNodeReaderClass() { + template void emitNodeReaderClass() { auto info = ReaderWriterInfo::forReader(); emitNodeReaderWriterClass(info); } - template - void emitNodeWriterClass() { + template void emitNodeWriterClass() { auto info = ReaderWriterInfo::forWriter(); emitNodeReaderWriterClass(info); } @@ -241,8 +238,7 @@ class ASTPropsEmitter { void emitNodeReaderWriterClass(const ReaderWriterInfo &info); template - void emitNodeReaderWriterMethod(NodeClass node, - const ReaderWriterInfo &info); + void emitNodeReaderWriterMethod(NodeClass node, const ReaderWriterInfo &info); void emitPropertiedReaderWriterBody(HasProperties node, const ReaderWriterInfo &info); diff --git a/clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp b/clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp index aee7d38786a51c..1a2503dcf660cf 100644 --- a/clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp +++ b/clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp @@ -20,16 +20,16 @@ using namespace llvm; -void clang::EmitClangCommentCommandInfo(RecordKeeper &Records, +void clang::EmitClangCommentCommandInfo(const RecordKeeper &Records, raw_ostream &OS) { emitSourceFileHeader("A list of commands useable in documentation comments", OS, Records); OS << "namespace {\n" "const CommandInfo Commands[] = {\n"; - std::vector Tags = Records.getAllDerivedDefinitions("Command"); + ArrayRef Tags = Records.getAllDerivedDefinitions("Command"); for (size_t i = 0, e = Tags.size(); i != e; ++i) { - Record &Tag = *Tags[i]; + const Record &Tag = *Tags[i]; OS << " { " << "\"" << Tag.getValueAsString("Name") << "\", " << "\"" << Tag.getValueAsString("EndCommandName") << "\", " << i << ", " @@ -62,7 +62,7 @@ void clang::EmitClangCommentCommandInfo(RecordKeeper &Records, std::vector Matches; for (size_t i = 0, e = Tags.size(); i != e; ++i) { - Record &Tag = *Tags[i]; + const Record &Tag = *Tags[i]; std::string Name = std::string(Tag.getValueAsString("Name")); std::string Return; raw_string_ostream(Return) << "return &Commands[" << i << "];"; @@ -112,7 +112,7 @@ static std::string MangleName(StringRef Str) { return Mangled; } -void clang::EmitClangCommentCommandList(RecordKeeper &Records, +void clang::EmitClangCommentCommandList(const RecordKeeper &Records, raw_ostream &OS) { emitSourceFileHeader("A list of commands useable in documentation comments", OS, Records); @@ -121,9 +121,9 @@ void clang::EmitClangCommentCommandList(RecordKeeper &Records, << "# define COMMENT_COMMAND(NAME)\n" << "#endif\n"; - std::vector Tags = Records.getAllDerivedDefinitions("Command"); + ArrayRef Tags = Records.getAllDerivedDefinitions("Command"); for (size_t i = 0, e = Tags.size(); i != e; ++i) { - Record &Tag = *Tags[i]; + const Record &Tag = *Tags[i]; std::string MangledName = MangleName(Tag.getValueAsString("Name")); OS << "COMMENT_COMMAND(" << MangledName << ")\n"; diff --git a/clang/utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp b/clang/utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp index f1cd9af0519d1b..bd75b3f6b652a1 100644 --- a/clang/utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp +++ b/clang/utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp @@ -46,21 +46,17 @@ static bool translateCodePointToUTF8(unsigned CodePoint, return true; } -void 
clang::EmitClangCommentHTMLNamedCharacterReferences(RecordKeeper &Records, - raw_ostream &OS) { - std::vector Tags = Records.getAllDerivedDefinitions("NCR"); +void clang::EmitClangCommentHTMLNamedCharacterReferences( + const RecordKeeper &Records, raw_ostream &OS) { std::vector NameToUTF8; SmallString<32> CLiteral; - for (std::vector::iterator I = Tags.begin(), E = Tags.end(); - I != E; ++I) { - Record &Tag = **I; - std::string Spelling = std::string(Tag.getValueAsString("Spelling")); - uint64_t CodePoint = Tag.getValueAsInt("CodePoint"); + for (const Record *Tag : Records.getAllDerivedDefinitions("NCR")) { + std::string Spelling = std::string(Tag->getValueAsString("Spelling")); + uint64_t CodePoint = Tag->getValueAsInt("CodePoint"); CLiteral.clear(); CLiteral.append("return "); if (!translateCodePointToUTF8(CodePoint, CLiteral)) { - SrcMgr.PrintMessage(Tag.getLoc().front(), - SourceMgr::DK_Error, + SrcMgr.PrintMessage(Tag->getLoc().front(), SourceMgr::DK_Error, Twine("invalid code point")); continue; } diff --git a/clang/utils/TableGen/ClangCommentHTMLTagsEmitter.cpp b/clang/utils/TableGen/ClangCommentHTMLTagsEmitter.cpp index 3dc1098753e0bf..a457315bc62c5c 100644 --- a/clang/utils/TableGen/ClangCommentHTMLTagsEmitter.cpp +++ b/clang/utils/TableGen/ClangCommentHTMLTagsEmitter.cpp @@ -19,10 +19,11 @@ using namespace llvm; -void clang::EmitClangCommentHTMLTags(RecordKeeper &Records, raw_ostream &OS) { - std::vector Tags = Records.getAllDerivedDefinitions("Tag"); +void clang::EmitClangCommentHTMLTags(const RecordKeeper &Records, + raw_ostream &OS) { + ArrayRef Tags = Records.getAllDerivedDefinitions("Tag"); std::vector Matches; - for (Record *Tag : Tags) { + for (const Record *Tag : Tags) { Matches.emplace_back(std::string(Tag->getValueAsString("Spelling")), "return true;"); } @@ -35,12 +36,12 @@ void clang::EmitClangCommentHTMLTags(RecordKeeper &Records, raw_ostream &OS) { << "}\n\n"; } -void clang::EmitClangCommentHTMLTagsProperties(RecordKeeper &Records, +void clang::EmitClangCommentHTMLTagsProperties(const RecordKeeper &Records, raw_ostream &OS) { - std::vector Tags = Records.getAllDerivedDefinitions("Tag"); + ArrayRef Tags = Records.getAllDerivedDefinitions("Tag"); std::vector MatchesEndTagOptional; std::vector MatchesEndTagForbidden; - for (Record *Tag : Tags) { + for (const Record *Tag : Tags) { std::string Spelling = std::string(Tag->getValueAsString("Spelling")); StringMatcher::StringPair Match(Spelling, "return true;"); if (Tag->getValueAsBit("EndTagOptional")) diff --git a/clang/utils/TableGen/ClangDataCollectorsEmitter.cpp b/clang/utils/TableGen/ClangDataCollectorsEmitter.cpp index 45082935c1f794..dae6710d752358 100644 --- a/clang/utils/TableGen/ClangDataCollectorsEmitter.cpp +++ b/clang/utils/TableGen/ClangDataCollectorsEmitter.cpp @@ -4,7 +4,7 @@ using namespace llvm; -void clang::EmitClangDataCollectors(RecordKeeper &RK, raw_ostream &OS) { +void clang::EmitClangDataCollectors(const RecordKeeper &RK, raw_ostream &OS) { const auto &Defs = RK.getClasses(); for (const auto &Entry : Defs) { Record &R = *Entry.second; diff --git a/clang/utils/TableGen/ClangDiagnosticsEmitter.cpp b/clang/utils/TableGen/ClangDiagnosticsEmitter.cpp index 6ca24a8c74b2ff..773668caa75747 100644 --- a/clang/utils/TableGen/ClangDiagnosticsEmitter.cpp +++ b/clang/utils/TableGen/ClangDiagnosticsEmitter.cpp @@ -39,12 +39,13 @@ using namespace llvm; namespace { class DiagGroupParentMap { - RecordKeeper &Records; - std::map > Mapping; + const RecordKeeper &Records; + std::map> Mapping; + public: - 
DiagGroupParentMap(RecordKeeper &records) : Records(records) { - std::vector DiagGroups - = Records.getAllDerivedDefinitions("DiagGroup"); + DiagGroupParentMap(const RecordKeeper &records) : Records(records) { + ArrayRef DiagGroups = + Records.getAllDerivedDefinitions("DiagGroup"); for (unsigned i = 0, e = DiagGroups.size(); i != e; ++i) { std::vector SubGroups = DiagGroups[i]->getValueAsListOfDefs("SubGroups"); @@ -53,7 +54,7 @@ class DiagGroupParentMap { } } - const std::vector &getParents(const Record *Group) { + const std::vector &getParents(const Record *Group) { return Mapping[Group]; } }; @@ -68,7 +69,8 @@ getCategoryFromDiagGroup(const Record *Group, // The diag group may be the subgroup of one or more other diagnostic groups, // check these for a category as well. - const std::vector &Parents = DiagGroupParents.getParents(Group); + const std::vector &Parents = + DiagGroupParents.getParents(Group); for (unsigned i = 0, e = Parents.size(); i != e; ++i) { CatName = getCategoryFromDiagGroup(Parents[i], DiagGroupParents); if (!CatName.empty()) return CatName; @@ -94,19 +96,19 @@ static std::string getDiagnosticCategory(const Record *R, namespace { class DiagCategoryIDMap { - RecordKeeper &Records; + const RecordKeeper &Records; StringMap CategoryIDs; std::vector CategoryStrings; public: - DiagCategoryIDMap(RecordKeeper &records) : Records(records) { + DiagCategoryIDMap(const RecordKeeper &records) : Records(records) { DiagGroupParentMap ParentInfo(Records); // The zero'th category is "". CategoryStrings.push_back(""); CategoryIDs[""] = 0; - std::vector Diags = - Records.getAllDerivedDefinitions("Diagnostic"); + ArrayRef Diags = + Records.getAllDerivedDefinitions("Diagnostic"); for (unsigned i = 0, e = Diags.size(); i != e; ++i) { std::string Category = getDiagnosticCategory(Diags[i], ParentInfo); if (Category.empty()) continue; // Skip diags with no category. @@ -153,8 +155,8 @@ static bool diagGroupBeforeByName(const Record *LHS, const Record *RHS) { /// Invert the 1-[0/1] mapping of diags to group into a one to many /// mapping of groups to diags in the group. -static void groupDiagnostics(const std::vector &Diags, - const std::vector &DiagGroups, +static void groupDiagnostics(ArrayRef Diags, + ArrayRef DiagGroups, std::map &DiagsInGroup) { for (unsigned i = 0, e = Diags.size(); i != e; ++i) { @@ -172,7 +174,7 @@ static void groupDiagnostics(const std::vector &Diags, // Add all DiagGroup's to the DiagsInGroup list to make sure we pick up empty // groups (these are warnings that GCC supports that clang never produces).
for (unsigned i = 0, e = DiagGroups.size(); i != e; ++i) { - Record *Group = DiagGroups[i]; + const Record *Group = DiagGroups[i]; GroupInfo &GI = DiagsInGroup[std::string(Group->getValueAsString("GroupName"))]; GI.GroupName = Group->getName(); @@ -255,20 +257,18 @@ class InferPedantic { GMap; DiagGroupParentMap &DiagGroupParents; - const std::vector &Diags; - const std::vector DiagGroups; + ArrayRef Diags; + const std::vector DiagGroups; std::map &DiagsInGroup; llvm::DenseSet DiagsSet; GMap GroupCount; public: InferPedantic(DiagGroupParentMap &DiagGroupParents, - const std::vector &Diags, - const std::vector &DiagGroups, + ArrayRef Diags, + ArrayRef DiagGroups, std::map &DiagsInGroup) - : DiagGroupParents(DiagGroupParents), - Diags(Diags), - DiagGroups(DiagGroups), - DiagsInGroup(DiagsInGroup) {} + : DiagGroupParents(DiagGroupParents), Diags(Diags), + DiagGroups(DiagGroups), DiagsInGroup(DiagsInGroup) {} /// Compute the set of diagnostics and groups that are immediately /// in -Wpedantic. @@ -302,7 +302,8 @@ bool InferPedantic::isSubGroupOfGroup(const Record *Group, if (GName == GroupName) return true; - const std::vector &Parents = DiagGroupParents.getParents(Group); + const std::vector &Parents = + DiagGroupParents.getParents(Group); for (unsigned i = 0, e = Parents.size(); i != e; ++i) if (isSubGroupOfGroup(Parents[i], GName)) return true; @@ -347,7 +348,8 @@ void InferPedantic::markGroup(const Record *Group) { // group's count is equal to the number of subgroups and diagnostics in // that group, we can safely add this group to -Wpedantic. if (groupInPedantic(Group, /* increment */ true)) { - const std::vector &Parents = DiagGroupParents.getParents(Group); + const std::vector &Parents = + DiagGroupParents.getParents(Group); for (unsigned i = 0, e = Parents.size(); i != e; ++i) markGroup(Parents[i]); } @@ -359,7 +361,7 @@ void InferPedantic::compute(VecOrSet DiagsInPedantic, // "pedantic" group. For those that aren't explicitly included in -Wpedantic, // mark them for consideration to be included in -Wpedantic directly. for (unsigned i = 0, e = Diags.size(); i != e; ++i) { - Record *R = Diags[i]; + const Record *R = Diags[i]; if (isExtension(R) && isOffByDefault(R)) { DiagsSet.insert(R); if (DefInit *Group = dyn_cast(R->getValueInit("Group"))) { @@ -375,7 +377,7 @@ void InferPedantic::compute(VecOrSet DiagsInPedantic, // march through Diags a second time to ensure the results are emitted // in deterministic order. for (unsigned i = 0, e = Diags.size(); i != e; ++i) { - Record *R = Diags[i]; + const Record *R = Diags[i]; if (!DiagsSet.count(R)) continue; // Check if the group is implicitly in -Wpedantic. If so, @@ -401,13 +403,14 @@ void InferPedantic::compute(VecOrSet DiagsInPedantic, // march through the groups to ensure the results are emitted // in a deterministic order. for (unsigned i = 0, ei = DiagGroups.size(); i != ei; ++i) { - Record *Group = DiagGroups[i]; + const Record *Group = DiagGroups[i]; if (!groupInPedantic(Group)) continue; - const std::vector &Parents = DiagGroupParents.getParents(Group); - bool AllParentsInPedantic = - llvm::all_of(Parents, [&](Record *R) { return groupInPedantic(R); }); + const std::vector &Parents = + DiagGroupParents.getParents(Group); + bool AllParentsInPedantic = llvm::all_of( + Parents, [&](const Record *R) { return groupInPedantic(R); }); // If all the parents are in -Wpedantic, this means that this diagnostic // group will be indirectly included by -Wpedantic already. In that // case, do not add it directly to -Wpedantic.
If the group has no @@ -583,7 +586,7 @@ struct DiagnosticTextBuilder { DiagnosticTextBuilder(DiagnosticTextBuilder const &) = delete; DiagnosticTextBuilder &operator=(DiagnosticTextBuilder const &) = delete; - DiagnosticTextBuilder(RecordKeeper &Records) { + DiagnosticTextBuilder(const RecordKeeper &Records) { // Build up the list of substitution records. for (auto *S : Records.getAllDerivedDefinitions("TextSubstitution")) { EvaluatingRecordGuard Guard(&EvaluatingRecord, S); @@ -593,7 +596,7 @@ struct DiagnosticTextBuilder { // Check that no diagnostic definitions have the same name as a // substitution. - for (Record *Diag : Records.getAllDerivedDefinitions("Diagnostic")) { + for (const Record *Diag : Records.getAllDerivedDefinitions("Diagnostic")) { StringRef Name = Diag->getName(); if (Substitutions.count(Name)) llvm::PrintFatalError( @@ -1407,7 +1410,7 @@ static void verifyDiagnosticWording(const Record &Diag) { /// ClangDiagsDefsEmitter - The top-level class emits .def files containing /// declarations of Clang diagnostics. -void clang::EmitClangDiagsDefs(RecordKeeper &Records, raw_ostream &OS, +void clang::EmitClangDiagsDefs(const RecordKeeper &Records, raw_ostream &OS, const std::string &Component) { // Write the #if guard if (!Component.empty()) { @@ -1421,10 +1424,11 @@ void clang::EmitClangDiagsDefs(RecordKeeper &Records, raw_ostream &OS, DiagnosticTextBuilder DiagTextBuilder(Records); - std::vector Diags = Records.getAllDerivedDefinitions("Diagnostic"); + ArrayRef Diags = + Records.getAllDerivedDefinitions("Diagnostic"); - std::vector DiagGroups - = Records.getAllDerivedDefinitions("DiagGroup"); + ArrayRef DiagGroups = + Records.getAllDerivedDefinitions("DiagGroup"); std::map DiagsInGroup; groupDiagnostics(Diags, DiagGroups, DiagsInGroup); @@ -1764,7 +1768,7 @@ static void emitDiagTable(std::map &DiagsInGroup, /// CATEGORY("Lambda Issue", DiagCat_Lambda_Issue) /// #endif /// \endcode -static void emitCategoryTable(RecordKeeper &Records, raw_ostream &OS) { +static void emitCategoryTable(const RecordKeeper &Records, raw_ostream &OS) { DiagCategoryIDMap CategoriesByID(Records); OS << "\n#ifdef GET_CATEGORY_TABLE\n"; for (auto const &C : CategoriesByID) @@ -1772,13 +1776,14 @@ static void emitCategoryTable(RecordKeeper &Records, raw_ostream &OS) { OS << "#endif // GET_CATEGORY_TABLE\n\n"; } -void clang::EmitClangDiagGroups(RecordKeeper &Records, raw_ostream &OS) { +void clang::EmitClangDiagGroups(const RecordKeeper &Records, raw_ostream &OS) { // Compute a mapping from a DiagGroup to all of its parents. DiagGroupParentMap DGParentMap(Records); - std::vector Diags = Records.getAllDerivedDefinitions("Diagnostic"); + ArrayRef Diags = + Records.getAllDerivedDefinitions("Diagnostic"); - std::vector DiagGroups = + ArrayRef DiagGroups = Records.getAllDerivedDefinitions("DiagGroup"); std::map DiagsInGroup; @@ -1824,9 +1829,10 @@ struct RecordIndexElement }; } // end anonymous namespace. 
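Behind the const-correctness churn above, InferPedantic's logic is a counting argument: a group belongs in -Wpedantic exactly when every diagnostic and every subgroup it contains has been marked pedantic, at which point it counts toward its own parents in turn. A minimal standalone sketch of that invariant, with hypothetical names rather than the emitter's real interface:

#include <map>
#include <vector>

// Sketch only: GroupCount[G] counts members of G already marked pedantic;
// Size is G's total membership (diagnostics plus subgroups). When the two
// meet, G is promoted and each parent's tally is bumped, mirroring the
// recursion in markGroup() above.
struct GroupNode {
  std::vector<GroupNode *> Parents;
  unsigned Size = 0;
};

void markPedantic(GroupNode *G, std::map<GroupNode *, unsigned> &GroupCount) {
  if (++GroupCount[G] == G->Size)
    for (GroupNode *P : G->Parents)
      markPedantic(P, GroupCount);
}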
-void clang::EmitClangDiagsIndexName(RecordKeeper &Records, raw_ostream &OS) { - const std::vector &Diags = - Records.getAllDerivedDefinitions("Diagnostic"); +void clang::EmitClangDiagsIndexName(const RecordKeeper &Records, + raw_ostream &OS) { + ArrayRef Diags = + Records.getAllDerivedDefinitions("Diagnostic"); std::vector Index; Index.reserve(Diags.size()); @@ -1915,7 +1921,7 @@ void writeDiagnosticText(DiagnosticTextBuilder &Builder, const Record *R, } // namespace } // namespace docs -void clang::EmitClangDiagDocs(RecordKeeper &Records, raw_ostream &OS) { +void clang::EmitClangDiagDocs(const RecordKeeper &Records, raw_ostream &OS) { using namespace docs; // Get the documentation introduction paragraph. @@ -1930,10 +1936,10 @@ void clang::EmitClangDiagDocs(RecordKeeper &Records, raw_ostream &OS) { DiagnosticTextBuilder Builder(Records); - std::vector Diags = + ArrayRef Diags = Records.getAllDerivedDefinitions("Diagnostic"); - std::vector DiagGroups = + std::vector DiagGroups = Records.getAllDerivedDefinitions("DiagGroup"); llvm::sort(DiagGroups, diagGroupBeforeByName); diff --git a/clang/utils/TableGen/ClangOpcodesEmitter.cpp b/clang/utils/TableGen/ClangOpcodesEmitter.cpp index 120e1e2efa32b4..7e426d59359a87 100644 --- a/clang/utils/TableGen/ClangOpcodesEmitter.cpp +++ b/clang/utils/TableGen/ClangOpcodesEmitter.cpp @@ -20,11 +20,11 @@ using namespace llvm; namespace { class ClangOpcodesEmitter { - RecordKeeper &Records; + const RecordKeeper &Records; unsigned NumTypes; public: - ClangOpcodesEmitter(RecordKeeper &R) + ClangOpcodesEmitter(const RecordKeeper &R) : Records(R), NumTypes(Records.getAllDerivedDefinitions("Type").size()) {} void run(raw_ostream &OS); @@ -404,6 +404,6 @@ void ClangOpcodesEmitter::PrintTypes(raw_ostream &OS, OS << ">"; } -void clang::EmitClangOpcodes(RecordKeeper &Records, raw_ostream &OS) { +void clang::EmitClangOpcodes(const RecordKeeper &Records, raw_ostream &OS) { ClangOpcodesEmitter(Records).run(OS); } diff --git a/clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp b/clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp index 74c3a856ab6937..d68dcc472a7bdb 100644 --- a/clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp +++ b/clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp @@ -87,7 +87,7 @@ struct BuiltinTableEntries { // class BuiltinNameEmitter { public: - BuiltinNameEmitter(RecordKeeper &Records, raw_ostream &OS) + BuiltinNameEmitter(const RecordKeeper &Records, raw_ostream &OS) : Records(Records), OS(OS) {} // Entrypoint to generate the functions and structures for checking @@ -100,7 +100,7 @@ class BuiltinNameEmitter { // Contains OpenCL builtin functions and related information, stored as // Record instances. They are coming from the associated TableGen file. - RecordKeeper &Records; + const RecordKeeper &Records; // The output file. raw_ostream &OS; @@ -113,7 +113,7 @@ class BuiltinNameEmitter { // \param Output (out) String containing the enums to emit in the output file. // \param List (out) List containing the extracted Types, except the Types in // TypesSeen. - void ExtractEnumTypes(std::vector &Types, + void ExtractEnumTypes(ArrayRef Types, StringMap &TypesSeen, std::string &Output, std::vector &List); @@ -237,7 +237,7 @@ class BuiltinNameEmitter { /// Base class for emitting a file (e.g. 
header or test) from OpenCLBuiltins.td class OpenCLBuiltinFileEmitterBase { public: - OpenCLBuiltinFileEmitterBase(RecordKeeper &Records, raw_ostream &OS) + OpenCLBuiltinFileEmitterBase(const RecordKeeper &Records, raw_ostream &OS) : Records(Records), OS(OS) {} virtual ~OpenCLBuiltinFileEmitterBase() = default; @@ -305,7 +305,7 @@ class OpenCLBuiltinFileEmitterBase { // Contains OpenCL builtin functions and related information, stored as // Record instances. They are coming from the associated TableGen file. - RecordKeeper &Records; + const RecordKeeper &Records; // The output file. raw_ostream &OS; @@ -316,7 +316,7 @@ class OpenCLBuiltinFileEmitterBase { // builtin function described in the .td input. class OpenCLBuiltinTestEmitter : public OpenCLBuiltinFileEmitterBase { public: - OpenCLBuiltinTestEmitter(RecordKeeper &Records, raw_ostream &OS) + OpenCLBuiltinTestEmitter(const RecordKeeper &Records, raw_ostream &OS) : OpenCLBuiltinFileEmitterBase(Records, OS) {} // Entrypoint to generate the functions for testing all OpenCL builtin @@ -329,7 +329,7 @@ class OpenCLBuiltinTestEmitter : public OpenCLBuiltinFileEmitterBase { // prototype for each builtin function described in the .td input. class OpenCLBuiltinHeaderEmitter : public OpenCLBuiltinFileEmitterBase { public: - OpenCLBuiltinHeaderEmitter(RecordKeeper &Records, raw_ostream &OS) + OpenCLBuiltinHeaderEmitter(const RecordKeeper &Records, raw_ostream &OS) : OpenCLBuiltinFileEmitterBase(Records, OS) {} // Entrypoint to generate the header. @@ -362,7 +362,7 @@ void BuiltinNameEmitter::Emit() { EmitQualTypeFinder(); } -void BuiltinNameEmitter::ExtractEnumTypes(std::vector &Types, +void BuiltinNameEmitter::ExtractEnumTypes(ArrayRef Types, StringMap &TypesSeen, std::string &Output, std::vector &List) { @@ -392,11 +392,11 @@ void BuiltinNameEmitter::EmitDeclarations() { // Extract generic types and non-generic types separately, to keep // gentypes at the end of the enum which simplifies the special handling // for gentypes in SemaLookup. - std::vector GenTypes = + ArrayRef GenTypes = Records.getAllDerivedDefinitions("GenericType"); ExtractEnumTypes(GenTypes, TypesSeen, GenTypeEnums, GenTypeList); - std::vector Types = Records.getAllDerivedDefinitions("Type"); + ArrayRef Types = Records.getAllDerivedDefinitions("Type"); ExtractEnumTypes(Types, TypesSeen, TypeEnums, TypeList); OS << TypeEnums; @@ -499,7 +499,7 @@ static void VerifySignature(const std::vector &Signature, void BuiltinNameEmitter::GetOverloads() { // Populate the TypeMap. - std::vector Types = Records.getAllDerivedDefinitions("Type"); + ArrayRef Types = Records.getAllDerivedDefinitions("Type"); unsigned I = 0; for (const auto &T : Types) { TypeMap.insert(std::make_pair(T, I++)); @@ -507,7 +507,8 @@ void BuiltinNameEmitter::GetOverloads() { // Populate the SignaturesList and the FctOverloadMap. 
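Only the counter's initialization survives in the next hunk, so here is a hedged sketch of the interning scheme that `CumulativeSignIndex` suggests: each distinct signature (a list of type indices) is stored once, and every overload refers to its slot. Helper and container names below are guesses, not this file's API:

#include <map>
#include <vector>

// Sketch: the first time a signature is seen it claims slot
// CumulativeSignIndex and the counter advances; overloads that share a
// signature share the slot.
static unsigned internSignature(const std::vector<unsigned> &Sig,
                                std::map<std::vector<unsigned>, unsigned> &Seen,
                                unsigned &CumulativeSignIndex) {
  auto [It, Inserted] = Seen.try_emplace(Sig, CumulativeSignIndex);
  if (Inserted)
    ++CumulativeSignIndex;
  return It->second;
}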
unsigned CumulativeSignIndex = 0; - std::vector Builtins = Records.getAllDerivedDefinitions("Builtin"); + ArrayRef Builtins = + Records.getAllDerivedDefinitions("Builtin"); for (const auto *B : Builtins) { StringRef BName = B->getValueAsString("Name"); FctOverloadMap.try_emplace(BName); @@ -535,7 +536,7 @@ void BuiltinNameEmitter::GetOverloads() { void BuiltinNameEmitter::EmitExtensionTable() { OS << "static const char *FunctionExtensionTable[] = {\n"; unsigned Index = 0; - std::vector FuncExtensions = + ArrayRef FuncExtensions = Records.getAllDerivedDefinitions("FunctionExtension"); for (const auto &FE : FuncExtensions) { @@ -804,11 +805,11 @@ static void OCL2Qual(Sema &S, const OpenCLTypeStruct &Ty, OS << "\n switch (Ty.ID) {\n"; // Switch cases for image types (Image2d, Image3d, ...) - std::vector ImageTypes = + ArrayRef ImageTypes = Records.getAllDerivedDefinitions("ImageType"); // Map an image type name to its 3 access-qualified types (RO, WO, RW). - StringMap> ImageTypesMap; + StringMap> ImageTypesMap; for (auto *IT : ImageTypes) ImageTypesMap[IT->getValueAsString("Name")].push_back(IT); @@ -890,7 +891,7 @@ static void OCL2Qual(Sema &S, const OpenCLTypeStruct &Ty, // Switch cases for non generic, non image types (int, int4, float, ...). // Only insert the plain scalar type; vector information and type qualifiers // are added in step 2. - std::vector Types = Records.getAllDerivedDefinitions("Type"); + ArrayRef Types = Records.getAllDerivedDefinitions("Type"); StringMap TypesSeen; for (const auto *T : Types) { @@ -1211,7 +1212,8 @@ void OpenCLBuiltinTestEmitter::emit() { unsigned TestID = 0; // Iterate over all builtins. - std::vector Builtins = Records.getAllDerivedDefinitions("Builtin"); + ArrayRef Builtins = + Records.getAllDerivedDefinitions("Builtin"); for (const auto *B : Builtins) { StringRef Name = B->getValueAsString("Name"); @@ -1274,7 +1276,8 @@ void OpenCLBuiltinHeaderEmitter::emit() { )"; // Iterate over all builtins; sort to follow order of definition in .td file. 
- std::vector Builtins = Records.getAllDerivedDefinitions("Builtin"); + std::vector Builtins = + Records.getAllDerivedDefinitions("Builtin"); llvm::sort(Builtins, LessRecord()); for (const auto *B : Builtins) { @@ -1319,18 +1322,19 @@ void OpenCLBuiltinHeaderEmitter::emit() { "#pragma OPENCL EXTENSION all : disable\n"; } -void clang::EmitClangOpenCLBuiltins(RecordKeeper &Records, raw_ostream &OS) { +void clang::EmitClangOpenCLBuiltins(const RecordKeeper &Records, + raw_ostream &OS) { BuiltinNameEmitter NameChecker(Records, OS); NameChecker.Emit(); } -void clang::EmitClangOpenCLBuiltinHeader(RecordKeeper &Records, +void clang::EmitClangOpenCLBuiltinHeader(const RecordKeeper &Records, raw_ostream &OS) { OpenCLBuiltinHeaderEmitter HeaderFileGenerator(Records, OS); HeaderFileGenerator.emit(); } -void clang::EmitClangOpenCLBuiltinTests(RecordKeeper &Records, +void clang::EmitClangOpenCLBuiltinTests(const RecordKeeper &Records, raw_ostream &OS) { OpenCLBuiltinTestEmitter TestFileGenerator(Records, OS); TestFileGenerator.emit(); diff --git a/clang/utils/TableGen/ClangOptionDocEmitter.cpp b/clang/utils/TableGen/ClangOptionDocEmitter.cpp index 86835611b84218..8c32f0218e761b 100644 --- a/clang/utils/TableGen/ClangOptionDocEmitter.cpp +++ b/clang/utils/TableGen/ClangOptionDocEmitter.cpp @@ -24,8 +24,8 @@ using namespace llvm; namespace { struct DocumentedOption { - Record *Option; - std::vector Aliases; + const Record *Option; + std::vector Aliases; }; struct DocumentedGroup; struct Documentation { @@ -37,7 +37,7 @@ struct Documentation { } }; struct DocumentedGroup : Documentation { - Record *Group; + const Record *Group; }; static bool hasFlag(const Record *Option, StringRef OptionFlag, @@ -63,25 +63,25 @@ static bool isOptionVisible(const Record *Option, const Record *DocInfo) { } // Reorganize the records into a suitable form for emitting documentation. -Documentation extractDocumentation(RecordKeeper &Records, +Documentation extractDocumentation(const RecordKeeper &Records, const Record *DocInfo) { Documentation Result; // Build the tree of groups. The root in the tree is the fake option group // (Record*)nullptr, which contains all top-level groups and options. 
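A compressed illustration of that nullptr-root convention, as a sketch with stand-in types (the real maps keyed on `const Record *` follow immediately):

#include <map>
#include <vector>

// Sketch: children are keyed by their parent, and nullptr is the synthetic
// root, so a single recursion started at nullptr reaches every group and
// option in the document.
struct Rec;
static std::map<const Rec *, std::vector<const Rec *>> Children;

static void walk(const Rec *Group, int Depth) {
  for (const Rec *Child : Children[Group])
    walk(Child, Depth + 1); // walk(nullptr, 0) emits the whole tree
}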
- std::map > OptionsInGroup; - std::map > GroupsInGroup; - std::map > Aliases; + std::map> OptionsInGroup; + std::map> GroupsInGroup; + std::map> Aliases; - std::map OptionsByName; - for (Record *R : Records.getAllDerivedDefinitions("Option")) + std::map OptionsByName; + for (const Record *R : Records.getAllDerivedDefinitions("Option")) OptionsByName[std::string(R->getValueAsString("Name"))] = R; - auto Flatten = [](Record *R) { + auto Flatten = [](const Record *R) { return R->getValue("DocFlatten") && R->getValueAsBit("DocFlatten"); }; - auto SkipFlattened = [&](Record *R) -> Record* { + auto SkipFlattened = [&](const Record *R) -> const Record * { while (R && Flatten(R)) { auto *G = dyn_cast(R->getValueInit("Group")); if (!G) @@ -91,17 +91,17 @@ Documentation extractDocumentation(RecordKeeper &Records, return R; }; - for (Record *R : Records.getAllDerivedDefinitions("OptionGroup")) { + for (const Record *R : Records.getAllDerivedDefinitions("OptionGroup")) { if (Flatten(R)) continue; - Record *Group = nullptr; + const Record *Group = nullptr; if (auto *G = dyn_cast(R->getValueInit("Group"))) Group = SkipFlattened(G->getDef()); GroupsInGroup[Group].push_back(R); } - for (Record *R : Records.getAllDerivedDefinitions("Option")) { + for (const Record *R : Records.getAllDerivedDefinitions("Option")) { if (auto *A = dyn_cast(R->getValueInit("Alias"))) { Aliases[A->getDef()].push_back(R); continue; @@ -120,33 +120,33 @@ Documentation extractDocumentation(RecordKeeper &Records, } } - Record *Group = nullptr; + const Record *Group = nullptr; if (auto *G = dyn_cast(R->getValueInit("Group"))) Group = SkipFlattened(G->getDef()); OptionsInGroup[Group].push_back(R); } - auto CompareByName = [](Record *A, Record *B) { + auto CompareByName = [](const Record *A, const Record *B) { return A->getValueAsString("Name") < B->getValueAsString("Name"); }; - auto CompareByLocation = [](Record *A, Record *B) { + auto CompareByLocation = [](const Record *A, const Record *B) { return A->getLoc()[0].getPointer() < B->getLoc()[0].getPointer(); }; - auto DocumentationForOption = [&](Record *R) -> DocumentedOption { + auto DocumentationForOption = [&](const Record *R) -> DocumentedOption { auto &A = Aliases[R]; llvm::sort(A, CompareByName); return {R, std::move(A)}; }; - std::function DocumentationForGroup = - [&](Record *R) -> Documentation { + std::function DocumentationForGroup = + [&](const Record *R) -> Documentation { Documentation D; auto &Groups = GroupsInGroup[R]; llvm::sort(Groups, CompareByLocation); - for (Record *G : Groups) { + for (const Record *G : Groups) { D.Groups.emplace_back(); D.Groups.back().Group = G; Documentation &Base = D.Groups.back(); @@ -157,7 +157,7 @@ Documentation extractDocumentation(RecordKeeper &Records, auto &Options = OptionsInGroup[R]; llvm::sort(Options, CompareByName); - for (Record *O : Options) + for (const Record *O : Options) if (isOptionVisible(O, DocInfo)) D.Options.push_back(DocumentationForOption(O)); @@ -444,7 +444,7 @@ void emitDocumentation(int Depth, const Documentation &Doc, } // namespace -void clang::EmitClangOptDocs(RecordKeeper &Records, raw_ostream &OS) { +void clang::EmitClangOptDocs(const RecordKeeper &Records, raw_ostream &OS) { const Record *DocInfo = Records.getDef("GlobalDocumentation"); if (!DocInfo) { PrintFatalError("The GlobalDocumentation top-level definition is missing, " diff --git a/clang/utils/TableGen/TableGenBackends.h b/clang/utils/TableGen/TableGenBackends.h index 35cc04d6ef31f4..6b8d7f82ec9845 100644 --- 
a/clang/utils/TableGen/TableGenBackends.h +++ b/clang/utils/TableGen/TableGenBackends.h @@ -76,26 +76,27 @@ void EmitClangAttrDocTable(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitClangBuiltins(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitClangDiagsDefs(llvm::RecordKeeper &Records, llvm::raw_ostream &OS, - const std::string &Component); -void EmitClangDiagGroups(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitClangDiagsIndexName(llvm::RecordKeeper &Records, +void EmitClangDiagsDefs(const llvm::RecordKeeper &Records, + llvm::raw_ostream &OS, const std::string &Component); +void EmitClangDiagGroups(const llvm::RecordKeeper &Records, + llvm::raw_ostream &OS); +void EmitClangDiagsIndexName(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitClangSACheckers(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitClangCommentHTMLTags(llvm::RecordKeeper &Records, +void EmitClangCommentHTMLTags(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitClangCommentHTMLTagsProperties(llvm::RecordKeeper &Records, +void EmitClangCommentHTMLTagsProperties(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitClangCommentHTMLNamedCharacterReferences(llvm::RecordKeeper &Records, - llvm::raw_ostream &OS); +void EmitClangCommentHTMLNamedCharacterReferences( + const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitClangCommentCommandInfo(llvm::RecordKeeper &Records, +void EmitClangCommentCommandInfo(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitClangCommentCommandList(llvm::RecordKeeper &Records, +void EmitClangCommentCommandList(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitClangOpcodes(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); +void EmitClangOpcodes(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitClangSyntaxNodeList(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); @@ -142,17 +143,18 @@ void EmitCdeBuiltinCG(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitCdeBuiltinAliases(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitClangAttrDocs(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitClangDiagDocs(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitClangOptDocs(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); +void EmitClangDiagDocs(const llvm::RecordKeeper &Records, + llvm::raw_ostream &OS); +void EmitClangOptDocs(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitClangOpenCLBuiltins(llvm::RecordKeeper &Records, +void EmitClangOpenCLBuiltins(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitClangOpenCLBuiltinHeader(llvm::RecordKeeper &Records, +void EmitClangOpenCLBuiltinHeader(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitClangOpenCLBuiltinTests(llvm::RecordKeeper &Records, +void EmitClangOpenCLBuiltinTests(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitClangDataCollectors(llvm::RecordKeeper &Records, +void EmitClangDataCollectors(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitTestPragmaAttributeSupportedAttributes(llvm::RecordKeeper &Records, diff --git a/clang/www/cxx_dr_status.html b/clang/www/cxx_dr_status.html index b638f0ff30bcce..f036fc5add2413 100755 --- a/clang/www/cxx_dr_status.html +++ b/clang/www/cxx_dr_status.html @@ -10717,7 +10717,7 @@

[cxx_dr_status.html: HTML table markup lost in extraction; page heading "C++ defect report implementation status". The hunk updates the status of DR 1815:]
   1815  CD4  Lifetime extension in aggregate initialization
-  No
+  Clang 20
   1816
diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64.c b/compiler-rt/lib/builtins/cpu_model/aarch64.c
index 0dd397783b67f5..ea2da23a95278f 100644
--- a/compiler-rt/lib/builtins/cpu_model/aarch64.c
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64.c
@@ -14,7 +14,7 @@
 #include "aarch64.h"
 
-#if !defined(__aarch64__)
+#if !defined(__aarch64__) && !defined(__arm64__) && !defined(_M_ARM64)
 #error This file is intended only for aarch64-based targets
 #endif
diff --git a/compiler-rt/lib/lsan/lsan_interceptors.cpp b/compiler-rt/lib/lsan/lsan_interceptors.cpp
index b569c337e97641..efbf2fdfb0ab3f 100644
--- a/compiler-rt/lib/lsan/lsan_interceptors.cpp
+++ b/compiler-rt/lib/lsan/lsan_interceptors.cpp
@@ -77,6 +77,8 @@ INTERCEPTOR(void*, malloc, uptr size) {
 }
 
 INTERCEPTOR(void, free, void *p) {
+  if (UNLIKELY(!p))
+    return;
   if (DlsymAlloc::PointerIsMine(p))
     return DlsymAlloc::Free(p);
   ENSURE_LSAN_INITED;
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h
index 7d7ed9bc07ccfe..e71a6bcd6a8371 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h
@@ -183,6 +183,11 @@
 #define SANITIZER_INTERCEPT_FPUTS SI_POSIX
 #define SANITIZER_INTERCEPT_PUTS SI_POSIX
 
+#define SANITIZER_INTERCEPT_CREAT64 (SI_GLIBC || SI_SOLARIS32)
+#define SANITIZER_INTERCEPT_FCNTL64 (SI_GLIBC || SI_SOLARIS32)
+#define SANITIZER_INTERCEPT_OPEN64 (SI_GLIBC || SI_SOLARIS32)
+#define SANITIZER_INTERCEPT_OPENAT64 (SI_GLIBC || SI_SOLARIS32)
+
 #define SANITIZER_INTERCEPT_PREAD64 (SI_GLIBC || SI_SOLARIS32)
 #define SANITIZER_INTERCEPT_PWRITE64 (SI_GLIBC || SI_SOLARIS32)
diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h
index 045070d0e34de9..a3b6e309ed3fce 100644
--- a/compiler-rt/lib/scudo/standalone/primary64.h
+++ b/compiler-rt/lib/scudo/standalone/primary64.h
@@ -1207,7 +1207,7 @@ template class SizeClassAllocator64 {
 
   void getMemoryGroupFragmentationInfoInRegion(RegionInfo *Region, uptr ClassId,
                                                ScopedString *Str)
-      REQUIRES(Region->MMLock) {
+      REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock) {
     const uptr BlockSize = getSizeByClassId(ClassId);
     const uptr AllocatedUserEnd =
         Region->MemMapInfo.AllocatedUser + Region->RegionBeg;
diff --git a/compiler-rt/test/sanitizer_common/TestCases/dlsym_alloc.c b/compiler-rt/test/sanitizer_common/TestCases/dlsym_alloc.c
new file mode 100644
index 00000000000000..0228c3bc50dbd9
--- /dev/null
+++ b/compiler-rt/test/sanitizer_common/TestCases/dlsym_alloc.c
@@ -0,0 +1,58 @@
+// RUN: %clang -O0 %s -o %t && %run %t
+
+// FIXME: TSAN does not use DlsymAlloc.
+// UNSUPPORTED: tsan
+
+#include <stdlib.h>
+
+const char *test() __attribute__((disable_sanitizer_instrumentation)) {
+  void *volatile p = malloc(3);
+  p = realloc(p, 7);
+  free(p);
+
+  p = calloc(3, 7);
+  free(p);
+
+  free(NULL);
+
+  return "";
+}
+
+const char *__asan_default_options()
+    __attribute__((disable_sanitizer_instrumentation)) {
+  return test();
+}
+const char *__hwasan_default_options()
+    __attribute__((disable_sanitizer_instrumentation)) {
+  return test();
+}
+const char *__lsan_default_options()
+    __attribute__((disable_sanitizer_instrumentation)) {
+  return test();
+}
+const char *__memprof_default_options()
+    __attribute__((disable_sanitizer_instrumentation)) {
+  return test();
+}
+const char *__msan_default_options()
+    __attribute__((disable_sanitizer_instrumentation)) {
+  return test();
+}
+const char *__nsan_default_options()
+    __attribute__((disable_sanitizer_instrumentation)) {
+  return test();
+}
+const char *__rtsan_default_options()
+    __attribute__((disable_sanitizer_instrumentation)) {
+  return test();
+}
+const char *__tsan_default_options()
+    __attribute__((disable_sanitizer_instrumentation)) {
+  return test();
+}
+const char *__ubsan_default_options()
+    __attribute__((disable_sanitizer_instrumentation)) {
+  return test();
+}
+
+int main(int argc, char **argv) { return 0; }
diff --git a/cross-project-tests/debuginfo-tests/dexter/dex/debugger/lldb/LLDB.py b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/lldb/LLDB.py
index 3944c1c4b009db..2307550aca047b 100644
--- a/cross-project-tests/debuginfo-tests/dexter/dex/debugger/lldb/LLDB.py
+++ b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/lldb/LLDB.py
@@ -206,6 +206,33 @@ def launch(self, cmdline):
 
     def step(self):
         self._thread.StepInto()
+        stop_reason = self._thread.GetStopReason()
+        # If we (1) completed a step and (2) are sitting at a breakpoint,
+        # but (3) the breakpoint is not reported as the stop reason, then
+        # we'll need to step once more to hit the breakpoint.
+        #
+        # dexter sets breakpoints on every source line, then steps
+        # each source line. Older versions of lldb would overwrite the stop
+        # reason with "breakpoint hit" when we stopped at a breakpoint,
+        # even if the breakpoint hadn't been executed yet. The result was
+        # one step per source line, hitting a breakpoint each time.
+        #
+        # But the more accurate behavior is that the step completes
+        # with a step-completed stop reason; then, when we step again,
+        # we execute the breakpoint and stop (with the pc the same) with
+        # a breakpoint-hit stop reason. So we need to step twice per line.
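+        #
+        # A concrete illustration (hypothetical sequence, not from a real
+        # trace): StepInto() from line 10 stops with eStopReasonPlanComplete
+        # while the pc already sits on line 11's breakpoint address; only
+        # the next StepInto() executes that breakpoint and reports
+        # eStopReasonBreakpoint at the same pc. The check below recognizes
+        # the first state and issues the extra step.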
+ if stop_reason == self._interface.eStopReasonPlanComplete: + stepped_to_breakpoint = False + pc = self._thread.GetFrameAtIndex(0).GetPC() + for bp in self._target.breakpoints: + for bploc in bp.locations: + if ( + bploc.IsEnabled() + and bploc.GetAddress().GetLoadAddress(self._target) == pc + ): + stepped_to_breakpoint = True + if stepped_to_breakpoint: + self._thread.StepInto() def go(self) -> ReturnCode: self._process.Continue() diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp index 10aef72dd42cb9..e94a49f6871db4 100644 --- a/flang/lib/Semantics/expression.cpp +++ b/flang/lib/Semantics/expression.cpp @@ -2744,7 +2744,6 @@ std::pair ExpressionAnalyzer::ResolveGeneric( (!procedure->IsElemental() && nonElemental)) { int d{ComputeCudaMatchingDistance( context_.languageFeatures(), *procedure, localActuals)}; - llvm::errs() << "matching distance: " << d << "\n"; if (d != crtMatchingDistance) { if (d > crtMatchingDistance) { continue; diff --git a/libc/cmake/modules/LLVMLibCArchitectures.cmake b/libc/cmake/modules/LLVMLibCArchitectures.cmake index d922b4f21a8ac6..7711127c1a81e1 100644 --- a/libc/cmake/modules/LLVMLibCArchitectures.cmake +++ b/libc/cmake/modules/LLVMLibCArchitectures.cmake @@ -206,6 +206,13 @@ if(explicit_target_triple AND endif() endif() + +# Windows does not support full mode build. +if (LIBC_TARGET_OS_IS_WINDOWS AND LLVM_LIBC_FULL_BUILD) + message(FATAL_ERROR "Windows does not support full mode build.") +endif () + + message(STATUS "Building libc for ${LIBC_TARGET_ARCHITECTURE} on ${LIBC_TARGET_OS} with LIBC_COMPILE_OPTIONS_DEFAULT: ${LIBC_COMPILE_OPTIONS_DEFAULT}") diff --git a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake index 45dfe3e63302bf..8643c9bb48ad41 100644 --- a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake +++ b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake @@ -75,6 +75,10 @@ function(_get_compile_options_from_config output_var) list(APPEND config_options "-DLIBC_TYPES_TIME_T_IS_32_BIT") endif() + if(LIBC_ADD_NULL_CHECKS) + list(APPEND config_options "-DLIBC_ADD_NULL_CHECKS") + endif() + set(${output_var} ${config_options} PARENT_SCOPE) endfunction(_get_compile_options_from_config) diff --git a/libc/config/config.json b/libc/config/config.json index 2e72c0a3fd1d69..7dfbb560a36db3 100644 --- a/libc/config/config.json +++ b/libc/config/config.json @@ -94,5 +94,11 @@ "value": false, "doc": "Force the size of time_t to 64 bits, even on platforms where compatibility considerations would otherwise make it 32-bit." } + }, + "general": { + "LIBC_ADD_NULL_CHECKS": { + "value": true, + "doc": "Add nullptr checks in the library's implementations to some functions for which passing nullptr is undefined behavior." + } } } diff --git a/libc/docs/configure.rst b/libc/docs/configure.rst index 54ca5d55d7b243..86875d4c975c01 100644 --- a/libc/docs/configure.rst +++ b/libc/docs/configure.rst @@ -30,6 +30,8 @@ to learn about the defaults for your platform and target. - ``LIBC_CONF_KEEP_FRAME_POINTER``: Keep frame pointer in functions for better debugging experience. * **"errno" options** - ``LIBC_CONF_ERRNO_MODE``: The implementation used for errno, acceptable values are LIBC_ERRNO_MODE_DEFAULT, LIBC_ERRNO_MODE_UNDEFINED, LIBC_ERRNO_MODE_THREAD_LOCAL, LIBC_ERRNO_MODE_SHARED, LIBC_ERRNO_MODE_EXTERNAL, and LIBC_ERRNO_MODE_SYSTEM. 
+* **"general" options**
+  - ``LIBC_ADD_NULL_CHECKS``: Add nullptr checks in the library's implementations to some functions for which passing nullptr is undefined behavior.
 * **"math" options**
   - ``LIBC_CONF_MATH_OPTIMIZATIONS``: Configures optimizations for math functions. Values accepted are LIBC_MATH_SKIP_ACCURATE_PASS, LIBC_MATH_SMALL_TABLES, LIBC_MATH_NO_ERRNO, LIBC_MATH_NO_EXCEPT, and LIBC_MATH_FAST.
 * **"printf" options**
diff --git a/libc/hdr/CMakeLists.txt b/libc/hdr/CMakeLists.txt
index a2fad9b473ed7e..e0b65b7c2eb02d 100644
--- a/libc/hdr/CMakeLists.txt
+++ b/libc/hdr/CMakeLists.txt
@@ -143,4 +143,22 @@ add_proxy_header_library(
     libc.include.llvm-libc-macros.limits_macros
 )
 
+add_proxy_header_library(
+  link_macros
+  HDRS
+    link_macros.h
+  FULL_BUILD_DEPENDS
+    libc.include.llvm-libc-macros.link_macros
+    libc.include.link
+)
+
+add_proxy_header_library(
+  sys_auxv_macros
+  HDRS
+    sys_auxv_macros.h
+  FULL_BUILD_DEPENDS
+    libc.include.llvm-libc-macros.sys_auxv_macros
+    libc.include.sys_auxv
+)
+
 add_subdirectory(types)
diff --git a/libc/hdr/link_macros.h b/libc/hdr/link_macros.h
new file mode 100644
index 00000000000000..8a78a864e6ce4c
--- /dev/null
+++ b/libc/hdr/link_macros.h
@@ -0,0 +1,22 @@
+//===-- Definition of macros from link.h ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_HDR_LINK_MACROS_H
+#define LLVM_LIBC_HDR_LINK_MACROS_H
+
+#ifdef LIBC_FULL_BUILD
+
+#include "include/llvm-libc-macros/link-macros.h"
+
+#else // Overlay mode
+
+#include <link.h>
+
+#endif // LLVM_LIBC_FULL_BUILD
+
+#endif // LLVM_LIBC_HDR_LINK_MACROS_H
diff --git a/libc/hdr/sys_auxv_macros.h b/libc/hdr/sys_auxv_macros.h
new file mode 100644
index 00000000000000..c04011baedb860
--- /dev/null
+++ b/libc/hdr/sys_auxv_macros.h
@@ -0,0 +1,22 @@
+//===-- Definition of macros from sys/auxv.h ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_HDR_SYS_AUXV_MACROS_H
+#define LLVM_LIBC_HDR_SYS_AUXV_MACROS_H
+
+#ifdef LIBC_FULL_BUILD
+
+#include "include/llvm-libc-macros/sys-auxv-macros.h"
+
+#else // Overlay mode
+
+#include <sys/auxv.h>
+
+#endif // LLVM_LIBC_FULL_BUILD
+
+#endif // LLVM_LIBC_HDR_SYS_AUXV_MACROS_H
diff --git a/libc/include/llvm-libc-macros/float16-macros.h b/libc/include/llvm-libc-macros/float16-macros.h
index 9a11ecc49307e2..229e3e62f2aedf 100644
--- a/libc/include/llvm-libc-macros/float16-macros.h
+++ b/libc/include/llvm-libc-macros/float16-macros.h
@@ -13,7 +13,8 @@
 #if defined(__FLT16_MANT_DIG__) &&                                             \
     (!defined(__GNUC__) || __GNUC__ >= 13 || defined(__clang__)) &&            \
-    !defined(__arm__) && !defined(_M_ARM) && !defined(__riscv)
+    !defined(__arm__) && !defined(_M_ARM) && !defined(__riscv) &&              \
+    !defined(_WIN32)
 #define LIBC_TYPES_HAS_FLOAT16
 
 // TODO: This would no longer be required if HdrGen let us guard function
diff --git a/libc/include/llvm-libc-macros/stdckdint-macros.h b/libc/include/llvm-libc-macros/stdckdint-macros.h
index 694412290bbca0..17e4ccdc2d5f8e 100644
--- a/libc/include/llvm-libc-macros/stdckdint-macros.h
+++ b/libc/include/llvm-libc-macros/stdckdint-macros.h
@@ -10,8 +10,10 @@
 #define LLVM_LIBC_MACROS_STDCKDINT_MACROS_H
 
 // We need to use __builtin_*_overflow from GCC/Clang to implement the overflow
-// macros. Check __GNUC__ for availability of such builtins.
-#ifdef __GNUC__
+// macros. Check __GNUC__ or __clang__ for availability of such builtins.
+// Note that clang-cl defines __clang__ only and does not define __GNUC__ so we
+// have to check for both.
+#if defined(__GNUC__) || defined(__clang__)
 // clang/gcc overlay may provide similar macros, so we need to avoid redefining
 // them.
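For context, the macros this header supplies reduce directly to those builtins; the `#ifndef __STDC_VERSION_STDCKDINT_H__` guard that follows is what avoids the redefinition mentioned above. A minimal sketch of the pattern, not the header's exact text:

// Sketch: a ckd_add-style macro over the GCC/Clang builtin, which stores
// the (possibly wrapped) result through R and returns true exactly when
// the mathematical result did not fit.
#define CKD_ADD_SKETCH(R, A, B) __builtin_add_overflow((A), (B), (R))

int overflow_demo(void) {
  int sum;
  return CKD_ADD_SKETCH(&sum, 2000000000, 2000000000); // 1: overflowed
}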
#ifndef __STDC_VERSION_STDCKDINT_H__ diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt index 9bd1e29081a801..0302ad64f8b5df 100644 --- a/libc/src/__support/CMakeLists.txt +++ b/libc/src/__support/CMakeLists.txt @@ -192,6 +192,9 @@ add_header_library( libc.src.__support.CPP.optional libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.rounding_mode + libc.src.__support.macros.config + libc.src.__support.macros.null_check + libc.src.__support.macros.optimization libc.src.errno.errno ) diff --git a/libc/src/__support/CPP/CMakeLists.txt b/libc/src/__support/CPP/CMakeLists.txt index f2e774f166f666..c1981b827042ca 100644 --- a/libc/src/__support/CPP/CMakeLists.txt +++ b/libc/src/__support/CPP/CMakeLists.txt @@ -199,4 +199,5 @@ add_object_library( DEPENDS libc.include.stdlib libc.src.__support.common + libc.src.__support.macros.properties.os ) diff --git a/libc/src/__support/CPP/new.cpp b/libc/src/__support/CPP/new.cpp index 5a40d4a6d3b272..88db8377b2fac4 100644 --- a/libc/src/__support/CPP/new.cpp +++ b/libc/src/__support/CPP/new.cpp @@ -16,15 +16,29 @@ void operator delete(void *mem, std::align_val_t) noexcept { ::free(mem); } void operator delete(void *mem, size_t) noexcept { ::free(mem); } void operator delete(void *mem, size_t, std::align_val_t) noexcept { +#ifdef LIBC_TARGET_OS_IS_WINDOWS + ::_aligned_free(mem); +#else ::free(mem); +#endif } void operator delete[](void *mem) noexcept { ::free(mem); } -void operator delete[](void *mem, std::align_val_t) noexcept { ::free(mem); } +void operator delete[](void *mem, std::align_val_t) noexcept { +#ifdef LIBC_TARGET_OS_IS_WINDOWS + ::_aligned_free(mem); +#else + ::free(mem); +#endif +} void operator delete[](void *mem, size_t) noexcept { ::free(mem); } void operator delete[](void *mem, size_t, std::align_val_t) noexcept { +#ifdef LIBC_TARGET_OS_IS_WINDOWS + ::_aligned_free(mem); +#else ::free(mem); +#endif } diff --git a/libc/src/__support/CPP/new.h b/libc/src/__support/CPP/new.h index 94a8466a39677b..c1b6b95033f84c 100644 --- a/libc/src/__support/CPP/new.h +++ b/libc/src/__support/CPP/new.h @@ -11,6 +11,7 @@ #include "src/__support/common.h" #include "src/__support/macros/config.h" +#include "src/__support/macros/properties/os.h" #include // For size_t #include // For malloc, free etc. @@ -47,7 +48,15 @@ class AllocChecker { LIBC_INLINE static void *aligned_alloc(size_t s, std::align_val_t align, AllocChecker &ac) { +#ifdef LIBC_TARGET_OS_IS_WINDOWS + // std::aligned_alloc is not available on Windows because std::free on + // Windows cannot deallocate any over-aligned memory. Microsoft provides an + // alternative for std::aligned_alloc named _aligned_malloc, but it must be + // paired with _aligned_free instead of std::free. + void *mem = ::_aligned_malloc(static_cast(align), s); +#else void *mem = ::aligned_alloc(static_cast(align), s); +#endif ac = (mem != nullptr); return mem; } diff --git a/libc/src/__support/OSUtil/io.h b/libc/src/__support/OSUtil/io.h index cb7e748fc64426..80119da77fc027 100644 --- a/libc/src/__support/OSUtil/io.h +++ b/libc/src/__support/OSUtil/io.h @@ -19,6 +19,8 @@ #include "linux/io.h" #elif defined(__Fuchsia__) #include "fuchsia/io.h" +#elif defined(_WIN32) +#include "windows/io.h" #elif defined(__ELF__) // TODO: Ideally we would have LIBC_TARGET_OS_IS_BAREMETAL. 
#include "baremetal/io.h" diff --git a/libc/src/__support/OSUtil/linux/CMakeLists.txt b/libc/src/__support/OSUtil/linux/CMakeLists.txt index 089cad454d534d..6c7014940407d8 100644 --- a/libc/src/__support/OSUtil/linux/CMakeLists.txt +++ b/libc/src/__support/OSUtil/linux/CMakeLists.txt @@ -23,3 +23,33 @@ add_object_library( libc.hdr.types.struct_f_owner_ex libc.hdr.types.off_t ) + +add_header_library( + vdso_sym + HDRS + vdso_sym.h + DEPENDS + libc.src.__support.common +) + +add_object_library( + vdso + HDRS + vdso.h + SRCS + vdso.cpp + DEPENDS + .${LIBC_TARGET_ARCHITECTURE}.vdso + libc.src.__support.CPP.array + libc.src.__support.CPP.optional + libc.src.__support.CPP.string_view + libc.src.__support.threads.callonce + libc.src.__support.threads.linux.futex_word_type + libc.hdr.types.struct_timeval + libc.hdr.types.struct_timespec + libc.hdr.types.clockid_t + libc.hdr.types.time_t + libc.hdr.link_macros + libc.src.errno.errno + libc.src.sys.auxv.getauxval +) diff --git a/libc/src/__support/OSUtil/linux/aarch64/CMakeLists.txt b/libc/src/__support/OSUtil/linux/aarch64/CMakeLists.txt index eea9badc46cae6..d9451a1af1df35 100644 --- a/libc/src/__support/OSUtil/linux/aarch64/CMakeLists.txt +++ b/libc/src/__support/OSUtil/linux/aarch64/CMakeLists.txt @@ -5,3 +5,13 @@ add_header_library( DEPENDS libc.src.__support.common ) + +add_header_library( + vdso + HDRS + vdso.h + DEPENDS + libc.src.__support.common + libc.src.__support.CPP.string_view + libc.src.__support.OSUtil.linux.vdso_sym +) diff --git a/libc/src/__support/OSUtil/linux/aarch64/vdso.h b/libc/src/__support/OSUtil/linux/aarch64/vdso.h new file mode 100644 index 00000000000000..3c4c6205071da2 --- /dev/null +++ b/libc/src/__support/OSUtil/linux/aarch64/vdso.h @@ -0,0 +1,37 @@ +//===---------- aarch64 vdso configuration ------------------------* C++ *-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_LIBC_SRC___SUPPORT_OSUTIL_LINUX_AARCH64_VDSO_H +#define LLVM_LIBC_SRC___SUPPORT_OSUTIL_LINUX_AARCH64_VDSO_H +#include "src/__support/CPP/string_view.h" +#include "src/__support/OSUtil/linux/vdso_sym.h" +namespace LIBC_NAMESPACE_DECL { +namespace vdso { +// translate VDSOSym to symbol names +// https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/arm64/kernel/vdso/vdso.lds.S +LIBC_INLINE constexpr cpp::string_view symbol_name(VDSOSym sym) { + switch (sym) { + case VDSOSym::RTSigReturn: + return "__kernel_rt_sigreturn"; + case VDSOSym::GetTimeOfDay: + return "__kernel_gettimeofday"; + case VDSOSym::ClockGetTime: + return "__kernel_clock_gettime"; + case VDSOSym::ClockGetRes: + return "__kernel_clock_getres"; + default: + return ""; + } +} + +// symbol versions +LIBC_INLINE constexpr cpp::string_view symbol_version(VDSOSym) { + return "LINUX_2.6.39"; +} +} // namespace vdso +} // namespace LIBC_NAMESPACE_DECL +#endif // LLVM_LIBC_SRC___SUPPORT_OSUTIL_LINUX_AARCH64_VDSO_H diff --git a/libc/src/__support/OSUtil/linux/arm/CMakeLists.txt b/libc/src/__support/OSUtil/linux/arm/CMakeLists.txt index 733366f6d4a2e3..d991f7e0914796 100644 --- a/libc/src/__support/OSUtil/linux/arm/CMakeLists.txt +++ b/libc/src/__support/OSUtil/linux/arm/CMakeLists.txt @@ -5,3 +5,13 @@ add_header_library( DEPENDS libc.src.__support.common ) + +add_header_library( + vdso + HDRS + vdso.h + DEPENDS + libc.src.__support.common + libc.src.__support.CPP.string_view + libc.src.__support.OSUtil.linux.vdso_sym +) diff --git a/libc/src/__support/OSUtil/linux/arm/vdso.h b/libc/src/__support/OSUtil/linux/arm/vdso.h new file mode 100644 index 00000000000000..3de5860359c155 --- /dev/null +++ b/libc/src/__support/OSUtil/linux/arm/vdso.h @@ -0,0 +1,37 @@ +//===---------- arm vdso configuration ----------------------------* C++ *-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_LIBC_SRC___SUPPORT_OSUTIL_LINUX_ARM_VDSO_H +#define LLVM_LIBC_SRC___SUPPORT_OSUTIL_LINUX_ARM_VDSO_H +#include "src/__support/CPP/string_view.h" +#include "src/__support/OSUtil/linux/vdso_sym.h" +namespace LIBC_NAMESPACE_DECL { +namespace vdso { +// translate VDSOSym to symbol names +// https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/arm/vdso/vdso.lds.S +LIBC_INLINE constexpr cpp::string_view symbol_name(VDSOSym sym) { + switch (sym) { + case VDSOSym::ClockGetTime: + return "__vdso_clock_gettime"; + case VDSOSym::GetTimeOfDay: + return "__vdso_gettimeofday"; + case VDSOSym::ClockGetRes: + return "__vdso_clock_getres"; + case VDSOSym::ClockGetTime64: + return "__vdso_clock_gettime64"; + default: + return ""; + } +} + +// symbol versions +LIBC_INLINE constexpr cpp::string_view symbol_version(VDSOSym) { + return "LINUX_2.6"; +} +} // namespace vdso +} // namespace LIBC_NAMESPACE_DECL +#endif // LLVM_LIBC_SRC___SUPPORT_OSUTIL_LINUX_ARM_VDSO_H diff --git a/libc/src/__support/OSUtil/linux/riscv/CMakeLists.txt b/libc/src/__support/OSUtil/linux/riscv/CMakeLists.txt index e271204f519820..eb93dd4af35ce7 100644 --- a/libc/src/__support/OSUtil/linux/riscv/CMakeLists.txt +++ b/libc/src/__support/OSUtil/linux/riscv/CMakeLists.txt @@ -5,3 +5,13 @@ add_header_library( DEPENDS libc.src.__support.common ) + +add_header_library( + vdso + HDRS + vdso.h + DEPENDS + libc.src.__support.common + libc.src.__support.CPP.string_view + libc.src.__support.OSUtil.linux.vdso_sym +) diff --git a/libc/src/__support/OSUtil/linux/riscv/vdso.h b/libc/src/__support/OSUtil/linux/riscv/vdso.h new file mode 100644 index 00000000000000..24ddb25ea980a5 --- /dev/null +++ b/libc/src/__support/OSUtil/linux/riscv/vdso.h @@ -0,0 +1,43 @@ +//===---------- RISC-V vdso configuration -------------------------* C++ *-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_LIBC_SRC___SUPPORT_OSUTIL_LINUX_RISCV_VDSO_H +#define LLVM_LIBC_SRC___SUPPORT_OSUTIL_LINUX_RISCV_VDSO_H +#include "src/__support/CPP/string_view.h" +#include "src/__support/OSUtil/linux/vdso_sym.h" +namespace LIBC_NAMESPACE_DECL { +namespace vdso { +// translate VDSOSym to symbol names +// https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/riscv/kernel/vdso/vdso.lds.S +LIBC_INLINE constexpr cpp::string_view symbol_name(VDSOSym sym) { + switch (sym) { + case VDSOSym::RTSigReturn: + return "__vdso_rt_sigreturn"; + case VDSOSym::GetTimeOfDay: + return "__vdso_gettimeofday"; + case VDSOSym::ClockGetTime: + return "__vdso_clock_gettime"; + case VDSOSym::ClockGetRes: + return "__vdso_clock_getres"; + case VDSOSym::GetCpu: + return "__vdso_getcpu"; + case VDSOSym::FlushICache: + return "__vdso_flush_icache"; + case VDSOSym::RiscvHwProbe: + return "__vdso_riscv_hwprobe"; + default: + return ""; + } +} + +// symbol versions +LIBC_INLINE constexpr cpp::string_view symbol_version(VDSOSym) { + return "LINUX_4.15"; +} +} // namespace vdso +} // namespace LIBC_NAMESPACE_DECL +#endif // LLVM_LIBC_SRC___SUPPORT_OSUTIL_LINUX_RISCV_VDSO_H diff --git a/libc/src/__support/OSUtil/linux/vdso.cpp b/libc/src/__support/OSUtil/linux/vdso.cpp new file mode 100644 index 00000000000000..cb43764badad1f --- /dev/null +++ b/libc/src/__support/OSUtil/linux/vdso.cpp @@ -0,0 +1,237 @@ +//===------------- Linux VDSO Implementation --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "src/__support/OSUtil/linux/vdso.h" +#include "hdr/link_macros.h" +#include "hdr/sys_auxv_macros.h" +#include "src/__support/CPP/array.h" +#include "src/__support/CPP/optional.h" +#include "src/__support/CPP/string_view.h" +#include "src/__support/threads/callonce.h" +#include "src/__support/threads/linux/futex_word.h" +#include "src/errno/libc_errno.h" +#include "src/sys/auxv/getauxval.h" +#include + +// TODO: This is a temporary workaround to avoid including elf.h +// Include our own headers for ElfW and friends once we have them. 
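For context before the implementation: everything below leans on one kernel contract, namely that the auxiliary vector entry AT_SYSINFO_EHDR carries the address at which the vDSO's ELF header is mapped into the process. A standalone sketch of that starting point, assuming a glibc host where <link.h> supplies the ElfW macro; this snippet is not part of the patch:

#include <cstdio>
#include <link.h>     // ElfW(type) expands to Elf32_type or Elf64_type
#include <sys/auxv.h> // getauxval, AT_SYSINFO_EHDR

int main() {
  // The kernel maps the vDSO into every process and records its address in
  // the auxiliary vector.
  unsigned long base = getauxval(AT_SYSINFO_EHDR);
  if (base == 0)
    return 1; // no vDSO available in this environment
  const ElfW(Ehdr) *ehdr = reinterpret_cast<const ElfW(Ehdr) *>(base);
  std::printf("vDSO mapped at %p, %u program headers\n",
              reinterpret_cast<void *>(base),
              static_cast<unsigned>(ehdr->e_phnum));
  return 0;
}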
+namespace LIBC_NAMESPACE_DECL {
+
+namespace vdso {
+
+Symbol::VDSOArray Symbol::global_cache{};
+CallOnceFlag Symbol::once_flag = callonce_impl::NOT_CALLED;
+
+namespace {
+// See https://refspecs.linuxfoundation.org/LSB_1.3.0/gLSB/gLSB/symverdefs.html
+struct Verdaux {
+  ElfW(Word) vda_name; /* Version or dependency names */
+  ElfW(Word) vda_next; /* Offset in bytes to next verdaux
+                          entry */
+};
+struct Verdef {
+  ElfW(Half) vd_version; /* Version revision */
+  ElfW(Half) vd_flags;   /* Version information */
+  ElfW(Half) vd_ndx;     /* Version Index */
+  ElfW(Half) vd_cnt;     /* Number of associated aux entries */
+  ElfW(Word) vd_hash;    /* Version name hash value */
+  ElfW(Word) vd_aux;     /* Offset in bytes to verdaux array */
+  ElfW(Word) vd_next;    /* Offset in bytes to next verdef entry */
+  Verdef *next() const {
+    if (vd_next == 0)
+      return nullptr;
+    return reinterpret_cast<Verdef *>(reinterpret_cast<uintptr_t>(this) +
+                                      vd_next);
+  }
+  Verdaux *aux() const {
+    return reinterpret_cast<Verdaux *>(reinterpret_cast<uintptr_t>(this) +
+                                       vd_aux);
+  }
+};
+
+// version search procedure specified by
+// https://refspecs.linuxfoundation.org/LSB_1.3.0/gLSB/gLSB/symversion.html#SYMVERTBL
+cpp::string_view find_version(Verdef *verdef, ElfW(Half) * versym,
+                              const char *strtab, size_t idx) {
+  constexpr ElfW(Half) VER_FLG_BASE = 0x1;
+  if (!versym)
+    return "";
+  ElfW(Half) identifier = versym[idx] & 0x7FFF;
+  // iterate through all version definitions
+  for (Verdef *def = verdef; def != nullptr; def = def->next()) {
+    // skip if this is a file-level version
+    if (def->vd_flags & VER_FLG_BASE)
+      continue;
+    // Check whether the version identifier matches. The highest bit is used
+    // to determine whether the symbol is local; only the lower 15 bits are
+    // used for the version identifier.
+    if ((def->vd_ndx & 0x7FFF) == identifier) {
+      Verdaux *aux = def->aux();
+      return strtab + aux->vda_name;
+    }
+  }
+  return "";
+}
+
+size_t shdr_get_symbol_count(ElfW(Shdr) * vdso_shdr, size_t e_shnum) {
+  if (!vdso_shdr)
+    return 0;
+  // iterate all sections until we locate the dynamic symbol section
+  for (size_t i = 0; i < e_shnum; ++i) {
+    // The dynamic symbol section is a table section; therefore, the number
+    // of entries can be computed as the ratio of the section size to the
+    // size of a single entry.
+    if (vdso_shdr[i].sh_type == SHT_DYNSYM)
+      return vdso_shdr[i].sh_size / vdso_shdr[i].sh_entsize;
+  }
+  return 0;
+}
+
+struct VDSOSymbolTable {
+  const char *strtab;
+  ElfW(Sym) * symtab;
+  // The following can be nullptr if the vDSO does not have versioning
+  ElfW(Half) * versym;
+  Verdef *verdef;
+
+  void populate_symbol_cache(Symbol::VDSOArray &symbol_table,
+                             size_t symbol_count, ElfW(Addr) vdso_addr) {
+    for (size_t i = 0, e = symbol_table.size(); i < e; ++i) {
+      Symbol sym = i;
+      cpp::string_view name = sym.name();
+      cpp::string_view version = sym.version();
+      if (name.empty())
+        continue;
+
+      for (size_t j = 0; j < symbol_count; ++j) {
+        if (name == strtab + symtab[j].st_name) {
+          // We found a symbol with the desired name; now check that it also
+          // has the right version.
+          if (versym && verdef &&
+              version != find_version(verdef, versym, strtab, j))
+            continue;
+
+          // put the symbol address into the symbol table
+          symbol_table[i] =
+              reinterpret_cast<void *>(vdso_addr + symtab[j].st_value);
+        }
+      }
+    }
+  }
+};
+
+struct PhdrInfo {
+  ElfW(Addr) vdso_addr;
+  ElfW(Dyn) * vdso_dyn;
+  static cpp::optional<PhdrInfo> from(ElfW(Phdr) * vdso_phdr, size_t e_phnum,
+                                      uintptr_t vdso_ehdr_addr) {
+    constexpr ElfW(Addr) INVALID_ADDR = static_cast<ElfW(Addr)>(-1);
+    ElfW(Addr) vdso_addr = INVALID_ADDR;
+    ElfW(Dyn) *vdso_dyn = nullptr;
+    if (!vdso_phdr)
+      return cpp::nullopt;
+    // iterate through all the program headers until we get the desired pieces
+    for (size_t i = 0; i < e_phnum; ++i) {
+      if (vdso_phdr[i].p_type == PT_DYNAMIC)
+        vdso_dyn = reinterpret_cast<ElfW(Dyn) *>(vdso_ehdr_addr +
+                                                 vdso_phdr[i].p_offset);
+
+      if (vdso_phdr[i].p_type == PT_LOAD)
+        vdso_addr =
+            vdso_ehdr_addr + vdso_phdr[i].p_offset - vdso_phdr[i].p_vaddr;
+
+      if (vdso_addr && vdso_dyn)
+        return PhdrInfo{vdso_addr, vdso_dyn};
+    }
+
+    return cpp::nullopt;
+  }
+
+  cpp::optional<VDSOSymbolTable> populate_symbol_table() {
+    const char *strtab = nullptr;
+    ElfW(Sym) *symtab = nullptr;
+    ElfW(Half) *versym = nullptr;
+    Verdef *verdef = nullptr;
+    for (ElfW(Dyn) *d = vdso_dyn; d->d_tag != DT_NULL; ++d) {
+      switch (d->d_tag) {
+      case DT_STRTAB:
+        strtab = reinterpret_cast<const char *>(vdso_addr + d->d_un.d_ptr);
+        break;
+      case DT_SYMTAB:
+        symtab = reinterpret_cast<ElfW(Sym) *>(vdso_addr + d->d_un.d_ptr);
+        break;
+      case DT_VERSYM:
+        versym = reinterpret_cast<ElfW(Half) *>(vdso_addr + d->d_un.d_ptr);
+        break;
+      case DT_VERDEF:
+        verdef = reinterpret_cast<Verdef *>(vdso_addr + d->d_un.d_ptr);
+        break;
+      }
+      if (strtab && symtab && versym && verdef)
+        break;
+    }
+    if (strtab == nullptr || symtab == nullptr)
+      return cpp::nullopt;
+
+    return VDSOSymbolTable{strtab, symtab, versym, verdef};
+  }
+};
+} // namespace
+
+void Symbol::initialize_vdso_global_cache() {
+  // first clear the symbol table
+  for (auto &i : global_cache)
+    i = nullptr;
+
+  // Get the address of the vDSO; protect errno since getauxval may change it.
+  int errno_backup = libc_errno;
+  uintptr_t vdso_ehdr_addr = getauxval(AT_SYSINFO_EHDR);
+  // Get the memory address of the vDSO ELF header.
+  auto vdso_ehdr = reinterpret_cast<ElfW(Ehdr) *>(vdso_ehdr_addr);
+  // leave the table unpopulated if we don't have a vDSO
+  if (vdso_ehdr == nullptr) {
+    libc_errno = errno_backup;
+    return;
+  }
+
+  // locate the section header inside the ELF using the section header offset
+  auto vdso_shdr =
+      reinterpret_cast<ElfW(Shdr) *>(vdso_ehdr_addr + vdso_ehdr->e_shoff);
+  size_t symbol_count = shdr_get_symbol_count(vdso_shdr, vdso_ehdr->e_shnum);
+
+  // early return if no symbol is found
+  if (symbol_count == 0)
+    return;
+
+  // We need to find both the loadable segment and the dynamic linking of
+  // the vDSO. Compute vdso_phdr as the program header using the program
+  // header offset.
+  ElfW(Phdr) *vdso_phdr =
+      reinterpret_cast<ElfW(Phdr) *>(vdso_ehdr_addr + vdso_ehdr->e_phoff);
+  cpp::optional<PhdrInfo> phdr_info =
+      PhdrInfo::from(vdso_phdr, vdso_ehdr->e_phnum, vdso_ehdr_addr);
+  // early return if either the dynamic linking or the loadable segment is
+  // not found
+  if (!phdr_info.has_value())
+    return;
+
+  // now, locate several more tables inside the dynamic linking section
+  cpp::optional<VDSOSymbolTable> vdso_symbol_table =
+      phdr_info->populate_symbol_table();
+
+  // early return if we can't find any required fields of the symbol table
+  if (!vdso_symbol_table.has_value())
+    return;
+
+  // finally, populate the global symbol table cache
+  vdso_symbol_table->populate_symbol_cache(global_cache, symbol_count,
+                                           phdr_info->vdso_addr);
+}
+} // namespace vdso
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/__support/OSUtil/linux/vdso.h b/libc/src/__support/OSUtil/linux/vdso.h
new file mode 100644
index 00000000000000..a5108b3a1fb5d3
--- /dev/null
+++ b/libc/src/__support/OSUtil/linux/vdso.h
@@ -0,0 +1,81 @@
+//===------------- Linux VDSO Header ----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIBC_SRC___SUPPORT_OSUTIL_LINUX_VDSO_H
+#define LLVM_LIBC_SRC___SUPPORT_OSUTIL_LINUX_VDSO_H
+#include "src/__support/CPP/array.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/attributes.h"
+#include "src/__support/macros/properties/architectures.h"
+#include "src/__support/threads/callonce.h"
+
+#if defined(LIBC_TARGET_ARCH_IS_X86)
+#include "x86_64/vdso.h"
+#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
+#include "aarch64/vdso.h"
+#elif defined(LIBC_TARGET_ARCH_IS_ARM)
+#include "arm/vdso.h"
+#elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
+#include "riscv/vdso.h"
+#else
+#error "unknown arch"
+#endif
+
+namespace LIBC_NAMESPACE_DECL {
+namespace vdso {
+
+class Symbol {
+  VDSOSym sym;
+
+public:
+  LIBC_INLINE_VAR static constexpr size_t COUNT =
+      static_cast<size_t>(VDSOSym::VDSOSymCount);
+  LIBC_INLINE constexpr explicit Symbol(VDSOSym sym) : sym(sym) {}
+  LIBC_INLINE constexpr Symbol(size_t idx) : sym(static_cast<VDSOSym>(idx)) {}
+  LIBC_INLINE constexpr cpp::string_view name() const {
+    return symbol_name(sym);
+  }
+  LIBC_INLINE constexpr cpp::string_view version() const {
+    return symbol_version(sym);
+  }
+  LIBC_INLINE constexpr operator size_t() const {
+    return static_cast<size_t>(sym);
+  }
+  LIBC_INLINE constexpr bool is_valid() const {
+    return *this < Symbol::global_cache.size();
+  }
+  using VDSOArray = cpp::array<void *, COUNT>;
+
+private:
+  static CallOnceFlag once_flag;
+  static VDSOArray global_cache;
+  static void initialize_vdso_global_cache();
+
+  LIBC_INLINE void *get() const {
+    if (name().empty() || !is_valid())
+      return nullptr;
+
+    callonce(&once_flag, Symbol::initialize_vdso_global_cache);
+    return (global_cache[*this]);
+  }
+  template <VDSOSym sym> friend struct TypedSymbol;
+};
+
+template <VDSOSym sym> struct TypedSymbol {
+  LIBC_INLINE constexpr operator VDSOSymType<sym>() const {
+    return cpp::bit_cast<VDSOSymType<sym>>(Symbol{sym}.get());
+  }
+  template <typename... Args>
+  LIBC_INLINE auto operator()(Args &&...args) const {
+    return this->operator VDSOSymType<sym>()(cpp::forward<Args>(args)...);
+  }
+};
+
+} // namespace vdso
+
+} // namespace LIBC_NAMESPACE_DECL
+#endif // LLVM_LIBC_SRC___SUPPORT_OSUTIL_LINUX_VDSO_H
diff --git a/libc/src/__support/OSUtil/linux/vdso_sym.h b/libc/src/__support/OSUtil/linux/vdso_sym.h
new file mode 100644
index 00000000000000..eb5f204a82f304
--- /dev/null
+++ b/libc/src/__support/OSUtil/linux/vdso_sym.h
@@ -0,0 +1,70 @@
+//===------------- Linux VDSO Symbols ---------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include "hdr/types/clock_t.h"
+#include "hdr/types/clockid_t.h"
+#include "hdr/types/struct_timespec.h"
+#include "hdr/types/struct_timeval.h"
+#include "hdr/types/time_t.h"
+#include "src/__support/common.h"
+#include <stddef.h> // For size_t.
+
+// NOLINTBEGIN(llvmlibc-implementation-in-namespace)
+// TODO: some of the following can be defined via proxy headers.
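A hedged usage sketch for the Symbol/TypedSymbol machinery defined above, mirroring the unit tests that appear later in this patch. Only the vdso names come from the patch; the wrapper function is hypothetical and not part of the change:

#include "src/__support/OSUtil/linux/vdso.h"

// Hypothetical call site: fetch CLOCK_MONOTONIC time through the vDSO when
// the kernel exports the symbol, and report failure otherwise.
int monotonic_now(timespec &ts) {
  LIBC_NAMESPACE::vdso::TypedSymbol<
      LIBC_NAMESPACE::vdso::VDSOSym::ClockGetTime>
      clock_gettime;
  // The conversion operator materializes the cached function pointer; it is
  // null when the vDSO lacks the symbol, so callers can fall back to the
  // regular syscall path.
  if (!clock_gettime)
    return -1;
  return clock_gettime(CLOCK_MONOTONIC, &ts);
}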
+struct __kernel_timespec;
+struct timezone;
+struct riscv_hwprobe;
+struct getcpu_cache;
+struct cpu_set_t;
+// NOLINTEND(llvmlibc-implementation-in-namespace)
+
+namespace LIBC_NAMESPACE_DECL {
+namespace vdso {
+
+enum class VDSOSym {
+  ClockGetTime,
+  ClockGetTime64,
+  GetTimeOfDay,
+  GetCpu,
+  Time,
+  ClockGetRes,
+  RTSigReturn,
+  FlushICache,
+  RiscvHwProbe,
+  VDSOSymCount
+};
+
+template <VDSOSym sym> LIBC_INLINE constexpr auto dispatcher() {
+  if constexpr (sym == VDSOSym::ClockGetTime)
+    return static_cast<int (*)(clockid_t, timespec *)>(nullptr);
+  else if constexpr (sym == VDSOSym::ClockGetTime64)
+    return static_cast<int (*)(clockid_t, __kernel_timespec *)>(nullptr);
+  else if constexpr (sym == VDSOSym::GetTimeOfDay)
+    return static_cast<int (*)(timeval *, timezone *)>(nullptr);
+  else if constexpr (sym == VDSOSym::GetCpu)
+    return static_cast<int (*)(unsigned *, unsigned *, getcpu_cache *)>(
+        nullptr);
+  else if constexpr (sym == VDSOSym::Time)
+    return static_cast<time_t (*)(time_t *)>(nullptr);
+  else if constexpr (sym == VDSOSym::ClockGetRes)
+    return static_cast<int (*)(clockid_t, timespec *)>(nullptr);
+  else if constexpr (sym == VDSOSym::RTSigReturn)
+    return static_cast<void (*)(void)>(nullptr);
+  else if constexpr (sym == VDSOSym::FlushICache)
+    return static_cast<void (*)(void *, void *, unsigned int)>(nullptr);
+  else if constexpr (sym == VDSOSym::RiscvHwProbe)
+    return static_cast<int (*)(riscv_hwprobe *, size_t, size_t, cpu_set_t *,
+                               unsigned)>(nullptr);
+  else
+    return static_cast<void *>(nullptr);
+}
+
+template <VDSOSym sym> using VDSOSymType = decltype(dispatcher<sym>());
+
+} // namespace vdso
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/__support/OSUtil/linux/x86_64/CMakeLists.txt b/libc/src/__support/OSUtil/linux/x86_64/CMakeLists.txt
index a7f2d74e6353e0..1324491f37b76b 100644
--- a/libc/src/__support/OSUtil/linux/x86_64/CMakeLists.txt
+++ b/libc/src/__support/OSUtil/linux/x86_64/CMakeLists.txt
@@ -5,3 +5,13 @@ add_header_library(
   DEPENDS
     libc.src.__support.common
 )
+
+add_header_library(
+  vdso
+  HDRS
+    vdso.h
+  DEPENDS
+    libc.src.__support.common
+    libc.src.__support.CPP.string_view
+    libc.src.__support.OSUtil.linux.vdso_sym
+)
diff --git a/libc/src/__support/OSUtil/linux/x86_64/vdso.h b/libc/src/__support/OSUtil/linux/x86_64/vdso.h
new file mode 100644
index 00000000000000..abe7c33e07cfab
--- /dev/null
+++ b/libc/src/__support/OSUtil/linux/x86_64/vdso.h
@@ -0,0 +1,43 @@
+//===---------- x86/x86_64 vdso configuration ---------------------* C++ *-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIBC_SRC___SUPPORT_OSUTIL_LINUX_X86_64_VDSO_H
+#define LLVM_LIBC_SRC___SUPPORT_OSUTIL_LINUX_X86_64_VDSO_H
+#include "src/__support/CPP/string_view.h"
+#include "src/__support/OSUtil/linux/vdso_sym.h"
+namespace LIBC_NAMESPACE_DECL {
+namespace vdso {
+// translate VDSOSym to symbol names
+// On x86, some symbols are also defined without the __vdso_ prefix; however,
+// it is suggested that one use the __vdso_ prefix.
+// Additionally, there is an __vdso_sgx_enter_enclave for SGX support, which
+// we do not include here for now.
+// https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/x86/entry/vdso/vdso.lds.S
+LIBC_INLINE constexpr cpp::string_view symbol_name(VDSOSym sym) {
+  switch (sym) {
+  case VDSOSym::ClockGetTime:
+    return "__vdso_clock_gettime";
+  case VDSOSym::GetTimeOfDay:
+    return "__vdso_gettimeofday";
+  case VDSOSym::GetCpu:
+    return "__vdso_getcpu";
+  case VDSOSym::Time:
+    return "__vdso_time";
+  case VDSOSym::ClockGetRes:
+    return "__vdso_clock_getres";
+  default:
+    return "";
+  }
+}
+
+// symbol versions
+LIBC_INLINE constexpr cpp::string_view symbol_version(VDSOSym) {
+  return "LINUX_2.6";
+}
+} // namespace vdso
+} // namespace LIBC_NAMESPACE_DECL
+#endif // LLVM_LIBC_SRC___SUPPORT_OSUTIL_LINUX_X86_64_VDSO_H
diff --git a/libc/src/__support/OSUtil/windows/CMakeLists.txt b/libc/src/__support/OSUtil/windows/CMakeLists.txt
new file mode 100644
index 00000000000000..be316d77f5d06a
--- /dev/null
+++ b/libc/src/__support/OSUtil/windows/CMakeLists.txt
@@ -0,0 +1,10 @@
+add_object_library(
+  windows_util
+  SRCS
+    exit.cpp
+    io.cpp
+  HDRS
+    io.h
+  DEPENDS
+    libc.src.__support.macros.config
+)
diff --git a/libc/src/__support/OSUtil/windows/exit.cpp b/libc/src/__support/OSUtil/windows/exit.cpp
new file mode 100644
index 00000000000000..369b07b848878e
--- /dev/null
+++ b/libc/src/__support/OSUtil/windows/exit.cpp
@@ -0,0 +1,23 @@
+//===-- Windows implementation of an exit function ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/macros/config.h"
+
+// On Windows we cannot make direct syscalls since Microsoft changes system call
+// IDs periodically. We must rely on functions exported from ntdll.dll or
+// kernel32.dll to invoke system service procedures.
+#define WIN32_LEAN_AND_MEAN
+#include <Windows.h>
+
+namespace LIBC_NAMESPACE_DECL {
+namespace internal {
+
+[[noreturn]] void exit(int status) { ::ExitProcess(status); }
+
+} // namespace internal
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/__support/OSUtil/windows/io.cpp b/libc/src/__support/OSUtil/windows/io.cpp
new file mode 100644
index 00000000000000..af3d1b9e43976d
--- /dev/null
+++ b/libc/src/__support/OSUtil/windows/io.cpp
@@ -0,0 +1,25 @@
+//===------------- Windows implementation of IO utils -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "io.h"
+#include "src/__support/macros/config.h"
+
+// On Windows we cannot make direct syscalls since Microsoft changes system call
+// IDs periodically. We must rely on functions exported from ntdll.dll or
+// kernel32.dll to invoke system service procedures.
+#define WIN32_LEAN_AND_MEAN
+#include <Windows.h>
+
+namespace LIBC_NAMESPACE_DECL {
+
+void write_to_stderr(cpp::string_view msg) {
+  ::HANDLE stream = ::GetStdHandle(STD_ERROR_HANDLE);
+  ::WriteFile(stream, msg.data(), msg.size(), nullptr, nullptr);
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/__support/OSUtil/windows/io.h b/libc/src/__support/OSUtil/windows/io.h
new file mode 100644
index 00000000000000..bafc00254a7cff
--- /dev/null
+++ b/libc/src/__support/OSUtil/windows/io.h
@@ -0,0 +1,21 @@
+//===------------- Windows implementation of IO utils -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_OSUTIL_WINDOWS_IO_H
+#define LLVM_LIBC_SRC___SUPPORT_OSUTIL_WINDOWS_IO_H
+
+#include "src/__support/CPP/string_view.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+void write_to_stderr(cpp::string_view msg);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_OSUTIL_WINDOWS_IO_H
diff --git a/libc/src/__support/macros/CMakeLists.txt b/libc/src/__support/macros/CMakeLists.txt
index bcd47c3651bf5d..99d4f640f283a4 100644
--- a/libc/src/__support/macros/CMakeLists.txt
+++ b/libc/src/__support/macros/CMakeLists.txt
@@ -27,3 +27,13 @@ add_header_library(
   DEPENDS
     libc.src.__support.macros.properties.compiler
 )
+
+add_header_library(
+  null_check
+  HDRS
+    null_check.h
+  DEPENDS
+    .config
+    .optimization
+    .sanitizer
+)
diff --git a/libc/src/__support/macros/null_check.h b/libc/src/__support/macros/null_check.h
new file mode 100644
index 00000000000000..400f7d809db4fa
--- /dev/null
+++ b/libc/src/__support/macros/null_check.h
@@ -0,0 +1,33 @@
+//===-- Safe nullptr check --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MACROS_NULL_CHECK_H
+#define LLVM_LIBC_SRC___SUPPORT_MACROS_NULL_CHECK_H
+
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h"
+#include "src/__support/macros/sanitizer.h"
+
+#if defined(LIBC_ADD_NULL_CHECKS) && !defined(LIBC_HAS_SANITIZER)
+// Use volatile to prevent undefined behavior of dereferencing nullptr.
+// Intentionally crashing with SIGSEGV.
+#define LIBC_CRASH_ON_NULLPTR(PTR)                                             \
+  do {                                                                         \
+    if (LIBC_UNLIKELY(PTR == nullptr)) {                                       \
+      volatile auto *crashing = PTR;                                           \
+      [[maybe_unused]] volatile auto crash = *crashing;                        \
+      __builtin_trap();                                                        \
+    }                                                                          \
+  } while (0)
+#else
+#define LIBC_CRASH_ON_NULLPTR(ptr)                                             \
+  do {                                                                         \
+  } while (0)
+#endif
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MACROS_NULL_CHECK_H
diff --git a/libc/src/__support/macros/properties/types.h b/libc/src/__support/macros/properties/types.h
index 69ddc912238e74..3ede8a6503d771 100644
--- a/libc/src/__support/macros/properties/types.h
+++ b/libc/src/__support/macros/properties/types.h
@@ -35,7 +35,7 @@
 #endif // UINT64_MAX
 
 // int128 / uint128 support
-#if defined(__SIZEOF_INT128__)
+#if defined(__SIZEOF_INT128__) && !defined(LIBC_TARGET_OS_IS_WINDOWS)
 #define LIBC_TYPES_HAS_INT128
 #endif // defined(__SIZEOF_INT128__)
 
diff --git a/libc/src/__support/macros/sanitizer.h b/libc/src/__support/macros/sanitizer.h
index c4f8b5bce39755..c20412e0f8b69f 100644
--- a/libc/src/__support/macros/sanitizer.h
+++ b/libc/src/__support/macros/sanitizer.h
@@ -15,7 +15,25 @@
 // Functions to unpoison memory
 //-----------------------------------------------------------------------------
 
+#if LIBC_HAS_FEATURE(address_sanitizer) || defined(__SANITIZE_ADDRESS__)
+#define LIBC_HAS_ADDRESS_SANITIZER
+#endif
+
 #if LIBC_HAS_FEATURE(memory_sanitizer)
+#define LIBC_HAS_MEMORY_SANITIZER
+#endif
+
+#if LIBC_HAS_FEATURE(undefined_behavior_sanitizer)
+#define LIBC_HAS_UNDEFINED_BEHAVIOR_SANITIZER
+#endif
+
+#if defined(LIBC_HAS_ADDRESS_SANITIZER) ||                                     \
+    defined(LIBC_HAS_MEMORY_SANITIZER) ||                                      \
+    defined(LIBC_HAS_UNDEFINED_BEHAVIOR_SANITIZER)
+#define LIBC_HAS_SANITIZER
+#endif
+
+#ifdef LIBC_HAS_MEMORY_SANITIZER
 // Only perform MSAN unpoison in non-constexpr context.
 #include <sanitizer/msan_interface.h>
 #define MSAN_UNPOISON(addr, size)                                              \
@@ -27,8 +45,7 @@
 #define MSAN_UNPOISON(ptr, size)
 #endif
 
-#if LIBC_HAS_FEATURE(address_sanitizer)
-#define LIBC_HAVE_ADDRESS_SANITIZER
+#ifdef LIBC_HAS_ADDRESS_SANITIZER
 #include <sanitizer/asan_interface.h>
 #define ASAN_POISON_MEMORY_REGION(addr, size)                                  \
   __asan_poison_memory_region((addr), (size))
diff --git a/libc/src/__support/str_to_float.h b/libc/src/__support/str_to_float.h
index ffd6ebf27c7726..a452b3a55fdeb4 100644
--- a/libc/src/__support/str_to_float.h
+++ b/libc/src/__support/str_to_float.h
@@ -20,6 +20,8 @@
 #include "src/__support/detailed_powers_of_ten.h"
 #include "src/__support/high_precision_decimal.h"
 #include "src/__support/macros/config.h"
+#include "src/__support/macros/null_check.h"
+#include "src/__support/macros/optimization.h"
 #include "src/__support/str_to_integer.h"
 #include "src/__support/str_to_num_result.h"
 #include "src/__support/uint128.h"
@@ -1208,6 +1210,8 @@ template <class T> LIBC_INLINE StrToNumResult<T> strtonan(const char *arg) {
   using FPBits = typename fputil::FPBits<T>;
   using StorageType = typename FPBits::StorageType;
 
+  LIBC_CRASH_ON_NULLPTR(arg);
+
   FPBits result;
   int error = 0;
   StorageType nan_mantissa = 0;
diff --git a/libc/src/stdlib/atexit.cpp b/libc/src/stdlib/atexit.cpp
index c8a15dd3cfef2d..799aad136bda5c 100644
--- a/libc/src/stdlib/atexit.cpp
+++ b/libc/src/stdlib/atexit.cpp
@@ -16,6 +16,7 @@ namespace LIBC_NAMESPACE_DECL {
 
 constinit ExitCallbackList atexit_callbacks;
 Mutex handler_list_mtx(false, false, false, false);
+[[gnu::weak]] extern void teardown_main_tls();
 
 extern "C" {
 
@@ -24,8 +25,11 @@ int __cxa_atexit(AtExitCallback *callback, void *payload, void *) {
 }
 
 void __cxa_finalize(void *dso) {
-  if (!dso)
+  if (!dso) {
     call_exit_callbacks(atexit_callbacks);
+    if (teardown_main_tls)
+      teardown_main_tls();
+  }
 }
 
 } // extern "C"
diff --git a/libc/src/stdlib/quick_exit.cpp b/libc/src/stdlib/quick_exit.cpp
index a5abf3e05d1a13..29110b33afcf50 100644
--- a/libc/src/stdlib/quick_exit.cpp
+++ b/libc/src/stdlib/quick_exit.cpp
@@ -16,9 +16,12 @@ namespace LIBC_NAMESPACE_DECL {
 
 extern ExitCallbackList at_quick_exit_callbacks;
+[[gnu::weak]] extern void teardown_main_tls();
 
 [[noreturn]] LLVM_LIBC_FUNCTION(void, quick_exit, (int status)) {
   call_exit_callbacks(at_quick_exit_callbacks);
+  if (teardown_main_tls)
+    teardown_main_tls();
   internal::exit(status);
 }
diff --git a/libc/src/sys/auxv/getauxval.h b/libc/src/sys/auxv/getauxval.h
index 3e6971340bbef1..d9da45ff083981 100644
--- a/libc/src/sys/auxv/getauxval.h
+++ b/libc/src/sys/auxv/getauxval.h
@@ -9,8 +9,8 @@
 #ifndef LLVM_LIBC_SRC_SYS_AUXV_GETAUXVAL_H
 #define LLVM_LIBC_SRC_SYS_AUXV_GETAUXVAL_H
 
+#include "hdr/sys_auxv_macros.h"
 #include "src/__support/macros/config.h"
-#include <sys/auxv.h>
 
 namespace LIBC_NAMESPACE_DECL {
 
diff --git a/libc/startup/gpu/amdgpu/start.cpp b/libc/startup/gpu/amdgpu/start.cpp
index f09541b0d55808..8bd0c3a938d029 100644
--- a/libc/startup/gpu/amdgpu/start.cpp
+++ b/libc/startup/gpu/amdgpu/start.cpp
@@ -17,6 +17,9 @@ extern "C" int main(int argc, char **argv, char **envp);
 
 namespace LIBC_NAMESPACE_DECL {
 
+// FIXME: Factor this out into common logic so we don't need to stub it here.
+void teardown_main_tls() {}
+
 DataEnvironment app;
 
 extern "C" uintptr_t __init_array_start[];
diff --git a/libc/startup/gpu/nvptx/start.cpp b/libc/startup/gpu/nvptx/start.cpp
index ef1e63e5161a61..bc529b36f50970 100644
--- a/libc/startup/gpu/nvptx/start.cpp
+++ b/libc/startup/gpu/nvptx/start.cpp
@@ -19,6 +19,9 @@ namespace LIBC_NAMESPACE_DECL {
 
 DataEnvironment app;
 
+// FIXME: Factor this out into common logic so we don't need to stub it here.
+void teardown_main_tls() {}
+
 extern "C" {
 // Nvidia's 'nvlink' linker does not provide these symbols. We instead need
 // to manually create them and update the globals in the loader implementation.
diff --git a/libc/startup/linux/do_start.cpp b/libc/startup/linux/do_start.cpp
index 72060b4adb2148..ff104c7f0d1d2f 100644
--- a/libc/startup/linux/do_start.cpp
+++ b/libc/startup/linux/do_start.cpp
@@ -6,6 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 #include "startup/linux/do_start.h"
+#include "config/linux/app.h"
 #include "include/llvm-libc-macros/link-macros.h"
 #include "src/__support/OSUtil/syscall.h"
 #include "src/__support/macros/config.h"
@@ -60,6 +61,10 @@ static void call_fini_array_callbacks() {
 }
 
 static ThreadAttributes main_thread_attrib;
+static TLSDescriptor tls;
+// We separate teardown_main_tls from the atexit callbacks because the callback
+// functions themselves may require TLS.
+void teardown_main_tls() { cleanup_tls(tls.addr, tls.size); }
 
 [[noreturn]] void do_start() {
   auto tid = syscall_impl(SYS_gettid);
@@ -122,7 +127,6 @@ static ThreadAttributes main_thread_attrib;
 
   // This descriptor has to be static since its cleanup function cannot
   // capture the context.
-  static TLSDescriptor tls;
   init_tls(tls);
   if (tls.size != 0 && !set_thread_ptr(tls.tp))
     syscall_impl(SYS_exit, 1);
@@ -130,10 +134,7 @@ static ThreadAttributes main_thread_attrib;
   self.attrib = &main_thread_attrib;
   main_thread_attrib.atexit_callback_mgr =
       internal::get_thread_atexit_callback_mgr();
-  // We register the cleanup_tls function to be the last atexit callback to be
-  // invoked. It will tear down the TLS. Other callbacks may depend on TLS (such
-  // as the stack protector canary).
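The [[gnu::weak]] declarations introduced above are the load-bearing trick in this change, so a condensed sketch may help. A weak reference to an undefined function still links and evaluates to a null address, which makes the guarded call a link-time feature test. The names come from the patch; the surrounding function is hypothetical:

// Binds to the definition in startup/linux/do_start.cpp (or a GPU startup
// stub) when that object file is linked in; compares equal to nullptr
// otherwise, e.g. in overlay builds that run on the system libc's startup.
[[gnu::weak]] extern void teardown_main_tls();

void exit_path_sketch() {
  if (teardown_main_tls)   // non-null only if some linked TU defines it
    teardown_main_tls();   // tear down TLS only after atexit callbacks ran
}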
-  atexit([]() { cleanup_tls(tls.addr, tls.size); });
+
   // We want the fini array callbacks to be run after other atexit
   // callbacks are run. So, we register them before running the init
   // array callbacks as they can potentially register their own atexit
   // callbacks.
diff --git a/libc/test/src/__support/CMakeLists.txt b/libc/test/src/__support/CMakeLists.txt
index 90de520405981b..7ad262d5f1f300 100644
--- a/libc/test/src/__support/CMakeLists.txt
+++ b/libc/test/src/__support/CMakeLists.txt
@@ -140,9 +140,14 @@ add_libc_test(
     arg_list_test.cpp
   DEPENDS
     libc.src.__support.arg_list
+    libc.src.__support.macros.properties.os
 )
 
-if(NOT LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
+# TODO: clang-cl generates calls into runtime library functions to
+# handle 128-bit integer arithmetic and conversions, which are not yet
+# available on Windows. Re-enable 128-bit integer support on Windows once
+# these functions are ready.
+if(NOT LIBC_TARGET_ARCHITECTURE_IS_NVPTX AND NOT LIBC_TARGET_OS_IS_WINDOWS)
   add_libc_test(
     big_int_test
     SUITE
diff --git a/libc/test/src/__support/FPUtil/CMakeLists.txt b/libc/test/src/__support/FPUtil/CMakeLists.txt
index 22fbd2664b546b..1e64e9ba425a58 100644
--- a/libc/test/src/__support/FPUtil/CMakeLists.txt
+++ b/libc/test/src/__support/FPUtil/CMakeLists.txt
@@ -25,6 +25,7 @@ add_libc_test(
     libc.src.__support.FPUtil.fp_bits
     libc.src.__support.FPUtil.fpbits_str
     libc.src.__support.integer_literals
+    libc.src.__support.macros.properties.types
     libc.src.__support.sign
 )
 
diff --git a/libc/test/src/__support/FPUtil/fpbits_test.cpp b/libc/test/src/__support/FPUtil/fpbits_test.cpp
index 99acc03010344f..df50d8546f34f2 100644
--- a/libc/test/src/__support/FPUtil/fpbits_test.cpp
+++ b/libc/test/src/__support/FPUtil/fpbits_test.cpp
@@ -9,6 +9,7 @@
 #include "src/__support/FPUtil/FPBits.h"
 #include "src/__support/FPUtil/fpbits_str.h"
 #include "src/__support/integer_literals.h"
+#include "src/__support/macros/properties/types.h"
 #include "src/__support/sign.h" // Sign
 #include "test/UnitTest/Test.h"
 
@@ -425,13 +426,10 @@ TEST(LlvmLibcFPBitsTest, DoubleType) {
   EXPECT_EQ(quiet_nan.is_quiet_nan(), true);
 }
 
-#ifdef LIBC_TARGET_ARCH_IS_X86
+#ifdef LIBC_TYPES_LONG_DOUBLE_IS_X86_FLOAT80
 TEST(LlvmLibcFPBitsTest, X86LongDoubleType) {
   using LongDoubleBits = FPBits<long double>;
 
-  if constexpr (sizeof(long double) == sizeof(double))
-    return; // The tests for the "double" type cover for this case.
-
   EXPECT_STREQ(LIBC_NAMESPACE::str(LongDoubleBits::inf(Sign::POS)).c_str(),
                "(+Infinity)");
   EXPECT_STREQ(LIBC_NAMESPACE::str(LongDoubleBits::inf(Sign::NEG)).c_str(),
diff --git a/libc/test/src/__support/OSUtil/linux/CMakeLists.txt b/libc/test/src/__support/OSUtil/linux/CMakeLists.txt
index bfb072c03e9712..ff82616cc4a701 100644
--- a/libc/test/src/__support/OSUtil/linux/CMakeLists.txt
+++ b/libc/test/src/__support/OSUtil/linux/CMakeLists.txt
@@ -1,3 +1,21 @@
 if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_ARCHITECTURE})
   add_subdirectory(${LIBC_TARGET_ARCHITECTURE})
 endif()
+
+add_libc_test(
+  vdso_test
+  SUITE libc-osutil-tests
+  SRCS vdso_test.cpp
+  DEPENDS
+    libc.src.__support.OSUtil.linux.vdso
+    libc.src.__support.OSUtil.osutil
+    libc.hdr.types.struct_sigaction
+    libc.hdr.types.struct_timeval
+    libc.hdr.types.struct_timespec
+    libc.hdr.types.clockid_t
+    libc.hdr.types.time_t
+    libc.hdr.time_macros
+    libc.hdr.signal_macros
+    libc.src.signal.sigaction
+    libc.src.signal.raise
+)
diff --git a/libc/test/src/__support/OSUtil/linux/vdso_test.cpp b/libc/test/src/__support/OSUtil/linux/vdso_test.cpp
new file mode 100644
index 00000000000000..2363db69c02f97
--- /dev/null
+++ b/libc/test/src/__support/OSUtil/linux/vdso_test.cpp
@@ -0,0 +1,162 @@
+//===-- Unittests for VDSO ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/signal_macros.h"
+#include "hdr/time_macros.h"
+#include "hdr/types/clockid_t.h"
+#include "hdr/types/struct_sigaction.h"
+#include "hdr/types/struct_timespec.h"
+#include "hdr/types/struct_timeval.h"
+#include "hdr/types/time_t.h"
+#include "src/__support/OSUtil/linux/vdso.h"
+#include "src/__support/OSUtil/syscall.h"
+#include "src/__support/macros/properties/architectures.h"
+#include "src/signal/raise.h"
+#include "src/signal/sigaction.h"
+#include "test/UnitTest/ErrnoSetterMatcher.h"
+#include "test/UnitTest/LibcTest.h"
+#include "test/UnitTest/Test.h"
+#include
+#include
+
+struct riscv_hwprobe {
+  int64_t key;
+  uint64_t value;
+};
+
+namespace LIBC_NAMESPACE_DECL {
+// For x86_64, we explicitly test that some traditional vdso symbols are
+// indeed available.
+
+TEST(LlvmLibcOSUtilVDSOTest, GetTimeOfDay) {
+  vdso::TypedSymbol<vdso::VDSOSym::GetTimeOfDay> symbol;
+  if (!symbol)
+    return;
+  timeval tv;
+  EXPECT_EQ(symbol(&tv, nullptr), 0);
+  // hopefully people are not building time machines using our libc.
+  EXPECT_GT(tv.tv_sec, static_cast<time_t>(0));
+}
+
+TEST(LlvmLibcOSUtilVDSOTest, Time) {
+  vdso::TypedSymbol<vdso::VDSOSym::Time> symbol;
+  if (!symbol)
+    return;
+  time_t a, b;
+  EXPECT_GT(symbol(&a), static_cast<time_t>(0));
+  EXPECT_GT(symbol(&b), static_cast<time_t>(0));
+  EXPECT_GE(b, a);
+}
+
+TEST(LlvmLibcOSUtilVDSOTest, ClockGetTime) {
+  vdso::TypedSymbol<vdso::VDSOSym::ClockGetTime> symbol;
+  if (!symbol)
+    return;
+  timespec a, b;
+  EXPECT_EQ(symbol(CLOCK_MONOTONIC, &a), 0);
+  EXPECT_EQ(symbol(CLOCK_MONOTONIC, &b), 0);
+  if (a.tv_sec == b.tv_sec) {
+    EXPECT_LT(a.tv_nsec, b.tv_nsec);
+  } else {
+    EXPECT_LT(a.tv_sec, b.tv_sec);
+  }
+}
+
+TEST(LlvmLibcOSUtilVDSOTest, ClockGetTime64) {
+  vdso::TypedSymbol<vdso::VDSOSym::ClockGetTime64> symbol;
+  if (!symbol)
+    return;
+  // See kernel API at
+  // https://elixir.bootlin.com/linux/latest/source/tools/testing/selftests/vDSO/vdso_test_correctness.c#L155
+  __kernel_timespec a, b;
+  EXPECT_EQ(symbol(CLOCK_MONOTONIC, &a), 0);
+  EXPECT_EQ(symbol(CLOCK_MONOTONIC, &b), 0);
+  if (a.tv_sec == b.tv_sec) {
+    EXPECT_LT(a.tv_nsec, b.tv_nsec);
+  } else {
+    EXPECT_LT(a.tv_sec, b.tv_sec);
+  }
+}
+
+TEST(LlvmLibcOSUtilVDSOTest, ClockGetRes) {
+  vdso::TypedSymbol<vdso::VDSOSym::ClockGetRes> symbol;
+  if (!symbol)
+    return;
+  timespec res{};
+  EXPECT_EQ(symbol(CLOCK_MONOTONIC, &res), 0);
+  EXPECT_TRUE(res.tv_sec > 0 || res.tv_nsec > 0);
+}
+
+TEST(LlvmLibcOSUtilVDSOTest, GetCpu) {
+  // The kernel system call has a third argument, which should be passed as
+  // nullptr.
+  vdso::TypedSymbol<vdso::VDSOSym::GetCpu> symbol;
+  if (!symbol)
+    return;
+  unsigned cpu = static_cast<unsigned>(-1), node = static_cast<unsigned>(-1);
+  EXPECT_EQ(symbol(&cpu, &node, nullptr), 0);
+  EXPECT_GE(cpu, 0u);
+  EXPECT_GE(node, 0u);
+}
+
+static bool flag = false;
+static void sigprof_handler [[gnu::used]] (int) { flag = true; }
+
+TEST(LlvmLibcOSUtilVDSOTest, RtSigReturn) {
+  using namespace testing::ErrnoSetterMatcher;
+  // must use struct since there is a function of the same name in the same
+  // scope.
+  struct sigaction sa {};
+  struct sigaction old_sa {};
+  sa.sa_handler = sigprof_handler;
+  sa.sa_flags = SA_RESTORER;
+  vdso::TypedSymbol<vdso::VDSOSym::RTSigReturn> symbol;
+  if (!symbol)
+    return;
+  sa.sa_restorer = symbol;
+  ASSERT_THAT(LIBC_NAMESPACE::sigaction(SIGPROF, &sa, &old_sa), Succeeds());
+  raise(SIGPROF);
+  ASSERT_TRUE(flag);
+  flag = false;
+  ASSERT_THAT(LIBC_NAMESPACE::sigaction(SIGPROF, &old_sa, nullptr), Succeeds());
+}
+
+TEST(LlvmLibcOSUtilVDSOTest, FlushICache) {
+  vdso::TypedSymbol<vdso::VDSOSym::FlushICache> symbol;
+  if (!symbol)
+    return;
+  char buf[512];
+  // We just check that the flush will not panic the program.
+  // The flags argument only takes 0/1 as of kernel 6.10; it indicates whether
+  // the flush is local to the core or global.
+  symbol(buf, buf + sizeof(buf), 0);
+  symbol(buf, buf + sizeof(buf), 1);
+}
+
+// https://docs.kernel.org/6.5/riscv/hwprobe.html
+TEST(LlvmLibcOSUtilVDSOTest, RiscvHwProbe) {
+  using namespace testing::ErrnoSetterMatcher;
+  vdso::TypedSymbol<vdso::VDSOSym::RiscvHwProbe> symbol;
+  if (!symbol)
+    return;
+  // If a key is unknown to the kernel, its key field will be cleared to -1,
+  // and its value set to 0. We expect all probes.value entries to be 0.
+  // Usermode can supply NULL for cpus and 0 for cpu_count as a shortcut for
+  // all online CPUs.
+  riscv_hwprobe probes[2] = {{-1, 1}, {-1, 1}};
+  ASSERT_THAT(symbol(/*pairs=*/probes, /*count=*/2, /*cpusetsize=*/0,
+                     /*cpuset=*/nullptr,
+                     /*flags=*/0),
+              Succeeds());
+  for (auto &probe : probes) {
+    EXPECT_EQ(probe.key, static_cast<int64_t>(-1));
+    EXPECT_EQ(probe.value, static_cast<uint64_t>(0));
+  }
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/test/src/__support/arg_list_test.cpp b/libc/test/src/__support/arg_list_test.cpp
index 4f229e2bfe6940..79a715e9106870 100644
--- a/libc/test/src/__support/arg_list_test.cpp
+++ b/libc/test/src/__support/arg_list_test.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/__support/arg_list.h"
+#include "src/__support/macros/properties/os.h"
 
 #include "test/UnitTest/Test.h"
 
@@ -120,7 +121,7 @@ TEST(LlvmLibcArgListTest, TestStructTypes) {
 }
 
 // Test vector extensions from clang.
-#if __has_attribute(ext_vector_type)
+#if !defined(LIBC_TARGET_OS_IS_WINDOWS) && __has_attribute(ext_vector_type)
 
 using int1 = int __attribute__((ext_vector_type(1)));
 using int2 = int __attribute__((ext_vector_type(2)));
diff --git a/libc/test/src/compiler/CMakeLists.txt b/libc/test/src/compiler/CMakeLists.txt
index 65a9acceb6f7f1..a45fa8c55e5128 100644
--- a/libc/test/src/compiler/CMakeLists.txt
+++ b/libc/test/src/compiler/CMakeLists.txt
@@ -7,6 +7,7 @@ add_libc_unittest(
   SRCS
     stack_chk_guard_test.cpp
   DEPENDS
+    libc.hdr.signal_macros
    libc.src.__support.macros.sanitizer
    libc.src.compiler.__stack_chk_fail
    libc.src.string.memset
diff --git a/libc/test/src/compiler/stack_chk_guard_test.cpp b/libc/test/src/compiler/stack_chk_guard_test.cpp
index 6b71e155fa3e4d..4ec8398c9fc95d 100644
--- a/libc/test/src/compiler/stack_chk_guard_test.cpp
+++ b/libc/test/src/compiler/stack_chk_guard_test.cpp
@@ -6,7 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "include/llvm-libc-macros/signal-macros.h"
+#include "hdr/signal_macros.h"
 #include "src/__support/macros/sanitizer.h"
 #include "src/compiler/__stack_chk_fail.h"
 #include "src/string/memset.h"
@@ -18,7 +18,7 @@ TEST(LlvmLibcStackChkFail, Death) {
 
 // Disable the test when asan is enabled so that it doesn't immediately fail
 // after the memset, but before the stack canary is re-checked.
-#ifndef LIBC_HAVE_ADDRESS_SANITIZER +#ifndef LIBC_HAS_ADDRESS_SANITIZER TEST(LlvmLibcStackChkFail, Smash) { EXPECT_DEATH( [] { @@ -27,4 +27,4 @@ TEST(LlvmLibcStackChkFail, Smash) { }, WITH_SIGNAL(SIGABRT)); } -#endif // LIBC_HAVE_ADDRESS_SANITIZER +#endif // LIBC_HAS_ADDRESS_SANITIZER diff --git a/libc/test/src/fenv/CMakeLists.txt b/libc/test/src/fenv/CMakeLists.txt index b776f9a0706e86..d79b4a49a5e4f3 100644 --- a/libc/test/src/fenv/CMakeLists.txt +++ b/libc/test/src/fenv/CMakeLists.txt @@ -41,6 +41,7 @@ add_libc_unittest( libc.src.fenv.fesetenv libc.src.fenv.fesetround libc.src.__support.FPUtil.fenv_impl + libc.src.__support.macros.properties.os LINK_LIBRARIES LibcFPTestHelpers ) diff --git a/libc/test/src/fenv/getenv_and_setenv_test.cpp b/libc/test/src/fenv/getenv_and_setenv_test.cpp index 8fc2787ecb5b1e..fa4ef662222afa 100644 --- a/libc/test/src/fenv/getenv_and_setenv_test.cpp +++ b/libc/test/src/fenv/getenv_and_setenv_test.cpp @@ -13,6 +13,7 @@ #include "src/fenv/fesetround.h" #include "src/__support/FPUtil/FEnvImpl.h" +#include "src/__support/macros/properties/os.h" #include "test/UnitTest/FEnvSafeTest.h" #include "test/UnitTest/Test.h" @@ -20,6 +21,7 @@ using LlvmLibcFEnvTest = LIBC_NAMESPACE::testing::FEnvSafeTest; +#ifndef LIBC_TARGET_OS_IS_WINDOWS TEST_F(LlvmLibcFEnvTest, GetEnvAndSetEnv) { // We will disable all exceptions to prevent invocation of the exception // handler. @@ -71,8 +73,9 @@ TEST_F(LlvmLibcFEnvTest, Set_FE_DFL_ENV) { int rm = LIBC_NAMESPACE::fegetround(); EXPECT_EQ(rm, FE_TONEAREST); } +#endif -#ifdef _WIN32 +#ifdef LIBC_TARGET_OS_IS_WINDOWS TEST_F(LlvmLibcFEnvTest, Windows_Set_Get_Test) { // If a valid fenv_t is written, then reading it back out should be identical. fenv_t setEnv = {0x7e00053e, 0x0f00000f}; diff --git a/libc/test/src/math/smoke/AddTest.h b/libc/test/src/math/smoke/AddTest.h index 0b7e395a22d4cd..88c2067ca14748 100644 --- a/libc/test/src/math/smoke/AddTest.h +++ b/libc/test/src/math/smoke/AddTest.h @@ -12,6 +12,7 @@ #include "hdr/errno_macros.h" #include "hdr/fenv_macros.h" #include "src/__support/FPUtil/BasicOperations.h" +#include "src/__support/macros/properties/os.h" #include "test/UnitTest/FEnvSafeTest.h" #include "test/UnitTest/FPMatcher.h" #include "test/UnitTest/Test.h" @@ -53,6 +54,7 @@ class AddTest : public LIBC_NAMESPACE::testing::FEnvSafeTest { } void test_range_errors(AddFunc func) { +#ifndef LIBC_TARGET_OS_IS_WINDOWS using namespace LIBC_NAMESPACE::fputil::testing; if (ForceRoundingMode r(RoundingMode::Nearest); r.success) { @@ -121,6 +123,7 @@ class AddTest : public LIBC_NAMESPACE::testing::FEnvSafeTest { FE_UNDERFLOW | FE_INEXACT); EXPECT_MATH_ERRNO(ERANGE); } +#endif } void test_inexact_results(AddFunc func) { diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt index 7271e933b9311d..47e16926f10df1 100644 --- a/libc/test/src/math/smoke/CMakeLists.txt +++ b/libc/test/src/math/smoke/CMakeLists.txt @@ -153,6 +153,7 @@ add_fp_unittest( libc.hdr.errno_macros libc.hdr.fenv_macros libc.src.math.fadd + libc.src.__support.macros.properties.os ) add_fp_unittest( @@ -168,6 +169,7 @@ add_fp_unittest( libc.hdr.errno_macros libc.hdr.fenv_macros libc.src.math.faddl + libc.src.__support.macros.properties.os ) add_fp_unittest( @@ -183,6 +185,7 @@ add_fp_unittest( libc.hdr.errno_macros libc.hdr.fenv_macros libc.src.math.faddf128 + libc.src.__support.macros.properties.os ) add_fp_unittest( @@ -424,6 +427,7 @@ add_fp_unittest( libc.src.errno.errno libc.hdr.fenv_macros libc.src.math.dsubl + 
libc.src.__support.macros.properties.os ) add_fp_unittest( @@ -438,6 +442,7 @@ add_fp_unittest( libc.hdr.errno_macros libc.hdr.fenv_macros libc.src.math.dsubf128 + libc.src.__support.macros.properties.os ) add_fp_unittest( @@ -2895,9 +2900,10 @@ add_fp_unittest( SRCS nanf_test.cpp DEPENDS - libc.include.signal + libc.hdr.signal_macros libc.src.math.nanf libc.src.__support.FPUtil.fp_bits + libc.src.__support.macros.sanitizer # FIXME: The nan tests currently have death tests, which aren't supported for # hermetic tests. UNIT_TEST_ONLY @@ -2910,9 +2916,10 @@ add_fp_unittest( SRCS nan_test.cpp DEPENDS - libc.include.signal + libc.hdr.signal_macros libc.src.math.nan libc.src.__support.FPUtil.fp_bits + libc.src.__support.macros.sanitizer # FIXME: The nan tests currently have death tests, which aren't supported for # hermetic tests. UNIT_TEST_ONLY @@ -2925,9 +2932,10 @@ add_fp_unittest( SRCS nanl_test.cpp DEPENDS - libc.include.signal + libc.hdr.signal_macros libc.src.math.nanl libc.src.__support.FPUtil.fp_bits + libc.src.__support.macros.sanitizer # FIXME: The nan tests currently have death tests, which aren't supported for # hermetic tests. UNIT_TEST_ONLY @@ -2940,7 +2948,7 @@ add_fp_unittest( SRCS nanf16_test.cpp DEPENDS - libc.include.signal + libc.hdr.signal_macros libc.src.math.nanf16 libc.src.__support.FPUtil.fp_bits libc.src.__support.macros.sanitizer @@ -2956,9 +2964,10 @@ add_fp_unittest( SRCS nanf128_test.cpp DEPENDS - libc.include.signal + libc.hdr.signal_macros libc.src.math.nanf128 libc.src.__support.FPUtil.fp_bits + libc.src.__support.macros.sanitizer # FIXME: The nan tests currently have death tests, which aren't supported for # hermetic tests. UNIT_TEST_ONLY @@ -4169,6 +4178,7 @@ add_fp_unittest( libc.hdr.errno_macros libc.hdr.fenv_macros libc.src.math.f16add + libc.src.__support.macros.properties.os ) add_fp_unittest( @@ -4183,6 +4193,7 @@ add_fp_unittest( libc.hdr.errno_macros libc.hdr.fenv_macros libc.src.math.f16addf + libc.src.__support.macros.properties.os ) add_fp_unittest( @@ -4197,6 +4208,7 @@ add_fp_unittest( libc.hdr.errno_macros libc.hdr.fenv_macros libc.src.math.f16addl + libc.src.__support.macros.properties.os ) add_fp_unittest( @@ -4211,6 +4223,7 @@ add_fp_unittest( libc.hdr.errno_macros libc.hdr.fenv_macros libc.src.math.f16addf128 + libc.src.__support.macros.properties.os ) add_fp_unittest( @@ -4225,6 +4238,7 @@ add_fp_unittest( libc.hdr.errno_macros libc.hdr.fenv_macros libc.src.math.f16sub + libc.src.__support.macros.properties.os ) add_fp_unittest( @@ -4239,6 +4253,7 @@ add_fp_unittest( libc.hdr.errno_macros libc.hdr.fenv_macros libc.src.math.f16subf + libc.src.__support.macros.properties.os ) add_fp_unittest( @@ -4253,6 +4268,7 @@ add_fp_unittest( libc.hdr.errno_macros libc.hdr.fenv_macros libc.src.math.f16subl + libc.src.__support.macros.properties.os ) add_fp_unittest( @@ -4267,6 +4283,7 @@ add_fp_unittest( libc.hdr.errno_macros libc.hdr.fenv_macros libc.src.math.f16subf128 + libc.src.__support.macros.properties.os ) add_fp_unittest( @@ -4548,6 +4565,7 @@ add_fp_unittest( libc.hdr.errno_macros libc.hdr.fenv_macros libc.src.math.fsub + libc.src.__support.macros.properties.os ) add_fp_unittest( @@ -4562,6 +4580,7 @@ add_fp_unittest( libc.hdr.errno_macros libc.hdr.fenv_macros libc.src.math.fsubl + libc.src.__support.macros.properties.os ) add_fp_unittest( @@ -4576,6 +4595,7 @@ add_fp_unittest( libc.hdr.errno_macros libc.hdr.fenv_macros libc.src.math.fsubf128 + libc.src.__support.macros.properties.os ) add_fp_unittest( @@ -4749,6 +4769,7 @@ add_fp_unittest( 
     libc.hdr.errno_macros
     libc.hdr.fenv_macros
     libc.src.math.daddl
+    libc.src.__support.macros.properties.os
 )
 
 add_fp_unittest(
@@ -4763,6 +4784,7 @@ add_fp_unittest(
     libc.hdr.errno_macros
     libc.hdr.fenv_macros
     libc.src.math.daddf128
+    libc.src.__support.macros.properties.os
 )
 
 add_fp_unittest(
diff --git a/libc/test/src/math/smoke/SubTest.h b/libc/test/src/math/smoke/SubTest.h
index 9ee4220b382085..99c4b6c760af72 100644
--- a/libc/test/src/math/smoke/SubTest.h
+++ b/libc/test/src/math/smoke/SubTest.h
@@ -11,6 +11,7 @@
 
 #include "hdr/errno_macros.h"
 #include "hdr/fenv_macros.h"
+#include "src/__support/macros/properties/os.h"
 #include "test/UnitTest/FEnvSafeTest.h"
 #include "test/UnitTest/FPMatcher.h"
 #include "test/UnitTest/Test.h"
@@ -52,6 +53,7 @@ class SubTest : public LIBC_NAMESPACE::testing::FEnvSafeTest {
   }
 
   void test_range_errors(SubFunc func) {
+#ifndef LIBC_TARGET_OS_IS_WINDOWS
    using namespace LIBC_NAMESPACE::fputil::testing;
 
    if (ForceRoundingMode r(RoundingMode::Nearest); r.success) {
@@ -123,6 +125,7 @@ class SubTest : public LIBC_NAMESPACE::testing::FEnvSafeTest {
                   FE_UNDERFLOW | FE_INEXACT);
       EXPECT_MATH_ERRNO(ERANGE);
     }
+#endif
   }
 
   void test_inexact_results(SubFunc func) {
diff --git a/libc/test/src/math/smoke/nan_test.cpp b/libc/test/src/math/smoke/nan_test.cpp
index 68c844181a1946..46b9e9aa9563ab 100644
--- a/libc/test/src/math/smoke/nan_test.cpp
+++ b/libc/test/src/math/smoke/nan_test.cpp
@@ -6,12 +6,13 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "hdr/signal_macros.h"
 #include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/macros/sanitizer.h"
 #include "src/math/nan.h"
 #include "test/UnitTest/FEnvSafeTest.h"
 #include "test/UnitTest/FPMatcher.h"
 #include "test/UnitTest/Test.h"
-#include <signal.h>
 
 class LlvmLibcNanTest : public LIBC_NAMESPACE::testing::FEnvSafeTest {
 public:
@@ -43,8 +44,8 @@ TEST_F(LlvmLibcNanTest, RandomString) {
   run_test("123 ", 0x7ff8000000000000);
 }
 
-#if !defined(LIBC_HAVE_ADDRESS_SANITIZER) && defined(LIBC_TARGET_OS_IS_LINUX)
+#if !defined(LIBC_HAS_ADDRESS_SANITIZER) && defined(LIBC_TARGET_OS_IS_LINUX)
 TEST_F(LlvmLibcNanTest, InvalidInput) {
   EXPECT_DEATH([] { LIBC_NAMESPACE::nan(nullptr); }, WITH_SIGNAL(SIGSEGV));
 }
-#endif // LIBC_HAVE_ADDRESS_SANITIZER
+#endif // LIBC_HAS_ADDRESS_SANITIZER
diff --git a/libc/test/src/math/smoke/nanf128_test.cpp b/libc/test/src/math/smoke/nanf128_test.cpp
index 015cc31e4be237..25dd2ef1d5b1ca 100644
--- a/libc/test/src/math/smoke/nanf128_test.cpp
+++ b/libc/test/src/math/smoke/nanf128_test.cpp
@@ -6,7 +6,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "hdr/signal_macros.h"
 #include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/macros/sanitizer.h"
 #include "src/__support/uint128.h"
 #include "src/math/nanf128.h"
 #include "test/UnitTest/FEnvSafeTest.h"
@@ -53,9 +55,8 @@ TEST_F(LlvmLibcNanf128Test, RandomString) {
               QUIET_NAN);
 }
 
-#if !defined(LIBC_HAVE_ADDRESS_SANITIZER) && defined(LIBC_TARGET_OS_IS_LINUX)
-#include <signal.h>
+#if !defined(LIBC_HAS_ADDRESS_SANITIZER) && defined(LIBC_TARGET_OS_IS_LINUX)
 TEST_F(LlvmLibcNanf128Test, InvalidInput) {
   EXPECT_DEATH([] { LIBC_NAMESPACE::nanf128(nullptr); }, WITH_SIGNAL(SIGSEGV));
 }
-#endif // LIBC_HAVE_ADDRESS_SANITIZER
+#endif // LIBC_HAS_ADDRESS_SANITIZER
diff --git a/libc/test/src/math/smoke/nanf16_test.cpp b/libc/test/src/math/smoke/nanf16_test.cpp
index 81b844bf6bb59c..ec640a3b9eef92 100644
--- a/libc/test/src/math/smoke/nanf16_test.cpp
+++ b/libc/test/src/math/smoke/nanf16_test.cpp
@@ -6,6 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "hdr/signal_macros.h"
 #include "src/__support/FPUtil/FPBits.h"
 #include "src/__support/macros/sanitizer.h"
 #include "src/math/nanf16.h"
@@ -13,8 +14,6 @@
 #include "test/UnitTest/FPMatcher.h"
 #include "test/UnitTest/Test.h"
 
-#include <signal.h>
-
 class LlvmLibcNanf16Test : public LIBC_NAMESPACE::testing::FEnvSafeTest {
 public:
   using StorageType = LIBC_NAMESPACE::fputil::FPBits<float16>::StorageType;
@@ -44,8 +43,8 @@ TEST_F(LlvmLibcNanf16Test, RandomString) {
   run_test("123 ", 0x7e00);
 }
 
-#if !defined(LIBC_HAVE_ADDRESS_SANITIZER) && defined(LIBC_TARGET_OS_IS_LINUX)
+#if !defined(LIBC_HAS_ADDRESS_SANITIZER) && defined(LIBC_TARGET_OS_IS_LINUX)
 TEST_F(LlvmLibcNanf16Test, InvalidInput) {
   EXPECT_DEATH([] { LIBC_NAMESPACE::nanf16(nullptr); }, WITH_SIGNAL(SIGSEGV));
 }
-#endif // LIBC_HAVE_ADDRESS_SANITIZER
+#endif // LIBC_HAS_ADDRESS_SANITIZER
diff --git a/libc/test/src/math/smoke/nanf_test.cpp b/libc/test/src/math/smoke/nanf_test.cpp
index ff5823685225ce..dd3124ee9c5112 100644
--- a/libc/test/src/math/smoke/nanf_test.cpp
+++ b/libc/test/src/math/smoke/nanf_test.cpp
@@ -6,12 +6,13 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "hdr/signal_macros.h"
 #include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/macros/sanitizer.h"
 #include "src/math/nanf.h"
 #include "test/UnitTest/FEnvSafeTest.h"
 #include "test/UnitTest/FPMatcher.h"
 #include "test/UnitTest/Test.h"
-#include <signal.h>
 
 class LlvmLibcNanfTest : public LIBC_NAMESPACE::testing::FEnvSafeTest {
 public:
@@ -42,8 +43,8 @@ TEST_F(LlvmLibcNanfTest, RandomString) {
   run_test("123 ", 0x7fc00000);
 }
 
-#if !defined(LIBC_HAVE_ADDRESS_SANITIZER) && defined(LIBC_TARGET_OS_IS_LINUX)
+#if !defined(LIBC_HAS_ADDRESS_SANITIZER) && defined(LIBC_TARGET_OS_IS_LINUX)
 TEST_F(LlvmLibcNanfTest, InvalidInput) {
   EXPECT_DEATH([] { LIBC_NAMESPACE::nanf(nullptr); }, WITH_SIGNAL(SIGSEGV));
 }
-#endif // LIBC_HAVE_ADDRESS_SANITIZER
+#endif // LIBC_HAS_ADDRESS_SANITIZER
diff --git a/libc/test/src/math/smoke/nanl_test.cpp b/libc/test/src/math/smoke/nanl_test.cpp
index de9af05100c10a..ef3f9c15dafd9f 100644
--- a/libc/test/src/math/smoke/nanl_test.cpp
+++ b/libc/test/src/math/smoke/nanl_test.cpp
@@ -6,12 +6,13 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "hdr/signal_macros.h"
 #include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/macros/sanitizer.h"
 #include "src/math/nanl.h"
 #include "test/UnitTest/FEnvSafeTest.h"
 #include "test/UnitTest/FPMatcher.h"
 #include "test/UnitTest/Test.h"
-#include <signal.h>
 
 #if defined(LIBC_TYPES_LONG_DOUBLE_IS_FLOAT64)
 #define SELECT_LONG_DOUBLE(val, _, __) val
@@ -70,8 +71,8 @@ TEST_F(LlvmLibcNanlTest, RandomString) {
   run_test("123 ", expected);
 }
 
-#if !defined(LIBC_HAVE_ADDRESS_SANITIZER) && defined(LIBC_TARGET_OS_IS_LINUX)
+#if !defined(LIBC_HAS_ADDRESS_SANITIZER) && defined(LIBC_TARGET_OS_IS_LINUX)
 TEST_F(LlvmLibcNanlTest, InvalidInput) {
   EXPECT_DEATH([] { LIBC_NAMESPACE::nanl(nullptr); }, WITH_SIGNAL(SIGSEGV));
 }
-#endif // LIBC_HAVE_ADDRESS_SANITIZER
+#endif // LIBC_HAS_ADDRESS_SANITIZER
diff --git a/libc/test/src/string/memory_utils/CMakeLists.txt b/libc/test/src/string/memory_utils/CMakeLists.txt
index a0dddd2f97b585..8374be4a1d01a3 100644
--- a/libc/test/src/string/memory_utils/CMakeLists.txt
+++ b/libc/test/src/string/memory_utils/CMakeLists.txt
@@ -12,6 +12,7 @@ add_libc_test(
     libc.src.__support.CPP.array
     libc.src.__support.CPP.cstddef
libc.src.__support.CPP.span + libc.src.__support.macros.properties.os libc.src.__support.macros.properties.types libc.src.__support.macros.sanitizer libc.src.string.memory_utils.memory_utils diff --git a/libc/test/src/string/memory_utils/op_tests.cpp b/libc/test/src/string/memory_utils/op_tests.cpp index 978561f31a2961..c6197d1afa266b 100644 --- a/libc/test/src/string/memory_utils/op_tests.cpp +++ b/libc/test/src/string/memory_utils/op_tests.cpp @@ -8,6 +8,7 @@ #include "memory_check_utils.h" #include "src/__support/macros/config.h" +#include "src/__support/macros/properties/os.h" #include "src/__support/macros/properties/types.h" // LIBC_TYPES_HAS_INT64 #include "src/string/memory_utils/op_aarch64.h" #include "src/string/memory_utils/op_builtin.h" @@ -294,7 +295,7 @@ TYPED_TEST(LlvmLibcOpTest, Bcmp, BcmpImplementations) { #endif // LIBC_TARGET_ARCH_IS_X86_64 using MemcmpImplementations = testing::TypeList< -#ifdef LIBC_TARGET_ARCH_IS_X86_64 +#if defined(LIBC_TARGET_ARCH_IS_X86_64) && !defined(LIBC_TARGET_OS_IS_WINDOWS) #ifdef __SSE2__ generic::Memcmp<__m128i>, // #endif diff --git a/libcxx/docs/Status/Cxx17Issues.csv b/libcxx/docs/Status/Cxx17Issues.csv index 7119382eb5cfb4..af3dee9ca50c98 100644 --- a/libcxx/docs/Status/Cxx17Issues.csv +++ b/libcxx/docs/Status/Cxx17Issues.csv @@ -306,7 +306,7 @@ "`LWG2905 `__","is_constructible_v, P, D const &> should be false when D is not copy constructible","2017-02 (Kona)","|Complete|","","" "`LWG2908 `__","The less-than operator for shared pointers could do more","2017-02 (Kona)","|Complete|","","" "`LWG2911 `__","An is_aggregate type trait is needed","2017-02 (Kona)","|Complete|","","" -"`LWG2921 `__","packaged_task and type-erased allocators","2017-02 (Kona)","|Complete|","","" +"`LWG2921 `__","packaged_task and type-erased allocators","2017-02 (Kona)","|Complete|","20.0","Originally implemented in LLVM 6.0 but reverted later. Old documentation incorrectly said it was implemented." "`LWG2934 `__","optional doesn't compare with T","2017-02 (Kona)","|Complete|","","" "","","","","","" "`LWG2901 `__","Variants cannot properly support allocators","2017-07 (Toronto)","|Complete|","","" diff --git a/libcxx/docs/Status/Cxx20Issues.csv b/libcxx/docs/Status/Cxx20Issues.csv index c79289968811bd..8e9fc1398bc095 100644 --- a/libcxx/docs/Status/Cxx20Issues.csv +++ b/libcxx/docs/Status/Cxx20Issues.csv @@ -27,7 +27,7 @@ "`LWG2964 `__","Apparently redundant requirement for dynamic_pointer_cast","2017-11 (Albuquerque)","","","" "`LWG2965 `__","Non-existing path::native_string() in filesystem_error::what() specification","2017-11 (Albuquerque)","|Nothing To Do|","","" "`LWG2972 `__","What is ``is_trivially_destructible_v``\ ?","2017-11 (Albuquerque)","|Complete|","","" -"`LWG2976 `__","Dangling uses_allocator specialization for packaged_task","2017-11 (Albuquerque)","|Complete|","","" +"`LWG2976 `__","Dangling uses_allocator specialization for packaged_task","2017-11 (Albuquerque)","|Complete|","20.0","Originally implemented in LLVM 6.0 but reverted later. Old documentation incorrectly said it was implemented." 
"`LWG2977 `__","unordered_meow::merge() has incorrect Throws: clause","2017-11 (Albuquerque)","|Nothing To Do|","","" "`LWG2978 `__","Hash support for pmr::string and friends","2017-11 (Albuquerque)","|Complete|","16.0","" "`LWG2979 `__","aligned_union should require complete object types","2017-11 (Albuquerque)","|Complete|","","" diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index a571832ab724d4..ffff8114e5870d 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -925,7 +925,6 @@ set(files exception execution expected - experimental/__config experimental/__simd/aligned_tag.h experimental/__simd/declaration.h experimental/__simd/reference.h diff --git a/libcxx/include/__config b/libcxx/include/__config index b0a5dda147a6ae..f0a92435230875 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -591,6 +591,15 @@ typedef __char32_t char32_t; inline namespace _LIBCPP_ABI_NAMESPACE { # define _LIBCPP_END_NAMESPACE_STD }} _LIBCPP_POP_EXTENSION_DIAGNOSTICS +#define _LIBCPP_BEGIN_NAMESPACE_EXPERIMENTAL namespace std { namespace experimental { +#define _LIBCPP_END_NAMESPACE_EXPERIMENTAL }} + +#define _LIBCPP_BEGIN_NAMESPACE_LFTS _LIBCPP_BEGIN_NAMESPACE_EXPERIMENTAL inline namespace fundamentals_v1 { +#define _LIBCPP_END_NAMESPACE_LFTS } _LIBCPP_END_NAMESPACE_EXPERIMENTAL + +#define _LIBCPP_BEGIN_NAMESPACE_LFTS_V2 _LIBCPP_BEGIN_NAMESPACE_EXPERIMENTAL inline namespace fundamentals_v2 { +#define _LIBCPP_END_NAMESPACE_LFTS_V2 } _LIBCPP_END_NAMESPACE_EXPERIMENTAL + #ifdef _LIBCPP_ABI_NO_FILESYSTEM_INLINE_NAMESPACE # define _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM _LIBCPP_BEGIN_NAMESPACE_STD namespace filesystem { # define _LIBCPP_END_NAMESPACE_FILESYSTEM } _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__pstl/backend.h b/libcxx/include/__pstl/backend.h index 86d9f28c77fa8c..5980b0708cd340 100644 --- a/libcxx/include/__pstl/backend.h +++ b/libcxx/include/__pstl/backend.h @@ -19,16 +19,20 @@ _LIBCPP_PUSH_MACROS #include <__undef_macros> -#if defined(_LIBCPP_PSTL_BACKEND_SERIAL) -# include <__pstl/backends/default.h> -# include <__pstl/backends/serial.h> -#elif defined(_LIBCPP_PSTL_BACKEND_STD_THREAD) -# include <__pstl/backends/default.h> -# include <__pstl/backends/std_thread.h> -#elif defined(_LIBCPP_PSTL_BACKEND_LIBDISPATCH) -# include <__pstl/backends/default.h> -# include <__pstl/backends/libdispatch.h> -#endif +#if _LIBCPP_STD_VER >= 17 + +# if defined(_LIBCPP_PSTL_BACKEND_SERIAL) +# include <__pstl/backends/default.h> +# include <__pstl/backends/serial.h> +# elif defined(_LIBCPP_PSTL_BACKEND_STD_THREAD) +# include <__pstl/backends/default.h> +# include <__pstl/backends/std_thread.h> +# elif defined(_LIBCPP_PSTL_BACKEND_LIBDISPATCH) +# include <__pstl/backends/default.h> +# include <__pstl/backends/libdispatch.h> +# endif + +#endif // _LIBCPP_STD_VER >= 17 _LIBCPP_POP_MACROS diff --git a/libcxx/include/__pstl/backend_fwd.h b/libcxx/include/__pstl/backend_fwd.h index 32c5da576fb3c0..2132e8dbceb3ad 100644 --- a/libcxx/include/__pstl/backend_fwd.h +++ b/libcxx/include/__pstl/backend_fwd.h @@ -39,6 +39,8 @@ _LIBCPP_PUSH_MACROS // the user. 
// +#if _LIBCPP_STD_VER >= 17 + _LIBCPP_BEGIN_NAMESPACE_STD namespace __pstl { @@ -50,18 +52,18 @@ struct __libdispatch_backend_tag; struct __serial_backend_tag; struct __std_thread_backend_tag; -#if defined(_LIBCPP_PSTL_BACKEND_SERIAL) +# if defined(_LIBCPP_PSTL_BACKEND_SERIAL) using __current_configuration = __backend_configuration<__serial_backend_tag, __default_backend_tag>; -#elif defined(_LIBCPP_PSTL_BACKEND_STD_THREAD) +# elif defined(_LIBCPP_PSTL_BACKEND_STD_THREAD) using __current_configuration = __backend_configuration<__std_thread_backend_tag, __default_backend_tag>; -#elif defined(_LIBCPP_PSTL_BACKEND_LIBDISPATCH) +# elif defined(_LIBCPP_PSTL_BACKEND_LIBDISPATCH) using __current_configuration = __backend_configuration<__libdispatch_backend_tag, __default_backend_tag>; -#else +# else // ...New vendors can add parallel backends here... -# error "Invalid PSTL backend configuration" -#endif +# error "Invalid PSTL backend configuration" +# endif template struct __find_if; @@ -296,6 +298,8 @@ struct __reduce; } // namespace __pstl _LIBCPP_END_NAMESPACE_STD +#endif // _LIBCPP_STD_VER >= 17 + _LIBCPP_POP_MACROS #endif // _LIBCPP___PSTL_BACKEND_FWD_H diff --git a/libcxx/include/__pstl/backends/default.h b/libcxx/include/__pstl/backends/default.h index b655da51fe340b..3672bbf60a265e 100644 --- a/libcxx/include/__pstl/backends/default.h +++ b/libcxx/include/__pstl/backends/default.h @@ -33,6 +33,8 @@ _LIBCPP_PUSH_MACROS #include <__undef_macros> +#if _LIBCPP_STD_VER >= 17 + _LIBCPP_BEGIN_NAMESPACE_STD namespace __pstl { @@ -498,6 +500,8 @@ struct __rotate_copy<__default_backend_tag, _ExecutionPolicy> { } // namespace __pstl _LIBCPP_END_NAMESPACE_STD +#endif // _LIBCPP_STD_VER >= 17 + _LIBCPP_POP_MACROS #endif // _LIBCPP___PSTL_BACKENDS_DEFAULT_H diff --git a/libcxx/include/__pstl/backends/libdispatch.h b/libcxx/include/__pstl/backends/libdispatch.h index a0c3ad980ed1b0..2d6ab49c8f7f3a 100644 --- a/libcxx/include/__pstl/backends/libdispatch.h +++ b/libcxx/include/__pstl/backends/libdispatch.h @@ -44,6 +44,8 @@ _LIBCPP_PUSH_MACROS #include <__undef_macros> +#if _LIBCPP_STD_VER >= 17 + _LIBCPP_BEGIN_NAMESPACE_STD namespace __pstl { @@ -140,15 +142,15 @@ struct __cpu_traits<__libdispatch_backend_tag> { unique_ptr<__merge_range_t[], decltype(__destroy)> __ranges( [&]() -> __merge_range_t* { -#ifndef _LIBCPP_HAS_NO_EXCEPTIONS +# ifndef _LIBCPP_HAS_NO_EXCEPTIONS try { -#endif +# endif return std::allocator<__merge_range_t>().allocate(__n_ranges); -#ifndef _LIBCPP_HAS_NO_EXCEPTIONS +# ifndef _LIBCPP_HAS_NO_EXCEPTIONS } catch (const std::bad_alloc&) { return nullptr; } -#endif +# endif }(), __destroy); @@ -392,6 +394,8 @@ struct __fill<__libdispatch_backend_tag, _ExecutionPolicy> } // namespace __pstl _LIBCPP_END_NAMESPACE_STD +#endif // _LIBCPP_STD_VER >= 17 + _LIBCPP_POP_MACROS #endif // _LIBCPP___PSTL_BACKENDS_LIBDISPATCH_H diff --git a/libcxx/include/__pstl/backends/serial.h b/libcxx/include/__pstl/backends/serial.h index 5f24499899bd20..f4142016ccc792 100644 --- a/libcxx/include/__pstl/backends/serial.h +++ b/libcxx/include/__pstl/backends/serial.h @@ -30,6 +30,8 @@ _LIBCPP_PUSH_MACROS #include <__undef_macros> +#if _LIBCPP_STD_VER >= 17 + _LIBCPP_BEGIN_NAMESPACE_STD namespace __pstl { @@ -176,6 +178,8 @@ struct __transform_reduce_binary<__serial_backend_tag, _ExecutionPolicy> { } // namespace __pstl _LIBCPP_END_NAMESPACE_STD +#endif // _LIBCPP_STD_VER >= 17 + _LIBCPP_POP_MACROS #endif // _LIBCPP___PSTL_BACKENDS_SERIAL_H diff --git a/libcxx/include/__pstl/backends/std_thread.h 
b/libcxx/include/__pstl/backends/std_thread.h index 49570bd30b0828..19b985f860a174 100644 --- a/libcxx/include/__pstl/backends/std_thread.h +++ b/libcxx/include/__pstl/backends/std_thread.h @@ -32,6 +32,8 @@ _LIBCPP_PUSH_MACROS #include <__undef_macros> +#if _LIBCPP_STD_VER >= 17 + _LIBCPP_BEGIN_NAMESPACE_STD namespace __pstl { @@ -131,6 +133,8 @@ struct __fill<__std_thread_backend_tag, _ExecutionPolicy> } // namespace __pstl _LIBCPP_END_NAMESPACE_STD +#endif // _LIBCPP_STD_VER >= 17 + _LIBCPP_POP_MACROS #endif // _LIBCPP___PSTL_BACKENDS_STD_THREAD_H diff --git a/libcxx/include/__pstl/cpu_algos/any_of.h b/libcxx/include/__pstl/cpu_algos/any_of.h index b33c787a29db26..803db7974eca7b 100644 --- a/libcxx/include/__pstl/cpu_algos/any_of.h +++ b/libcxx/include/__pstl/cpu_algos/any_of.h @@ -26,6 +26,8 @@ _LIBCPP_PUSH_MACROS #include <__undef_macros> +#if _LIBCPP_STD_VER >= 17 + _LIBCPP_BEGIN_NAMESPACE_STD namespace __pstl { @@ -94,6 +96,8 @@ struct __cpu_parallel_any_of { } // namespace __pstl _LIBCPP_END_NAMESPACE_STD +#endif // _LIBCPP_STD_VER >= 17 + _LIBCPP_POP_MACROS #endif // _LIBCPP___PSTL_CPU_ALGOS_ANY_OF_H diff --git a/libcxx/include/__pstl/cpu_algos/cpu_traits.h b/libcxx/include/__pstl/cpu_algos/cpu_traits.h index 0483d6918fd01d..5e59752fa5723c 100644 --- a/libcxx/include/__pstl/cpu_algos/cpu_traits.h +++ b/libcxx/include/__pstl/cpu_algos/cpu_traits.h @@ -19,6 +19,8 @@ _LIBCPP_PUSH_MACROS #include <__undef_macros> +#if _LIBCPP_STD_VER >= 17 + _LIBCPP_BEGIN_NAMESPACE_STD namespace __pstl { @@ -81,6 +83,8 @@ struct __cpu_traits; } // namespace __pstl _LIBCPP_END_NAMESPACE_STD +#endif // _LIBCPP_STD_VER >= 17 + _LIBCPP_POP_MACROS #endif // _LIBCPP___PSTL_CPU_ALGOS_CPU_TRAITS_H diff --git a/libcxx/include/__pstl/cpu_algos/fill.h b/libcxx/include/__pstl/cpu_algos/fill.h index 4e6d29b30cc69d..3e5936589a6a63 100644 --- a/libcxx/include/__pstl/cpu_algos/fill.h +++ b/libcxx/include/__pstl/cpu_algos/fill.h @@ -23,6 +23,8 @@ # pragma GCC system_header #endif +#if _LIBCPP_STD_VER >= 17 + _LIBCPP_BEGIN_NAMESPACE_STD namespace __pstl { @@ -63,4 +65,6 @@ struct __cpu_parallel_fill { } // namespace __pstl _LIBCPP_END_NAMESPACE_STD +#endif // _LIBCPP_STD_VER >= 17 + #endif // _LIBCPP___PSTL_CPU_ALGOS_FILL_H diff --git a/libcxx/include/__pstl/cpu_algos/find_if.h b/libcxx/include/__pstl/cpu_algos/find_if.h index 12b2e88971df7d..cd92e5a99f12f8 100644 --- a/libcxx/include/__pstl/cpu_algos/find_if.h +++ b/libcxx/include/__pstl/cpu_algos/find_if.h @@ -31,6 +31,8 @@ _LIBCPP_PUSH_MACROS #include <__undef_macros> +#if _LIBCPP_STD_VER >= 17 + _LIBCPP_BEGIN_NAMESPACE_STD namespace __pstl { @@ -132,6 +134,8 @@ struct __cpu_parallel_find_if { } // namespace __pstl _LIBCPP_END_NAMESPACE_STD +#endif // _LIBCPP_STD_VER >= 17 + _LIBCPP_POP_MACROS #endif // _LIBCPP___PSTL_CPU_ALGOS_FIND_IF_H diff --git a/libcxx/include/__pstl/cpu_algos/for_each.h b/libcxx/include/__pstl/cpu_algos/for_each.h index d4d7862135ff91..cec719bc47b88c 100644 --- a/libcxx/include/__pstl/cpu_algos/for_each.h +++ b/libcxx/include/__pstl/cpu_algos/for_each.h @@ -23,6 +23,8 @@ # pragma GCC system_header #endif +#if _LIBCPP_STD_VER >= 17 + _LIBCPP_BEGIN_NAMESPACE_STD namespace __pstl { @@ -63,4 +65,6 @@ struct __cpu_parallel_for_each { } // namespace __pstl _LIBCPP_END_NAMESPACE_STD +#endif // _LIBCPP_STD_VER >= 17 + #endif // _LIBCPP___PSTL_CPU_ALGOS_FOR_EACH_H diff --git a/libcxx/include/__pstl/cpu_algos/merge.h b/libcxx/include/__pstl/cpu_algos/merge.h index dfa4cbf69b1470..a9069ca51de2f5 100644 --- a/libcxx/include/__pstl/cpu_algos/merge.h +++ 
b/libcxx/include/__pstl/cpu_algos/merge.h @@ -26,6 +26,8 @@ _LIBCPP_PUSH_MACROS #include <__undef_macros> +#if _LIBCPP_STD_VER >= 17 + _LIBCPP_BEGIN_NAMESPACE_STD namespace __pstl { @@ -80,6 +82,8 @@ struct __cpu_parallel_merge { } // namespace __pstl _LIBCPP_END_NAMESPACE_STD +#endif // _LIBCPP_STD_VER >= 17 + _LIBCPP_POP_MACROS #endif // _LIBCPP___PSTL_CPU_ALGOS_MERGE_H diff --git a/libcxx/include/__pstl/cpu_algos/stable_sort.h b/libcxx/include/__pstl/cpu_algos/stable_sort.h index 8e64f3e537c072..5afdd3fd629ba9 100644 --- a/libcxx/include/__pstl/cpu_algos/stable_sort.h +++ b/libcxx/include/__pstl/cpu_algos/stable_sort.h @@ -21,6 +21,8 @@ # pragma GCC system_header #endif +#if _LIBCPP_STD_VER >= 17 + _LIBCPP_BEGIN_NAMESPACE_STD namespace __pstl { @@ -44,4 +46,6 @@ struct __cpu_parallel_stable_sort { } // namespace __pstl _LIBCPP_END_NAMESPACE_STD +#endif // _LIBCPP_STD_VER >= 17 + #endif // _LIBCPP___PSTL_CPU_ALGOS_STABLE_SORT_H diff --git a/libcxx/include/__pstl/cpu_algos/transform.h b/libcxx/include/__pstl/cpu_algos/transform.h index 27ce8e27b242af..979121be8c8c9a 100644 --- a/libcxx/include/__pstl/cpu_algos/transform.h +++ b/libcxx/include/__pstl/cpu_algos/transform.h @@ -27,6 +27,8 @@ _LIBCPP_PUSH_MACROS #include <__undef_macros> +#if _LIBCPP_STD_VER >= 17 + _LIBCPP_BEGIN_NAMESPACE_STD namespace __pstl { @@ -148,6 +150,8 @@ struct __cpu_parallel_transform_binary { } // namespace __pstl _LIBCPP_END_NAMESPACE_STD +#endif // _LIBCPP_STD_VER >= 17 + _LIBCPP_POP_MACROS #endif // _LIBCPP___PSTL_CPU_ALGOS_TRANSFORM_H diff --git a/libcxx/include/__pstl/cpu_algos/transform_reduce.h b/libcxx/include/__pstl/cpu_algos/transform_reduce.h index 36ac1a9072a89e..aafbf1ca96b40d 100644 --- a/libcxx/include/__pstl/cpu_algos/transform_reduce.h +++ b/libcxx/include/__pstl/cpu_algos/transform_reduce.h @@ -31,6 +31,8 @@ _LIBCPP_PUSH_MACROS #include <__undef_macros> +#if _LIBCPP_STD_VER >= 17 + _LIBCPP_BEGIN_NAMESPACE_STD namespace __pstl { @@ -211,6 +213,8 @@ struct __cpu_parallel_transform_reduce { } // namespace __pstl _LIBCPP_END_NAMESPACE_STD +#endif // _LIBCPP_STD_VER >= 17 + _LIBCPP_POP_MACROS #endif // _LIBCPP___PSTL_CPU_ALGOS_TRANSFORM_REDUCE_H diff --git a/libcxx/include/__pstl/dispatch.h b/libcxx/include/__pstl/dispatch.h index 5e903f7524fe9b..ea40fa79eb9496 100644 --- a/libcxx/include/__pstl/dispatch.h +++ b/libcxx/include/__pstl/dispatch.h @@ -23,6 +23,8 @@ _LIBCPP_PUSH_MACROS #include <__undef_macros> +#if _LIBCPP_STD_VER >= 17 + _LIBCPP_BEGIN_NAMESPACE_STD namespace __pstl { @@ -61,6 +63,8 @@ using __dispatch = typename __find_first_implemented<_Algorithm, _BackendConfigu } // namespace __pstl _LIBCPP_END_NAMESPACE_STD +#endif // _LIBCPP_STD_VER >= 17 + _LIBCPP_POP_MACROS #endif // _LIBCPP___PSTL_DISPATCH_H diff --git a/libcxx/include/__pstl/handle_exception.h b/libcxx/include/__pstl/handle_exception.h index d6270958c3a7c7..57dfcfde4554f8 100644 --- a/libcxx/include/__pstl/handle_exception.h +++ b/libcxx/include/__pstl/handle_exception.h @@ -22,6 +22,8 @@ _LIBCPP_PUSH_MACROS #include <__undef_macros> +#if _LIBCPP_STD_VER >= 17 + _LIBCPP_BEGIN_NAMESPACE_STD namespace __pstl { @@ -52,6 +54,8 @@ _LIBCPP_HIDE_FROM_ABI auto __handle_exception(_Args&&... 
__args) { } // namespace __pstl _LIBCPP_END_NAMESPACE_STD +#endif // _LIBCPP_STD_VER >= 17 + _LIBCPP_POP_MACROS #endif // _LIBCPP___PSTL_HANDLE_EXCEPTION_H diff --git a/libcxx/include/__std_clang_module b/libcxx/include/__std_clang_module index 18d6ce6b46c1f6..572528669a1e15 100644 --- a/libcxx/include/__std_clang_module +++ b/libcxx/include/__std_clang_module @@ -33,9 +33,7 @@ #if !defined(_LIBCPP_HAS_NO_ATOMIC_HEADER) # include #endif -#if !defined(_LIBCPP_HAS_NO_THREADS) -# include -#endif +#include #include #include #include @@ -101,9 +99,7 @@ # include #endif #include -#if !defined(_LIBCPP_HAS_NO_THREADS) -# include -#endif +#include #include #include #if !defined(_LIBCPP_HAS_NO_LOCALIZATION) @@ -120,9 +116,7 @@ # include #endif #include -#if !defined(_LIBCPP_HAS_NO_THREADS) -# include -#endif +#include #include #include #if !defined(_LIBCPP_HAS_NO_LOCALIZATION) @@ -153,13 +147,9 @@ # include #endif #include -#if !defined(_LIBCPP_HAS_NO_THREADS) -# include -#endif +#include #include -#if !defined(_LIBCPP_HAS_NO_THREADS) -# include -#endif +#include #include #include #if !defined(_LIBCPP_HAS_NO_LOCALIZATION) @@ -175,9 +165,7 @@ #include #include #include -#if !defined(_LIBCPP_HAS_NO_THREADS) -# include -#endif +#include #if !defined(_LIBCPP_HAS_NO_LOCALIZATION) # include #endif @@ -192,9 +180,7 @@ #endif #include #include -#if !defined(_LIBCPP_HAS_NO_THREADS) -# include -#endif +#include #include #include #include diff --git a/libcxx/include/experimental/__config b/libcxx/include/experimental/__config deleted file mode 100644 index 7b23791511ceff..00000000000000 --- a/libcxx/include/experimental/__config +++ /dev/null @@ -1,45 +0,0 @@ -// -*- C++ -*- -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef _LIBCPP_EXPERIMENTAL_CONFIG -#define _LIBCPP_EXPERIMENTAL_CONFIG - -#include <__config> - -#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) -# pragma GCC system_header -#endif - -#define _LIBCPP_BEGIN_NAMESPACE_EXPERIMENTAL \ - namespace std { \ - namespace experimental { -#define _LIBCPP_END_NAMESPACE_EXPERIMENTAL \ - } \ - } - -#define _LIBCPP_BEGIN_NAMESPACE_LFTS _LIBCPP_BEGIN_NAMESPACE_EXPERIMENTAL inline namespace fundamentals_v1 { -#define _LIBCPP_END_NAMESPACE_LFTS \ - } \ - } \ - } - -#define _LIBCPP_BEGIN_NAMESPACE_LFTS_V2 _LIBCPP_BEGIN_NAMESPACE_EXPERIMENTAL inline namespace fundamentals_v2 { -#define _LIBCPP_END_NAMESPACE_LFTS_V2 \ - } \ - } \ - } - -// TODO: support more targets -#if defined(__AVX__) -# define _LIBCPP_NATIVE_SIMD_WIDTH_IN_BYTES 32 -#else -# define _LIBCPP_NATIVE_SIMD_WIDTH_IN_BYTES 16 -#endif - -#endif diff --git a/libcxx/include/experimental/__simd/aligned_tag.h b/libcxx/include/experimental/__simd/aligned_tag.h index 31d2b50aa1dd36..e364e146a60116 100644 --- a/libcxx/include/experimental/__simd/aligned_tag.h +++ b/libcxx/include/experimental/__simd/aligned_tag.h @@ -10,10 +10,10 @@ #ifndef _LIBCPP_EXPERIMENTAL___SIMD_ALIGNED_TAG_H #define _LIBCPP_EXPERIMENTAL___SIMD_ALIGNED_TAG_H +#include <__config> #include <__memory/assume_aligned.h> #include <__type_traits/remove_const.h> #include -#include #include #if _LIBCPP_STD_VER >= 17 && defined(_LIBCPP_ENABLE_EXPERIMENTAL) diff --git a/libcxx/include/experimental/__simd/declaration.h b/libcxx/include/experimental/__simd/declaration.h index 7b45d035c27121..2ac7224159cf35 100644 --- a/libcxx/include/experimental/__simd/declaration.h +++ b/libcxx/include/experimental/__simd/declaration.h @@ -10,11 +10,18 @@ #ifndef _LIBCPP_EXPERIMENTAL___SIMD_DECLARATION_H #define _LIBCPP_EXPERIMENTAL___SIMD_DECLARATION_H +#include <__config> #include -#include #if _LIBCPP_STD_VER >= 17 && defined(_LIBCPP_ENABLE_EXPERIMENTAL) +// TODO: support more targets +# if defined(__AVX__) +# define _LIBCPP_NATIVE_SIMD_WIDTH_IN_BYTES 32 +# else +# define _LIBCPP_NATIVE_SIMD_WIDTH_IN_BYTES 16 +# endif + _LIBCPP_BEGIN_NAMESPACE_EXPERIMENTAL inline namespace parallelism_v2 { namespace simd_abi { diff --git a/libcxx/include/experimental/__simd/reference.h b/libcxx/include/experimental/__simd/reference.h index c60c08b0ea459c..cba460baaa95bb 100644 --- a/libcxx/include/experimental/__simd/reference.h +++ b/libcxx/include/experimental/__simd/reference.h @@ -10,6 +10,7 @@ #ifndef _LIBCPP_EXPERIMENTAL___SIMD_REFERENCE_H #define _LIBCPP_EXPERIMENTAL___SIMD_REFERENCE_H +#include <__config> #include <__type_traits/enable_if.h> #include <__type_traits/is_assignable.h> #include <__type_traits/is_same.h> @@ -17,7 +18,6 @@ #include <__utility/forward.h> #include <__utility/move.h> #include -#include #include _LIBCPP_PUSH_MACROS diff --git a/libcxx/include/experimental/__simd/scalar.h b/libcxx/include/experimental/__simd/scalar.h index a2aeeb5cd0f54f..a76933e1a5849d 100644 --- a/libcxx/include/experimental/__simd/scalar.h +++ b/libcxx/include/experimental/__simd/scalar.h @@ -11,9 +11,9 @@ #define _LIBCPP_EXPERIMENTAL___SIMD_SCALAR_H #include <__assert> +#include <__config> #include <__type_traits/integral_constant.h> #include -#include #include #include diff --git a/libcxx/include/experimental/__simd/simd.h b/libcxx/include/experimental/__simd/simd.h index db0f9b39d9600c..2c65d19e67b36f 100644 --- 
a/libcxx/include/experimental/__simd/simd.h +++ b/libcxx/include/experimental/__simd/simd.h @@ -10,12 +10,12 @@ #ifndef _LIBCPP_EXPERIMENTAL___SIMD_SIMD_H #define _LIBCPP_EXPERIMENTAL___SIMD_SIMD_H +#include <__config> #include <__type_traits/enable_if.h> #include <__type_traits/is_same.h> #include <__type_traits/remove_cvref.h> #include <__utility/forward.h> #include -#include #include #include #include diff --git a/libcxx/include/experimental/__simd/simd_mask.h b/libcxx/include/experimental/__simd/simd_mask.h index d54d4898b718af..55273194153134 100644 --- a/libcxx/include/experimental/__simd/simd_mask.h +++ b/libcxx/include/experimental/__simd/simd_mask.h @@ -10,10 +10,10 @@ #ifndef _LIBCPP_EXPERIMENTAL___SIMD_SIMD_MASK_H #define _LIBCPP_EXPERIMENTAL___SIMD_SIMD_MASK_H +#include <__config> #include <__type_traits/enable_if.h> #include <__type_traits/is_same.h> #include -#include #include #include #include diff --git a/libcxx/include/experimental/__simd/traits.h b/libcxx/include/experimental/__simd/traits.h index ec25b4bfa7f95e..b817df604ef72e 100644 --- a/libcxx/include/experimental/__simd/traits.h +++ b/libcxx/include/experimental/__simd/traits.h @@ -11,10 +11,10 @@ #define _LIBCPP_EXPERIMENTAL___SIMD_TRAITS_H #include <__bit/bit_ceil.h> +#include <__config> #include <__type_traits/integral_constant.h> #include <__type_traits/is_same.h> #include -#include #include #include diff --git a/libcxx/include/experimental/__simd/utility.h b/libcxx/include/experimental/__simd/utility.h index 708fa3d8f72cef..0103b06b719532 100644 --- a/libcxx/include/experimental/__simd/utility.h +++ b/libcxx/include/experimental/__simd/utility.h @@ -10,6 +10,7 @@ #ifndef _LIBCPP_EXPERIMENTAL___SIMD_UTILITY_H #define _LIBCPP_EXPERIMENTAL___SIMD_UTILITY_H +#include <__config> #include <__type_traits/is_arithmetic.h> #include <__type_traits/is_const.h> #include <__type_traits/is_constant_evaluated.h> @@ -22,7 +23,6 @@ #include <__utility/integer_sequence.h> #include #include -#include #include _LIBCPP_PUSH_MACROS diff --git a/libcxx/include/experimental/__simd/vec_ext.h b/libcxx/include/experimental/__simd/vec_ext.h index 5787f237bb01ef..1f707cf3e18424 100644 --- a/libcxx/include/experimental/__simd/vec_ext.h +++ b/libcxx/include/experimental/__simd/vec_ext.h @@ -12,11 +12,11 @@ #include <__assert> #include <__bit/bit_ceil.h> +#include <__config> #include <__type_traits/integral_constant.h> #include <__utility/forward.h> #include <__utility/integer_sequence.h> #include -#include #include #include #include diff --git a/libcxx/include/experimental/iterator b/libcxx/include/experimental/iterator index de82da2d3d72bd..edfe6e707bcec9 100644 --- a/libcxx/include/experimental/iterator +++ b/libcxx/include/experimental/iterator @@ -52,11 +52,11 @@ namespace std { */ +#include <__config> #include <__memory/addressof.h> #include <__type_traits/decay.h> #include <__utility/forward.h> #include <__utility/move.h> -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/experimental/memory b/libcxx/include/experimental/memory index e9663d43a8ab73..bf8a154690af01 100644 --- a/libcxx/include/experimental/memory +++ b/libcxx/include/experimental/memory @@ -49,6 +49,7 @@ public: } */ +#include <__config> #include <__functional/hash.h> #include <__functional/operations.h> #include <__type_traits/add_lvalue_reference.h> @@ -57,7 +58,6 @@ public: #include <__type_traits/enable_if.h> #include <__type_traits/is_convertible.h> #include -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # 
pragma GCC system_header diff --git a/libcxx/include/experimental/propagate_const b/libcxx/include/experimental/propagate_const index d7a695d8388923..510d374bb4bf94 100644 --- a/libcxx/include/experimental/propagate_const +++ b/libcxx/include/experimental/propagate_const @@ -107,6 +107,7 @@ */ +#include <__config> #include <__functional/operations.h> #include <__fwd/functional.h> #include <__type_traits/conditional.h> @@ -128,7 +129,6 @@ #include <__utility/move.h> #include <__utility/swap.h> #include -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/libcxx/include/experimental/simd b/libcxx/include/experimental/simd index 484543b81daf1f..35120b4b4aab42 100644 --- a/libcxx/include/experimental/simd +++ b/libcxx/include/experimental/simd @@ -75,7 +75,7 @@ inline namespace parallelism_v2 { # pragma GCC system_header #endif -#include +#include <__config> #include #include #include diff --git a/libcxx/include/experimental/type_traits b/libcxx/include/experimental/type_traits index 31b041bc94c43a..a4bb59afaf4ac2 100644 --- a/libcxx/include/experimental/type_traits +++ b/libcxx/include/experimental/type_traits @@ -68,7 +68,7 @@ inline namespace fundamentals_v1 { */ -#include +#include <__config> #if _LIBCPP_STD_VER >= 14 diff --git a/libcxx/include/experimental/utility b/libcxx/include/experimental/utility index 8bd0a055b7783f..cbc7ad140e40c1 100644 --- a/libcxx/include/experimental/utility +++ b/libcxx/include/experimental/utility @@ -30,7 +30,7 @@ inline namespace fundamentals_v1 { */ -#include +#include <__config> #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/future b/libcxx/include/future index 9a0eb7971a313d..9158ea34ee109d 100644 --- a/libcxx/include/future +++ b/libcxx/include/future @@ -329,7 +329,7 @@ public: template explicit packaged_task(F&& f); template - packaged_task(allocator_arg_t, const Allocator& a, F&& f); + packaged_task(allocator_arg_t, const Allocator& a, F&& f); // removed in C++17 ~packaged_task(); // no copy @@ -356,7 +356,7 @@ public: template void swap(packaged_task&) noexcept; -template struct uses_allocator, Alloc>; +template struct uses_allocator, Alloc>; // removed in C++17 } // std @@ -1606,9 +1606,11 @@ public: template , packaged_task>::value, int> = 0> _LIBCPP_HIDE_FROM_ABI explicit packaged_task(_Fp&& __f) : __f_(std::forward<_Fp>(__f)) {} +# if _LIBCPP_STD_VER <= 14 template , packaged_task>::value, int> = 0> _LIBCPP_HIDE_FROM_ABI packaged_task(allocator_arg_t, const _Allocator& __a, _Fp&& __f) : __f_(allocator_arg_t(), __a, std::forward<_Fp>(__f)), __p_(allocator_arg_t(), __a) {} +# endif // ~packaged_task() = default; // no copy @@ -1696,9 +1698,11 @@ public: _LIBCPP_HIDE_FROM_ABI packaged_task() _NOEXCEPT : __p_(nullptr) {} template , packaged_task>::value, int> = 0> _LIBCPP_HIDE_FROM_ABI explicit packaged_task(_Fp&& __f) : __f_(std::forward<_Fp>(__f)) {} +# if _LIBCPP_STD_VER <= 14 template , packaged_task>::value, int> = 0> _LIBCPP_HIDE_FROM_ABI packaged_task(allocator_arg_t, const _Allocator& __a, _Fp&& __f) : __f_(allocator_arg_t(), __a, std::forward<_Fp>(__f)), __p_(allocator_arg_t(), __a) {} +# endif // ~packaged_task() = default; // no copy @@ -1790,8 +1794,10 @@ swap(packaged_task<_Rp(_ArgTypes...)>& __x, packaged_task<_Rp(_ArgTypes...)>& __ __x.swap(__y); } +# if _LIBCPP_STD_VER <= 14 template struct _LIBCPP_TEMPLATE_VIS uses_allocator, _Alloc> : public true_type {}; +# endif template _LIBCPP_HIDE_FROM_ABI future<_Rp> __make_deferred_assoc_state(_Fp&& __f) 
{ diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap index 65df579b8d6dd7..add8726dead428 100644 --- a/libcxx/include/module.modulemap +++ b/libcxx/include/module.modulemap @@ -610,10 +610,6 @@ module std_experimental [system] { header "experimental/utility" export * } - module __config { - textual header "experimental/__config" - export * - } } // Convenience method to get all of the above modules in a single import statement. diff --git a/libcxx/modules/std.cppm.in b/libcxx/modules/std.cppm.in index ad8a639b7f71a1..653f9933ba67c7 100644 --- a/libcxx/modules/std.cppm.in +++ b/libcxx/modules/std.cppm.in @@ -23,9 +23,7 @@ module; #if !defined(_LIBCPP_HAS_NO_ATOMIC_HEADER) # include #endif -#if !defined(_LIBCPP_HAS_NO_THREADS) -# include -#endif +#include #include #include #include @@ -76,9 +74,7 @@ module; # include #endif #include -#if !defined(_LIBCPP_HAS_NO_THREADS) -# include -#endif +#include #include #if !defined(_LIBCPP_HAS_NO_LOCALIZATION) # include @@ -94,9 +90,7 @@ module; # include #endif #include -#if !defined(_LIBCPP_HAS_NO_THREADS) -# include -#endif +#include #include #include #if !defined(_LIBCPP_HAS_NO_LOCALIZATION) @@ -123,13 +117,9 @@ module; # include #endif #include -#if !defined(_LIBCPP_HAS_NO_THREADS) -# include -#endif +#include #include -#if !defined(_LIBCPP_HAS_NO_THREADS) -# include -#endif +#include #include #include #if !defined(_LIBCPP_HAS_NO_LOCALIZATION) @@ -137,9 +127,7 @@ module; #endif #include #include -#if !defined(_LIBCPP_HAS_NO_THREADS) -# include -#endif +#include #if !defined(_LIBCPP_HAS_NO_LOCALIZATION) # include #endif @@ -152,9 +140,7 @@ module; # include #endif #include -#if !defined(_LIBCPP_HAS_NO_THREADS) -# include -#endif +#include #include #include #include diff --git a/libcxx/src/any.cpp b/libcxx/src/any.cpp index b0ad695669dec4..eaca2dd2397650 100644 --- a/libcxx/src/any.cpp +++ b/libcxx/src/any.cpp @@ -12,7 +12,7 @@ namespace std { const char* bad_any_cast::what() const noexcept { return "bad any cast"; } } // namespace std -#include +#include <__config> // Preserve std::experimental::any_bad_cast for ABI compatibility // Even though it no longer exists in a header file diff --git a/libcxx/src/optional.cpp b/libcxx/src/optional.cpp index 62b474a312be2d..4e7e28898f8814 100644 --- a/libcxx/src/optional.cpp +++ b/libcxx/src/optional.cpp @@ -17,7 +17,7 @@ const char* bad_optional_access::what() const noexcept { return "bad_optional_ac } // namespace std -#include +#include <__config> // Preserve std::experimental::bad_optional_access for ABI compatibility // Even though it no longer exists in a header file diff --git a/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor2.compile.pass.cpp b/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor2.compile.pass.cpp index 3ab59909cfafbe..a3bdd45975c96f 100644 --- a/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor2.compile.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor2.compile.pass.cpp @@ -18,11 +18,13 @@ #include #include +#include +#include #include "test_allocator.h" struct A {}; -using PT = std::packaged_task; +using PT = std::packaged_task; using VPT = volatile std::packaged_task; static_assert(!std::is_constructible, VPT>::value, ""); @@ -35,7 +37,14 @@ static_assert(!std::is_constructible static_assert(!std::is_constructible, volatile PA&>::value, ""); static_assert(!std::is_constructible, volatile PA&&>::value, ""); -static_assert( std::is_constructible, const 
PI&>::value, ""); -static_assert( std::is_constructible, const PI&&>::value, ""); -static_assert( std::is_constructible, volatile PI&>::value, ""); -static_assert( std::is_constructible, volatile PI&&>::value, ""); +#if TEST_STD_VER >= 17 // packaged_task allocator support was removed in C++17 (LWG 2921) +static_assert(!std::is_constructible_v, const PI&>); +static_assert(!std::is_constructible_v, const PI&&>); +static_assert(!std::is_constructible_v, volatile PI&>); +static_assert(!std::is_constructible_v, volatile PI&&>); +#else +static_assert(std::is_constructible, const PI&>::value, ""); +static_assert(std::is_constructible, const PI&&>::value, ""); +static_assert(std::is_constructible, volatile PI&>::value, ""); +static_assert(std::is_constructible, volatile PI&&>::value, ""); +#endif diff --git a/libcxx/utils/libcxx/header_information.py b/libcxx/utils/libcxx/header_information.py index 166c9a77c08e70..694402141e1fab 100644 --- a/libcxx/utils/libcxx/header_information.py +++ b/libcxx/utils/libcxx/header_information.py @@ -31,15 +31,6 @@ "strstream": "!defined(_LIBCPP_HAS_NO_LOCALIZATION)", "syncstream": "!defined(_LIBCPP_HAS_NO_LOCALIZATION)", - # headers with #error directives - "barrier": "!defined(_LIBCPP_HAS_NO_THREADS)", - "future": "!defined(_LIBCPP_HAS_NO_THREADS)", - "latch": "!defined(_LIBCPP_HAS_NO_THREADS)", - "semaphore": "!defined(_LIBCPP_HAS_NO_THREADS)", - "shared_mutex": "!defined(_LIBCPP_HAS_NO_THREADS)", - "stop_token": "!defined(_LIBCPP_HAS_NO_THREADS)", - "thread": "!defined(_LIBCPP_HAS_NO_THREADS)", - # headers with #error directives "wchar.h": "!defined(_LIBCPP_HAS_NO_WIDE_CHARACTERS)", "wctype.h": "!defined(_LIBCPP_HAS_NO_WIDE_CHARACTERS)", diff --git a/lld/test/wasm/static-error.s b/lld/test/wasm/static-error.s new file mode 100644 index 00000000000000..3557506a5f07a2 --- /dev/null +++ b/lld/test/wasm/static-error.s @@ -0,0 +1,12 @@ +// RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %s -o %t.o +// RUN: wasm-ld --experimental-pic -shared -o %t.so %t.o + +// RUN: wasm-ld --experimental-pic -pie -o /dev/null %t.o %t.so +// RUN: not wasm-ld -o /dev/null -static %t.o %t.so 2>&1 | FileCheck %s + +// CHECK: attempted static link of dynamic object + +.global _start +_start: + .functype _start () -> () + end_function diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp index cb8fe2534f5fe7..2de7dcaeb43d47 100644 --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -333,9 +333,15 @@ void LinkerDriver::addFile(StringRef path) { return; } case file_magic::bitcode: - case file_magic::wasm_object: - files.push_back(createObjectFile(mbref, "", 0, inLib)); + case file_magic::wasm_object: { + auto obj = createObjectFile(mbref, "", 0, inLib); + if (config->isStatic && isa(obj)) { + error("attempted static link of dynamic object " + path); + break; + } + files.push_back(obj); break; + } case file_magic::unknown: if (mbref.getBuffer().starts_with("#STUB")) { files.push_back(make(mbref)); diff --git a/lldb/include/lldb/API/SBMemoryRegionInfo.h b/lldb/include/lldb/API/SBMemoryRegionInfo.h index be55de4ead1fa8..f9a5dc993d7cb6 100644 --- a/lldb/include/lldb/API/SBMemoryRegionInfo.h +++ b/lldb/include/lldb/API/SBMemoryRegionInfo.h @@ -120,7 +120,7 @@ class LLDB_API SBMemoryRegionInfo { private: friend class SBProcess; friend class SBMemoryRegionInfoList; - + friend class SBSaveCoreOptions; friend class lldb_private::ScriptInterpreter; lldb_private::MemoryRegionInfo &ref(); diff --git a/lldb/include/lldb/API/SBSaveCoreOptions.h b/lldb/include/lldb/API/SBSaveCoreOptions.h 
index ba48ba5eaea5a0..c076d3ce6f7575 100644 --- a/lldb/include/lldb/API/SBSaveCoreOptions.h +++ b/lldb/include/lldb/API/SBSaveCoreOptions.h @@ -80,6 +80,17 @@ class LLDB_API SBSaveCoreOptions { /// \return True if the thread was removed, false if it was not in the list. bool RemoveThread(lldb::SBThread thread); + /// Add a memory region to save in the core file. + /// + /// \param region The memory region to save. + /// \returns An empty SBError upon success, or an error if the region is + /// invalid. + /// \note Ranges that overlap will be unioned into a single region; this + /// also supersedes stack minification. Specifying full regions and a + /// non-custom core style will include the specified regions and union them + /// with all style-specific regions. + SBError AddMemoryRegionToSave(const SBMemoryRegionInfo &region); + /// Reset all options. void Clear(); diff --git a/lldb/include/lldb/Symbol/SaveCoreOptions.h b/lldb/include/lldb/Symbol/SaveCoreOptions.h index f4fed4676fa4ae..d90d08026016dc 100644 --- a/lldb/include/lldb/Symbol/SaveCoreOptions.h +++ b/lldb/include/lldb/Symbol/SaveCoreOptions.h @@ -10,13 +10,15 @@ #define LLDB_SOURCE_PLUGINS_OBJECTFILE_SaveCoreOPTIONS_H #include "lldb/Utility/FileSpec.h" -#include "lldb/lldb-forward.h" -#include "lldb/lldb-types.h" +#include "lldb/Utility/RangeMap.h" #include +#include #include #include +using MemoryRanges = lldb_private::RangeVector; + namespace lldb_private { class SaveCoreOptions { @@ -38,8 +40,12 @@ class SaveCoreOptions { Status AddThread(lldb::ThreadSP thread_sp); bool RemoveThread(lldb::ThreadSP thread_sp); bool ShouldThreadBeSaved(lldb::tid_t tid) const; + bool HasSpecifiedThreads() const; Status EnsureValidConfiguration(lldb::ProcessSP process_sp) const; + const MemoryRanges &GetCoreFileMemoryRanges() const; + + void AddMemoryRegionToSave(const lldb_private::MemoryRegionInfo &region); void Clear(); @@ -51,6 +57,7 @@ class SaveCoreOptions { std::optional m_style; lldb::ProcessSP m_process_sp; std::unordered_set m_threads_to_save; + MemoryRanges m_regions_to_save; }; } // namespace lldb_private diff --git a/lldb/include/lldb/Target/CoreFileMemoryRanges.h b/lldb/include/lldb/Target/CoreFileMemoryRanges.h new file mode 100644 index 00000000000000..503ecd691e5948 --- /dev/null +++ b/lldb/include/lldb/Target/CoreFileMemoryRanges.h @@ -0,0 +1,50 @@ +//===-- CoreFileMemoryRanges.h ----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lldb/Utility/RangeMap.h" +#include "lldb/Utility/Status.h" + +#include "llvm/ADT/AddressRanges.h" + +#ifndef LLDB_TARGET_COREFILEMEMORYRANGES_H +#define LLDB_TARGET_COREFILEMEMORYRANGES_H + +namespace lldb_private { + +struct CoreFileMemoryRange { + llvm::AddressRange range; /// The address range to save into the core file. + uint32_t lldb_permissions; /// A bit set of lldb::Permissions bits.
+ + bool operator==(const CoreFileMemoryRange &rhs) const { + return range == rhs.range && lldb_permissions == rhs.lldb_permissions; + } + + bool operator!=(const CoreFileMemoryRange &rhs) const { + return !(*this == rhs); + } + + bool operator<(const CoreFileMemoryRange &rhs) const { + if (range < rhs.range) + return true; + if (range == rhs.range) + return lldb_permissions < rhs.lldb_permissions; + return false; + } +}; + +class CoreFileMemoryRanges + : public lldb_private::RangeDataVector { +public: + /// Finalize and merge all overlapping ranges in this collection. Ranges + /// will be separated based on permissions. + Status FinalizeCoreFileSaveRanges(); +}; +} // namespace lldb_private + +#endif // LLDB_TARGET_COREFILEMEMORYRANGES_H diff --git a/lldb/include/lldb/Target/Process.h b/lldb/include/lldb/Target/Process.h index c66cfb2c245efd..b8c53a474ba6b9 100644 --- a/lldb/include/lldb/Target/Process.h +++ b/lldb/include/lldb/Target/Process.h @@ -35,6 +35,8 @@ #include "lldb/Host/ProcessLaunchInfo.h" #include "lldb/Host/ProcessRunLock.h" #include "lldb/Symbol/ObjectFile.h" +#include "lldb/Symbol/SaveCoreOptions.h" +#include "lldb/Target/CoreFileMemoryRanges.h" #include "lldb/Target/ExecutionContextScope.h" #include "lldb/Target/InstrumentationRuntime.h" #include "lldb/Target/Memory.h" @@ -710,29 +712,6 @@ class Process : public std::enable_shared_from_this, /// is not supported by the plugin, error otherwise. virtual llvm::Expected SaveCore(llvm::StringRef outfile); - struct CoreFileMemoryRange { - llvm::AddressRange range; /// The address range to save into the core file. - uint32_t lldb_permissions; /// A bit set of lldb::Permissions bits. - - bool operator==(const CoreFileMemoryRange &rhs) const { - return range == rhs.range && lldb_permissions == rhs.lldb_permissions; - } - - bool operator!=(const CoreFileMemoryRange &rhs) const { - return !(*this == rhs); - } - - bool operator<(const CoreFileMemoryRange &rhs) const { - if (range < rhs.range) - return true; - if (range == rhs.range) - return lldb_permissions < rhs.lldb_permissions; - return false; - } - }; - - using CoreFileMemoryRanges = std::vector; - /// Helper function for Process::SaveCore(...) that calculates the address /// ranges that should be saved. This allows all core file plug-ins to save /// consistent memory ranges given a \a core_style. diff --git a/lldb/include/lldb/Utility/RangeMap.h b/lldb/include/lldb/Utility/RangeMap.h index 8cc382bcc046ce..433466eebced8b 100644 --- a/lldb/include/lldb/Utility/RangeMap.h +++ b/lldb/include/lldb/Utility/RangeMap.h @@ -450,6 +450,12 @@ class RangeDataVector { void Append(const Entry &entry) { m_entries.emplace_back(entry); } + /// Append a range with data to the vector + /// \param B The base of the memory range + /// \param S The size of the memory range + /// \param T The data associated with the memory range + void Append(B &&b, S &&s, T &&t) { m_entries.emplace_back(Entry(b, s, t)); } + bool Erase(uint32_t start, uint32_t end) { if (start >= end || end > m_entries.size()) return false; diff --git a/lldb/include/lldb/lldb-enumerations.h b/lldb/include/lldb/lldb-enumerations.h index 7bfde8b9de1271..938f6e3abe8f2a 100644 --- a/lldb/include/lldb/lldb-enumerations.h +++ b/lldb/include/lldb/lldb-enumerations.h @@ -1222,6 +1222,7 @@ enum SaveCoreStyle { eSaveCoreFull = 1, eSaveCoreDirtyOnly = 2, eSaveCoreStackOnly = 3, + eSaveCoreCustomOnly = 4, }; /// Events that might happen during a trace session.
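Taken together, the additions above give SB API clients a way to write a core file containing only regions they name: eSaveCoreCustomOnly suppresses the heuristic region selection, and AddMemoryRegionToSave supplies the explicit ranges. The following is a minimal sketch of that flow; the surrounding process setup is assumed, and SetOutputFile, SetStyle, GetMemoryRegionInfo, and SBProcess::SaveCore are pre-existing SB API entry points this patch builds on.

#include "lldb/API/SBError.h"
#include "lldb/API/SBFileSpec.h"
#include "lldb/API/SBMemoryRegionInfo.h"
#include "lldb/API/SBProcess.h"
#include "lldb/API/SBSaveCoreOptions.h"
#include "lldb/lldb-types.h"

// Save a core file containing only the region that holds `addr`. With
// eSaveCoreCustomOnly, stacks are skipped unless threads are added too.
static lldb::SBError SaveRegionOnlyCore(lldb::SBProcess &process,
                                        lldb::addr_t addr, const char *path) {
  lldb::SBMemoryRegionInfo region;
  // Look up the region containing `addr` so it can be handed to the options.
  lldb::SBError error = process.GetMemoryRegionInfo(addr, region);
  if (error.Fail())
    return error;

  lldb::SBSaveCoreOptions options;
  options.SetOutputFile(lldb::SBFileSpec(path));
  options.SetStyle(lldb::eSaveCoreCustomOnly); // only explicit regions
  error = options.AddMemoryRegionToSave(region);
  if (error.Fail())
    return error;
  return process.SaveCore(options);
}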
diff --git a/lldb/include/lldb/lldb-forward.h b/lldb/include/lldb/lldb-forward.h index 337eff696fcf3f..5fb288ad43af48 100644 --- a/lldb/include/lldb/lldb-forward.h +++ b/lldb/include/lldb/lldb-forward.h @@ -207,6 +207,7 @@ class StackFrameRecognizer; class StackFrameRecognizerManager; class StackID; class Status; +class SaveCoreOptions; class StopInfo; class Stoppoint; class StoppointCallbackContext; diff --git a/lldb/include/lldb/lldb-private-interfaces.h b/lldb/include/lldb/lldb-private-interfaces.h index b3c8cda899b95e..5bac5cd3e86b59 100644 --- a/lldb/include/lldb/lldb-private-interfaces.h +++ b/lldb/include/lldb/lldb-private-interfaces.h @@ -9,7 +9,6 @@ #ifndef LLDB_LLDB_PRIVATE_INTERFACES_H #define LLDB_LLDB_PRIVATE_INTERFACES_H -#include "lldb/Symbol/SaveCoreOptions.h" #include "lldb/lldb-enumerations.h" #include "lldb/lldb-forward.h" #include "lldb/lldb-private-enumerations.h" diff --git a/lldb/source/API/SBSaveCoreOptions.cpp b/lldb/source/API/SBSaveCoreOptions.cpp index ef82b0253f1199..c79b57fa62c2be 100644 --- a/lldb/source/API/SBSaveCoreOptions.cpp +++ b/lldb/source/API/SBSaveCoreOptions.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBSaveCoreOptions.h" +#include "lldb/API/SBMemoryRegionInfo.h" #include "lldb/Host/FileSystem.h" #include "lldb/Symbol/SaveCoreOptions.h" #include "lldb/Utility/Instrumentation.h" @@ -89,6 +90,16 @@ bool SBSaveCoreOptions::RemoveThread(lldb::SBThread thread) { return m_opaque_up->RemoveThread(thread.GetSP()); } +lldb::SBError +SBSaveCoreOptions::AddMemoryRegionToSave(const SBMemoryRegionInfo &region) { + LLDB_INSTRUMENT_VA(this, region); + // Currently adding a memory region can't fail, so we always return a success + // SBError, but because these APIs live forever, this is the most future + // proof thing to do. + m_opaque_up->AddMemoryRegionToSave(region.ref()); + return SBError(); +} + void SBSaveCoreOptions::Clear() { LLDB_INSTRUMENT_VA(this); m_opaque_up->Clear(); diff --git a/lldb/source/Commands/CommandObjectProcess.cpp b/lldb/source/Commands/CommandObjectProcess.cpp index 25eb633f1e6dad..5b0f4f66f248b6 100644 --- a/lldb/source/Commands/CommandObjectProcess.cpp +++ b/lldb/source/Commands/CommandObjectProcess.cpp @@ -25,6 +25,7 @@ #include "lldb/Interpreter/OptionArgParser.h" #include "lldb/Interpreter/OptionGroupPythonClassWithDict.h" #include "lldb/Interpreter/Options.h" +#include "lldb/Symbol/SaveCoreOptions.h" #include "lldb/Target/Platform.h" #include "lldb/Target/Process.h" #include "lldb/Target/StopInfo.h" diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp index b28beab117cca4..06da83e26a26a5 100644 --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp @@ -6562,13 +6562,15 @@ bool ObjectFileMachO::SaveCore(const lldb::ProcessSP &process_sp, } if (make_core) { - Process::CoreFileMemoryRanges core_ranges; + CoreFileMemoryRanges core_ranges; error = process_sp->CalculateCoreFileSaveRanges(options, core_ranges); if (error.Success()) { const uint32_t addr_byte_size = target_arch.GetAddressByteSize(); const ByteOrder byte_order = target_arch.GetByteOrder(); std::vector segment_load_commands; - for (const auto &core_range : core_ranges) { + for (const auto &core_range_info : core_ranges) { + // TODO: Refactor RangeDataVector to have a data iterator.
+ const auto &core_range = core_range_info.data; uint32_t cmd_type = LC_SEGMENT_64; uint32_t segment_size = sizeof(llvm::MachO::segment_command_64); if (addr_byte_size == 4) { diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.h b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.h index 27bc237aaac48d..be87112df7d898 100644 --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.h +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.h @@ -12,6 +12,7 @@ #include "lldb/Core/Address.h" #include "lldb/Host/SafeMachO.h" #include "lldb/Symbol/ObjectFile.h" +#include "lldb/Symbol/SaveCoreOptions.h" #include "lldb/Utility/FileSpec.h" #include "lldb/Utility/FileSpecList.h" #include "lldb/Utility/RangeMap.h" diff --git a/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp b/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp index 5c9ba223ad143e..edc568a6b47e00 100644 --- a/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp +++ b/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp @@ -831,25 +831,32 @@ Status MinidumpFileBuilder::AddMemoryList() { // bytes of the core file. Thread structures in minidump files can only use // 32 bit memory descriptors, so we emit them first to ensure the memory is // accessible with a 32 bit offset. - Process::CoreFileMemoryRanges ranges_32; - Process::CoreFileMemoryRanges ranges_64; - Process::CoreFileMemoryRanges all_core_memory_ranges; + std::vector ranges_32; + std::vector ranges_64; + CoreFileMemoryRanges all_core_memory_ranges; error = m_process_sp->CalculateCoreFileSaveRanges(m_save_core_options, all_core_memory_ranges); + + std::vector all_core_memory_vec; + // Extract all the data into just a vector of data, so we can mutate it in + // place. + for (const auto &core_range : all_core_memory_ranges) + all_core_memory_vec.push_back(core_range.data); + if (error.Fail()) return error; // Start by saving all of the stacks and ensuring they fit under the 32b // limit. uint64_t total_size = GetCurrentDataEndOffset(); - auto iterator = all_core_memory_ranges.begin(); - while (iterator != all_core_memory_ranges.end()) { + auto iterator = all_core_memory_vec.begin(); + while (iterator != all_core_memory_vec.end()) { if (m_saved_stack_ranges.count(iterator->range.start()) > 0) { // We don't save stacks twice. ranges_32.push_back(*iterator); total_size += iterator->range.size() + sizeof(llvm::minidump::MemoryDescriptor); - iterator = all_core_memory_ranges.erase(iterator); + iterator = all_core_memory_vec.erase(iterator); } else { iterator++; } @@ -869,11 +876,11 @@ Status MinidumpFileBuilder::AddMemoryList() { // Then anything that overflows extends into 64b addressable space. // All core memory ranges will either contain nothing, only stacks, // or all the memory ranges including stacks - if (!all_core_memory_ranges.empty()) - total_size += 256 + (all_core_memory_ranges.size() * + if (!all_core_memory_vec.empty()) + total_size += 256 + (all_core_memory_vec.size() * sizeof(llvm::minidump::MemoryDescriptor_64)); - for (const auto &core_range : all_core_memory_ranges) { + for (const auto &core_range : all_core_memory_vec) { const addr_t range_size = core_range.range.size(); // We don't need to check for stacks here because we already removed them // from all_core_memory_ranges.
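The motivation for splitting ranges_32 from ranges_64 is the minidump format itself: thread structures reference stack memory through 32-bit descriptors, so those ranges must land below the 4 GiB file offset, while everything else can spill into the 64-bit memory list. A standalone sketch of that partitioning rule follows, using simplified stand-in types rather than the builder's actual interface.

#include <cstdint>
#include <vector>

struct Range {
  uint64_t start;
  uint64_t size;
};

// Append each range to the 32-bit list while the running file offset still
// fits in a 32-bit descriptor; anything past that goes to the 64-bit list.
static void PartitionRanges(const std::vector<Range> &all,
                            uint64_t current_offset,
                            std::vector<Range> &ranges_32,
                            std::vector<Range> &ranges_64) {
  constexpr uint64_t k32BitLimit = UINT32_MAX;
  for (const Range &r : all) {
    if (current_offset + r.size <= k32BitLimit) {
      ranges_32.push_back(r);
      current_offset += r.size; // each saved range consumes file space
    } else {
      ranges_64.push_back(r);
    }
  }
}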
@@ -958,15 +965,15 @@ Status MinidumpFileBuilder::DumpDirectories() const { } static uint64_t -GetLargestRangeSize(const Process::CoreFileMemoryRanges &ranges) { +GetLargestRangeSize(const std::vector &ranges) { uint64_t max_size = 0; for (const auto &core_range : ranges) max_size = std::max(max_size, core_range.range.size()); return max_size; } -Status -MinidumpFileBuilder::AddMemoryList_32(Process::CoreFileMemoryRanges &ranges) { +Status MinidumpFileBuilder::AddMemoryList_32( + std::vector &ranges) { std::vector descriptors; Status error; if (ranges.size() == 0) @@ -1042,8 +1049,8 @@ MinidumpFileBuilder::AddMemoryList_32(Process::CoreFileMemoryRanges &ranges) { return error; } -Status -MinidumpFileBuilder::AddMemoryList_64(Process::CoreFileMemoryRanges &ranges) { +Status MinidumpFileBuilder::AddMemoryList_64( + std::vector &ranges) { Status error; if (ranges.empty()) return error; diff --git a/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.h b/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.h index 762de83db5a39c..71001e26c00e91 100644 --- a/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.h +++ b/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.h @@ -23,6 +23,7 @@ #include #include +#include "lldb/Symbol/SaveCoreOptions.h" #include "lldb/Target/Process.h" #include "lldb/Target/Target.h" #include "lldb/Utility/DataBufferHeap.h" @@ -120,9 +121,9 @@ class MinidumpFileBuilder { lldb_private::Status AddData(const void *data, uint64_t size); // Add MemoryList stream, containing dumps of important memory segments lldb_private::Status - AddMemoryList_64(lldb_private::Process::CoreFileMemoryRanges &ranges); + AddMemoryList_64(std::vector &ranges); lldb_private::Status - AddMemoryList_32(lldb_private::Process::CoreFileMemoryRanges &ranges); + AddMemoryList_32(std::vector &ranges); // Update the thread list on disk with the newly emitted stack RVAs. 
lldb_private::Status FixThreadStacks(); lldb_private::Status FlushBufferToDisk(); diff --git a/lldb/source/Plugins/ObjectFile/Minidump/ObjectFileMinidump.h b/lldb/source/Plugins/ObjectFile/Minidump/ObjectFileMinidump.h index b76fcd0052a8a8..2f45f01558e667 100644 --- a/lldb/source/Plugins/ObjectFile/Minidump/ObjectFileMinidump.h +++ b/lldb/source/Plugins/ObjectFile/Minidump/ObjectFileMinidump.h @@ -21,6 +21,7 @@ #define LLDB_SOURCE_PLUGINS_OBJECTFILE_MINIDUMP_OBJECTFILEMINIDUMP_H #include "lldb/Symbol/ObjectFile.h" +#include "lldb/Symbol/SaveCoreOptions.h" #include "lldb/Utility/ArchSpec.h" class ObjectFileMinidump : public lldb_private::PluginInterface { diff --git a/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp b/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp index 9d01089745dfc9..8d9c919bc9b101 100644 --- a/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp +++ b/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp @@ -17,6 +17,7 @@ #include "lldb/Interpreter/OptionValueDictionary.h" #include "lldb/Interpreter/OptionValueProperties.h" #include "lldb/Symbol/ObjectFile.h" +#include "lldb/Symbol/SaveCoreOptions.h" #include "lldb/Target/Process.h" #include "lldb/Target/SectionLoadList.h" #include "lldb/Target/Target.h" diff --git a/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.h b/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.h index 8bccf3be3e5f63..4f4dedf773c5ba 100644 --- a/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.h +++ b/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.h @@ -13,6 +13,7 @@ #include #include "lldb/Symbol/ObjectFile.h" +#include "lldb/Symbol/SaveCoreOptions.h" #include "llvm/Object/COFF.h" class ObjectFilePECOFF : public lldb_private::ObjectFile { diff --git a/lldb/source/Symbol/SaveCoreOptions.cpp b/lldb/source/Symbol/SaveCoreOptions.cpp index 35943726f2e4ef..8d9aadece2152d 100644 --- a/lldb/source/Symbol/SaveCoreOptions.cpp +++ b/lldb/source/Symbol/SaveCoreOptions.cpp @@ -102,6 +102,19 @@ bool SaveCoreOptions::ShouldThreadBeSaved(lldb::tid_t tid) const { return m_threads_to_save.count(tid) > 0; } +bool SaveCoreOptions::HasSpecifiedThreads() const { + return !m_threads_to_save.empty(); +} + +void SaveCoreOptions::AddMemoryRegionToSave( + const lldb_private::MemoryRegionInfo &region) { + m_regions_to_save.Insert(region.GetRange(), /*combine=*/true); +} + +const MemoryRanges &SaveCoreOptions::GetCoreFileMemoryRanges() const { + return m_regions_to_save; +} + Status SaveCoreOptions::EnsureValidConfiguration( lldb::ProcessSP process_sp) const { Status error; @@ -131,4 +144,5 @@ void SaveCoreOptions::Clear() { m_style = std::nullopt; m_threads_to_save.clear(); m_process_sp.reset(); + m_regions_to_save.Clear(); } diff --git a/lldb/source/Target/CMakeLists.txt b/lldb/source/Target/CMakeLists.txt index a42c44b761dc56..a6d2eace975420 100644 --- a/lldb/source/Target/CMakeLists.txt +++ b/lldb/source/Target/CMakeLists.txt @@ -11,6 +11,7 @@ add_lldb_library(lldbTarget ABI.cpp AssertFrameRecognizer.cpp DynamicRegisterInfo.cpp + CoreFileMemoryRanges.cpp ExecutionContext.cpp InstrumentationRuntime.cpp InstrumentationRuntimeStopInfo.cpp diff --git a/lldb/source/Target/CoreFileMemoryRanges.cpp b/lldb/source/Target/CoreFileMemoryRanges.cpp new file mode 100644 index 00000000000000..6e4ca4995915c3 --- /dev/null +++ b/lldb/source/Target/CoreFileMemoryRanges.cpp @@ -0,0 +1,86 @@ +//===-- CoreFileMemoryRanges.cpp --------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0
with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lldb/Target/CoreFileMemoryRanges.h" + +using namespace lldb; +using namespace lldb_private; + +using Entry = CoreFileMemoryRanges::Entry; + +static bool Overlaps(const Entry *region_one, const Entry *region_two) { + return !(region_one->GetRangeEnd() < region_two->GetRangeBase() || + region_two->GetRangeEnd() < region_one->GetRangeBase()); +} + +static bool IntersectHelper(const Entry *region_one, const Entry *region_two) { + return region_one->GetRangeBase() == region_two->GetRangeEnd() || + region_one->GetRangeEnd() == region_two->GetRangeBase(); +} + +static bool OnlyIntersects(const Entry *region_one, const Entry *region_two) { + return IntersectHelper(region_one, region_two) || + IntersectHelper(region_two, region_one); +} + +static bool PermissionsMatch(const Entry *region_one, const Entry *region_two) { + return region_one->data.lldb_permissions == region_two->data.lldb_permissions; +} + +// This assumes any overlapping ranges will share the same permissions +// and that adjacent ranges could have different permissions. +Status CoreFileMemoryRanges::FinalizeCoreFileSaveRanges() { + Status error; + this->Sort(); + for (size_t i = this->GetSize() - 1; i > 0; i--) { + auto region_one = this->GetMutableEntryAtIndex(i); + auto region_two = this->GetMutableEntryAtIndex(i - 1); + if (Overlaps(region_one, region_two)) { + // It's okay for intersecting regions to have different permissions but + // if they overlap we fail because we don't know what to do with them. + if (!PermissionsMatch(region_one, region_two)) { + // Permissions mismatch and it's not a simple intersection. + if (!OnlyIntersects(region_one, region_two)) { + error = Status::FromErrorStringWithFormatv( + "Memory region at {0}::{1} has different permissions than " + "overlapping region at {2}::{3}", + region_one->GetRangeBase(), region_one->GetRangeEnd(), + region_two->GetRangeBase(), region_two->GetRangeEnd()); + return error; + } + // Simple intersection, we can just not merge these. + else + continue; + } + const addr_t base = + std::min(region_one->GetRangeBase(), region_two->GetRangeBase()); + const addr_t byte_size = + std::max(region_one->GetRangeEnd(), region_two->GetRangeEnd()) - base; + + region_two->SetRangeBase(base); + region_two->SetByteSize(byte_size); + + // Because this is a range data vector, the entry has a base as well + // as the data contained in the entry. So we have to update both. + // And llvm::AddressRange isn't mutable so we have to create a new one. + llvm::AddressRange range(base, base + byte_size); + const CoreFileMemoryRange core_range = { + range, region_two->data.lldb_permissions}; + region_two->data = core_range; + // Erase removes entries in the [inclusive, exclusive) index range.
+ if (!this->Erase(i, i + 1)) { + error = Status::FromErrorStringWithFormat( + "Core file memory ranges mutated outside of " + "CalculateCoreFileSaveRanges"); + return error; + } + } + } + + return error; +} diff --git a/lldb/source/Target/Process.cpp b/lldb/source/Target/Process.cpp index 40f3115f1ff6de..aca08972811470 100644 --- a/lldb/source/Target/Process.cpp +++ b/lldb/source/Target/Process.cpp @@ -6463,7 +6463,7 @@ Status Process::WriteMemoryTags(lldb::addr_t addr, size_t len, } // Create a CoreFileMemoryRange from a MemoryRegionInfo -static Process::CoreFileMemoryRange +static CoreFileMemoryRange CreateCoreFileMemoryRange(const MemoryRegionInfo ®ion) { const addr_t addr = region.GetRange().GetRangeBase(); llvm::AddressRange range(addr, addr + region.GetRange().GetByteSize()); @@ -6474,7 +6474,7 @@ CreateCoreFileMemoryRange(const MemoryRegionInfo ®ion) { // were added. Return false if the dirty page information is not valid or in // the region. static bool AddDirtyPages(const MemoryRegionInfo ®ion, - Process::CoreFileMemoryRanges &ranges) { + CoreFileMemoryRanges &ranges) { const auto &dirty_page_list = region.GetDirtyPageList(); if (!dirty_page_list) return false; @@ -6494,14 +6494,14 @@ static bool AddDirtyPages(const MemoryRegionInfo ®ion, } else { // Add previous contiguous range and init the new range with the // current dirty page. - ranges.push_back({range, lldb_permissions}); + ranges.Append(range.start(), range.size(), {range, lldb_permissions}); range = llvm::AddressRange(page_addr, page_addr + page_size); } } } // The last range if (!range.empty()) - ranges.push_back({range, lldb_permissions}); + ranges.Append(range.start(), range.size(), {range, lldb_permissions}); return true; } @@ -6513,7 +6513,7 @@ static bool AddDirtyPages(const MemoryRegionInfo ®ion, // will be added to \a ranges, else the entire range will be added to \a // ranges. static void AddRegion(const MemoryRegionInfo ®ion, bool try_dirty_pages, - Process::CoreFileMemoryRanges &ranges) { + CoreFileMemoryRanges &ranges) { // Don't add empty ranges. if (region.GetRange().GetByteSize() == 0) return; @@ -6522,13 +6522,17 @@ static void AddRegion(const MemoryRegionInfo ®ion, bool try_dirty_pages, return; if (try_dirty_pages && AddDirtyPages(region, ranges)) return; - ranges.push_back(CreateCoreFileMemoryRange(region)); + + ranges.Append(region.GetRange().GetRangeBase(), + region.GetRange().GetByteSize(), + CreateCoreFileMemoryRange(region)); } -static void SaveOffRegionsWithStackPointers( - Process &process, const SaveCoreOptions &core_options, - const MemoryRegionInfos ®ions, Process::CoreFileMemoryRanges &ranges, - std::set &stack_ends) { +static void SaveOffRegionsWithStackPointers(Process &process, + const SaveCoreOptions &core_options, + const MemoryRegionInfos ®ions, + CoreFileMemoryRanges &ranges, + std::set &stack_ends) { const bool try_dirty_pages = true; // Before we take any dump, we want to save off the used portions of the @@ -6568,11 +6572,11 @@ static void SaveOffRegionsWithStackPointers( // for a full core file style. static void GetCoreFileSaveRangesFull(Process &process, const MemoryRegionInfos ®ions, - Process::CoreFileMemoryRanges &ranges, + CoreFileMemoryRanges &ranges, std::set &stack_ends) { // Don't add only dirty pages, add full regions. 
-const bool try_dirty_pages = false;
+  const bool try_dirty_pages = false;
   for (const auto &region : regions)
     if (stack_ends.count(region.GetRange().GetRangeEnd()) == 0)
       AddRegion(region, try_dirty_pages, ranges);
@@ -6582,9 +6586,10 @@
 // least some dirty pages, as some OS versions don't support reporting what
 // pages are dirty within an memory region. If no memory regions have dirty
 // page information fall back to saving out all ranges with write permissions.
-static void GetCoreFileSaveRangesDirtyOnly(
-    Process &process, const MemoryRegionInfos &regions,
-    Process::CoreFileMemoryRanges &ranges, std::set &stack_ends) {
+static void GetCoreFileSaveRangesDirtyOnly(Process &process,
+                                           const MemoryRegionInfos &regions,
+                                           CoreFileMemoryRanges &ranges,
+                                           std::set &stack_ends) {
   // Iterate over the regions and find all dirty pages.
   bool have_dirty_page_info = false;
@@ -6613,9 +6618,10 @@ static void GetCoreFileSaveRangesDirtyOnly(
 // dirty regions as this will make the core file smaller. If the process
 // doesn't support dirty regions, then it will fall back to adding the full
 // stack region.
-static void GetCoreFileSaveRangesStackOnly(
-    Process &process, const MemoryRegionInfos &regions,
-    Process::CoreFileMemoryRanges &ranges, std::set &stack_ends) {
+static void GetCoreFileSaveRangesStackOnly(Process &process,
+                                           const MemoryRegionInfos &regions,
+                                           CoreFileMemoryRanges &ranges,
+                                           std::set &stack_ends) {
   const bool try_dirty_pages = true;
   // Some platforms support annotating the region information that tell us that
   // it comes from a thread stack. So look for those regions first.
@@ -6628,6 +6634,24 @@ static void GetCoreFileSaveRangesStackOnly(
   }
 }
 
+static void GetUserSpecifiedCoreFileSaveRanges(Process &process,
+                                               const MemoryRegionInfos &regions,
+                                               const SaveCoreOptions &options,
+                                               CoreFileMemoryRanges &ranges) {
+  const auto &option_ranges = options.GetCoreFileMemoryRanges();
+  if (option_ranges.IsEmpty())
+    return;
+
+  for (const auto &range : regions) {
+    auto entry = option_ranges.FindEntryThatContains(range.GetRange());
+    if (entry) {
+      ranges.Append(range.GetRange().GetRangeBase(),
+                    range.GetRange().GetByteSize(),
+                    CreateCoreFileMemoryRange(range));
+    }
+  }
+}
+
 Status Process::CalculateCoreFileSaveRanges(const SaveCoreOptions &options,
                                             CoreFileMemoryRanges &ranges) {
   lldb_private::MemoryRegionInfos regions;
@@ -6643,11 +6667,18 @@ Status Process::CalculateCoreFileSaveRanges(const SaveCoreOptions &options,
                   "callers must set the core_style to something other than "
                   "eSaveCoreUnspecified");
 
+  GetUserSpecifiedCoreFileSaveRanges(*this, regions, options, ranges);
+
   std::set stack_ends;
-  SaveOffRegionsWithStackPointers(*this, options, regions, ranges, stack_ends);
+  // For fully custom setups, we don't even want to look at threads if there
+  // are no threads specified.
+  if (core_style != lldb::eSaveCoreCustomOnly || options.HasSpecifiedThreads())
+    SaveOffRegionsWithStackPointers(*this, options, regions, ranges,
+                                    stack_ends);
 
   switch (core_style) {
   case eSaveCoreUnspecified:
+  case eSaveCoreCustomOnly:
     break;
 
   case eSaveCoreFull:
@@ -6666,10 +6697,11 @@ Status Process::CalculateCoreFileSaveRanges(const SaveCoreOptions &options,
   if (err.Fail())
     return err;
 
-  if (ranges.empty())
-    return Status("no valid address ranges found for core style");
+  if (ranges.IsEmpty())
+    return Status::FromErrorStringWithFormat(
+        "no valid address ranges found for core style");
 
-  return Status(); // Success!
+  return ranges.FinalizeCoreFileSaveRanges();
 }
 
 std::vector
diff --git a/lldb/test/API/functionalities/process_save_core_minidump/TestProcessSaveCoreMinidump.py b/lldb/test/API/functionalities/process_save_core_minidump/TestProcessSaveCoreMinidump.py
index ed15793b527fc9..2cbe20ee10b1af 100644
--- a/lldb/test/API/functionalities/process_save_core_minidump/TestProcessSaveCoreMinidump.py
+++ b/lldb/test/API/functionalities/process_save_core_minidump/TestProcessSaveCoreMinidump.py
@@ -344,3 +344,152 @@ def test_save_linux_mini_dump_default_options(self):
             self.assertTrue(self.dbg.DeleteTarget(target))
             if os.path.isfile(default_value_file):
                 os.unlink(default_value_file)
+
+    @skipUnlessArch("x86_64")
+    @skipUnlessPlatform(["linux"])
+    def test_save_linux_minidump_one_region(self):
+        """Test that we can save a Linux mini dump with a single region
+        specified in the SBSaveCoreOptions"""
+
+        self.build()
+        exe = self.getBuildArtifact("a.out")
+        one_region_file = self.getBuildArtifact("core.one_region.dmp")
+        try:
+            target = self.dbg.CreateTarget(exe)
+            process = target.LaunchSimple(
+                None, None, self.get_process_working_directory()
+            )
+            self.assertState(process.GetState(), lldb.eStateStopped)
+
+            memory_region = lldb.SBMemoryRegionInfo()
+            memory_list = process.GetMemoryRegions()
+            memory_list.GetMemoryRegionAtIndex(0, memory_region)
+
+            # Add just the first region and use the custom-only style, so the
+            # resulting core should contain only that explicitly added region.
+            options = lldb.SBSaveCoreOptions()
+            file_spec = lldb.SBFileSpec(one_region_file)
+            options.SetOutputFile(file_spec)
+            options.SetPluginName("minidump")
+            options.AddMemoryRegionToSave(memory_region)
+            options.SetStyle(lldb.eSaveCoreCustomOnly)
+            error = process.SaveCore(options)
+            self.assertTrue(error.Success(), error.GetCString())
+
+            core_target = self.dbg.CreateTarget(None)
+            core_proc = core_target.LoadCore(one_region_file)
+            core_memory_list = core_proc.GetMemoryRegions()
+            # Note: because the /proc/<pid> maps are included on Linux, we
+            # can't depend on size for validation, so we ensure the first
+            # region is present and then assert that reading from the second
+            # one fails.
+            core_memory_region = lldb.SBMemoryRegionInfo()
+            core_memory_list.GetMemoryRegionAtIndex(0, core_memory_region)
+            self.assertEqual(
+                core_memory_region.GetRegionBase(), memory_region.GetRegionBase()
+            )
+            self.assertEqual(
+                core_memory_region.GetRegionEnd(), memory_region.GetRegionEnd()
+            )
+
+            region_two = lldb.SBMemoryRegionInfo()
+            core_memory_list.GetMemoryRegionAtIndex(1, region_two)
+            err = lldb.SBError()
+            content = core_proc.ReadMemory(region_two.GetRegionBase(), 1, err)
+            self.assertTrue(err.Fail(), "Should fail to read memory")
+
+        finally:
+            self.assertTrue(self.dbg.DeleteTarget(target))
+            if os.path.isfile(one_region_file):
+                os.unlink(one_region_file)
+
+    @skipUnlessArch("x86_64")
+    @skipUnlessPlatform(["linux"])
+    def test_save_minidump_custom_save_style(self):
+        """Test that verifies a custom save style with nothing specified to
+        save fails because it contains no data to save"""
+
+        self.build()
+        exe = self.getBuildArtifact("a.out")
+        custom_file = self.getBuildArtifact("core.custom.dmp")
+        try:
+            target = self.dbg.CreateTarget(exe)
+            process = target.LaunchSimple(
+                None, None, self.get_process_working_directory()
+            )
+            self.assertState(process.GetState(), lldb.eStateStopped)
+
+            options = lldb.SBSaveCoreOptions()
+            options.SetOutputFile(lldb.SBFileSpec(custom_file))
+            options.SetPluginName("minidump")
+            options.SetStyle(lldb.eSaveCoreCustomOnly)
+
+            error = process.SaveCore(options)
+            self.assertTrue(error.Fail())
+            self.assertEqual(
+                error.GetCString(), "no valid address ranges found for core style"
+            )
+
+        finally:
+            self.assertTrue(self.dbg.DeleteTarget(target))
+            if os.path.isfile(custom_file):
+                os.unlink(custom_file)
+
+    def save_core_with_region(self, process, region_index):
+        try:
+            custom_file = self.getBuildArtifact("core.custom.dmp")
+            memory_region = lldb.SBMemoryRegionInfo()
+            memory_list = process.GetMemoryRegions()
+            memory_list.GetMemoryRegionAtIndex(region_index, memory_region)
+            options = lldb.SBSaveCoreOptions()
+            options.SetOutputFile(lldb.SBFileSpec(custom_file))
+            options.SetPluginName("minidump")
+            options.SetStyle(lldb.eSaveCoreFull)
+            options.AddMemoryRegionToSave(memory_region)
+
+            error = process.SaveCore(options)
+            self.assertTrue(error.Success(), error.GetCString())
+            core_target = self.dbg.CreateTarget(None)
+            core_proc = core_target.LoadCore(custom_file)
+            core_memory_list = core_proc.GetMemoryRegions()
+            # The /proc/<pid> maps are included on Linux, so we can't depend
+            # on size for validation. Instead we make a set of all the ranges
+            # and ensure there are no duplicates.
+            range_set = set()
+            for x in range(core_memory_list.GetSize()):
+                core_memory_region = lldb.SBMemoryRegionInfo()
+                core_memory_list.GetMemoryRegionAtIndex(x, core_memory_region)
+                mem_tuple = (
+                    core_memory_region.GetRegionBase(),
+                    core_memory_region.GetRegionEnd(),
+                )
+                self.assertTrue(
+                    mem_tuple not in range_set, "Duplicate memory region found"
+                )
+                range_set.add(mem_tuple)
+        finally:
+            if os.path.isfile(custom_file):
+                os.unlink(custom_file)
+
+    @skipUnlessArch("x86_64")
+    @skipUnlessPlatform(["linux"])
+    def test_save_minidump_custom_save_style_duplicated_regions(self):
+        """Test that a user-specified region combined with a full save style
+        does not produce duplicate regions in the core file"""
+
+        self.build()
+        exe = self.getBuildArtifact("a.out")
+        try:
+            target = self.dbg.CreateTarget(exe)
+            process = target.LaunchSimple(
+                None, None, self.get_process_working_directory()
+            )
+            self.assertState(process.GetState(), lldb.eStateStopped)
+
+            memory_list = process.GetMemoryRegions()
+            # Test that we don't duplicate regions by specifying regions
+            # at various indices.
+            self.save_core_with_region(process, 0)
+            self.save_core_with_region(process, len(memory_list) - 1)
+
+        finally:
+            self.assertTrue(self.dbg.DeleteTarget(target))
diff --git a/lldb/test/Shell/SymbolFile/checksum-mismatch.test b/lldb/test/Shell/SymbolFile/checksum-mismatch.test
index 5db97647c9aa02..8dc55f80e5c2ef 100644
--- a/lldb/test/Shell/SymbolFile/checksum-mismatch.test
+++ b/lldb/test/Shell/SymbolFile/checksum-mismatch.test
@@ -1,3 +1,5 @@
+UNSUPPORTED: system-windows
+
 RUN: mkdir -p %t
 RUN: cp %S/Inputs/main.c %t/main.c
 RUN: %clang_host %t/main.c -std=c99 -gdwarf-5 -o %t/main.out
diff --git a/lldb/unittests/Process/Utility/CMakeLists.txt b/lldb/unittests/Process/Utility/CMakeLists.txt
index 651f871621fdfc..ec0ff95d073b92 100644
--- a/lldb/unittests/Process/Utility/CMakeLists.txt
+++ b/lldb/unittests/Process/Utility/CMakeLists.txt
@@ -18,6 +18,7 @@ add_lldb_unittest(ProcessUtilityTests
   LinuxProcMapsTest.cpp
   MemoryTagManagerAArch64MTETest.cpp
   RegisterContextTest.cpp
+  CoreFileMemoryRangesTest.cpp
   ${PLATFORM_SOURCES}
 
   LINK_LIBS
diff --git a/lldb/unittests/Process/Utility/CoreFileMemoryRangesTest.cpp b/lldb/unittests/Process/Utility/CoreFileMemoryRangesTest.cpp
new file mode 100644
index 00000000000000..6d514b11323864
--- /dev/null
+++ b/lldb/unittests/Process/Utility/CoreFileMemoryRangesTest.cpp
@@ -0,0 +1,205 @@
+//===-- CoreFileMemoryRangesTest.cpp ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+#include "lldb/Target/CoreFileMemoryRanges.h"
+#include "lldb/lldb-types.h"
+
+using namespace lldb_private;
+
+TEST(CoreFileMemoryRangesTest, MapOverlappingRanges) {
+  lldb_private::CoreFileMemoryRanges ranges;
+  const lldb::addr_t start_addr = 0x1000;
+  const lldb::addr_t increment_addr = 0x1000;
+  const size_t iterations = 10;
+  for (size_t i = 0; i < iterations; i++) {
+    const lldb::addr_t start = start_addr + (i * increment_addr);
+    const lldb::addr_t end = start + increment_addr;
+    // Arbitrary value
+    const uint32_t permissions = 0x3;
+    llvm::AddressRange range(start, end);
+    const CoreFileMemoryRange core_range = {range, permissions};
+    // The entry stores (start, size), while the range itself is (start, end).
+    CoreFileMemoryRanges::Entry entry = {start, end - start, core_range};
+    ranges.Append(entry);
+  }
+
+  Status error = ranges.FinalizeCoreFileSaveRanges();
+  EXPECT_TRUE(error.Success());
+  ASSERT_THAT(1, ranges.GetSize());
+  const auto range = ranges.GetEntryAtIndex(0);
+  ASSERT_TRUE(range);
+  ASSERT_THAT(start_addr, range->GetRangeBase());
+  ASSERT_THAT(start_addr + (iterations * increment_addr), range->GetRangeEnd());
+}
+
+TEST(CoreFileMemoryRangesTest, RangesSplitByPermissions) {
+  lldb_private::CoreFileMemoryRanges ranges;
+  const lldb::addr_t start_addr = 0x1000;
+  const lldb::addr_t increment_addr = 0x1000;
+  const size_t iterations = 10;
+  for (size_t i = 0; i < iterations; i++) {
+    const lldb::addr_t start = start_addr + (i * increment_addr);
+    const lldb::addr_t end = start + increment_addr;
+    const uint32_t permissions = i;
+    llvm::AddressRange range(start, end);
+    const CoreFileMemoryRange core_range = {range, permissions};
+    // The entry stores (start, size), while the range itself is (start, end).
+    CoreFileMemoryRanges::Entry entry = {start, end - start, core_range};
+    ranges.Append(entry);
+  }
+
+  Status error = ranges.FinalizeCoreFileSaveRanges();
+  EXPECT_TRUE(error.Success());
+  ASSERT_THAT(10, ranges.GetSize());
+  const auto range = ranges.GetEntryAtIndex(0);
+  ASSERT_TRUE(range);
+  ASSERT_THAT(start_addr, range->GetRangeBase());
+  ASSERT_THAT(start_addr + increment_addr, range->GetRangeEnd());
+}
+
+TEST(CoreFileMemoryRangesTest, MapPartialOverlappingRanges) {
+  lldb_private::CoreFileMemoryRanges ranges;
+  const lldb::addr_t start_addr = 0x1000;
+  const lldb::addr_t increment_addr = 0x1000;
+  const size_t iterations = 10;
+  for (size_t i = 0; i < iterations; i++) {
+    const lldb::addr_t start = start_addr + (i * increment_addr);
+    const lldb::addr_t end = start + increment_addr;
+    // Arbitrary value
+    const uint32_t permissions = 0x3;
+    llvm::AddressRange range(start, end);
+    const CoreFileMemoryRange core_range = {range, permissions};
+    // The entry stores (start, size), while the range itself is (start, end).
+    CoreFileMemoryRanges::Entry entry = {start, end - start, core_range};
+    ranges.Append(entry);
+  }
+
+  const lldb::addr_t unique_start = 0x7fff0000;
+  const lldb::addr_t unique_end = unique_start + increment_addr;
+  llvm::AddressRange range(unique_start, unique_end);
+  const uint32_t permissions = 0x3;
+  const CoreFileMemoryRange core_range = {range, permissions};
+  // The entry stores (start, size), while the range itself is (start, end).
+  CoreFileMemoryRanges::Entry entry = {unique_start, unique_end - unique_start,
+                                       core_range};
+  ranges.Append(entry);
+
+  Status error = ranges.FinalizeCoreFileSaveRanges();
+  EXPECT_TRUE(error.Success());
+  ASSERT_THAT(2, ranges.GetSize());
+  const auto merged_range = ranges.GetEntryAtIndex(0);
+  ASSERT_TRUE(merged_range);
+  ASSERT_THAT(start_addr, merged_range->GetRangeBase());
+  ASSERT_THAT(start_addr + (iterations * increment_addr),
+              merged_range->GetRangeEnd());
+  const auto unique_range = ranges.GetEntryAtIndex(1);
+  ASSERT_TRUE(unique_range);
+  ASSERT_THAT(unique_start, unique_range->GetRangeBase());
+  ASSERT_THAT(unique_end, unique_range->GetRangeEnd());
+}
+
+TEST(CoreFileMemoryRangesTest, SuperiorAndInferiorRanges_SamePermissions) {
+  lldb_private::CoreFileMemoryRanges ranges;
+  const lldb::addr_t start_addr = 0x1000;
+  const lldb::addr_t increment_addr = 0x1000;
+  const lldb::addr_t superior_region_end = start_addr + increment_addr * 10;
+  llvm::AddressRange range(start_addr, superior_region_end);
+  const CoreFileMemoryRange core_range = {range, 0x3};
+  CoreFileMemoryRanges::Entry entry = {
+      start_addr, superior_region_end - start_addr, core_range};
+  ranges.Append(entry);
+  const lldb::addr_t inferior_region_end = start_addr + increment_addr;
+  llvm::AddressRange inferior_range(start_addr, inferior_region_end);
+  const CoreFileMemoryRange inferior_core_range = {inferior_range, 0x3};
+  CoreFileMemoryRanges::Entry inferior_entry = {
+      start_addr, inferior_region_end - start_addr, inferior_core_range};
+  ranges.Append(inferior_entry);
+
+  Status error = ranges.FinalizeCoreFileSaveRanges();
+  EXPECT_TRUE(error.Success());
+  ASSERT_THAT(1, ranges.GetSize());
+  const auto searched_range = ranges.GetEntryAtIndex(0);
+  ASSERT_TRUE(searched_range);
+  ASSERT_THAT(start_addr, searched_range->GetRangeBase());
+  ASSERT_THAT(superior_region_end, searched_range->GetRangeEnd());
+}
+
+TEST(CoreFileMemoryRangesTest, SuperiorAndInferiorRanges_DifferentPermissions) {
+  lldb_private::CoreFileMemoryRanges ranges;
+  const lldb::addr_t start_addr = 0x1000;
+  const lldb::addr_t increment_addr = 0x1000;
+  const lldb::addr_t superior_region_end = start_addr + increment_addr * 10;
+  llvm::AddressRange range(start_addr, superior_region_end);
+  const CoreFileMemoryRange core_range = {range, 0x3};
+  CoreFileMemoryRanges::Entry entry = {
+      start_addr, superior_region_end - start_addr, core_range};
+  ranges.Append(entry);
+  const lldb::addr_t inferior_region_end = start_addr + increment_addr;
+  llvm::AddressRange inferior_range(start_addr, inferior_region_end);
+  const CoreFileMemoryRange inferior_core_range = {inferior_range, 0x4};
+  CoreFileMemoryRanges::Entry inferior_entry = {
+      start_addr, inferior_region_end - start_addr, inferior_core_range};
+  ranges.Append(inferior_entry);
+
+  Status error = ranges.FinalizeCoreFileSaveRanges();
+  EXPECT_TRUE(error.Fail());
+}
+
+TEST(CoreFileMemoryRangesTest, NonIntersectingRangesSamePermissions) {
+  const int permissions = 0x7;
+  lldb_private::CoreFileMemoryRanges ranges;
+  const lldb::addr_t region_one_start = 0x1000;
+  const lldb::addr_t region_one_end = 0x2000;
+  llvm::AddressRange range_one(region_one_start, region_one_end);
+  const CoreFileMemoryRange core_range_one = {range_one, permissions};
+  CoreFileMemoryRanges::Entry entry_one = {
+      region_one_start, region_one_end - region_one_start, core_range_one};
+  ranges.Append(entry_one);
+  const lldb::addr_t region_two_start = 0xb000;
+  const lldb::addr_t region_two_end = 0xc000;
+  llvm::AddressRange range_two(region_two_start, region_two_end);
+  const CoreFileMemoryRange core_range_two = {range_two, permissions};
+  CoreFileMemoryRanges::Entry entry_two = {
+      region_two_start, region_two_end - region_two_start, core_range_two};
+  ranges.Append(entry_two);
+
+  Status error = ranges.FinalizeCoreFileSaveRanges();
+  EXPECT_TRUE(error.Success());
+  ASSERT_THAT(2UL, ranges.GetSize());
+  ASSERT_THAT(region_one_start, ranges.GetEntryAtIndex(0)->GetRangeBase());
+  ASSERT_THAT(region_two_start, ranges.GetEntryAtIndex(1)->GetRangeBase());
+}
+
+TEST(CoreFileMemoryRangesTest, PartialOverlapping) {
+  const int permissions = 0x3;
+  lldb_private::CoreFileMemoryRanges ranges;
+  const lldb::addr_t start_addr = 0x1000;
+  const lldb::addr_t end_addr = 0x2000;
+  llvm::AddressRange range_one(start_addr, end_addr);
+  const CoreFileMemoryRange core_range_one = {range_one, permissions};
+  CoreFileMemoryRanges::Entry entry_one = {start_addr, end_addr - start_addr,
+                                           core_range_one};
+  llvm::AddressRange range_two(start_addr / 2, end_addr / 2);
+  const CoreFileMemoryRange core_range_two = {range_two, permissions};
+  CoreFileMemoryRanges::Entry entry_two = {
+      start_addr / 2, end_addr / 2 - start_addr / 2, core_range_two};
+  ranges.Append(entry_one);
+  ranges.Append(entry_two);
+
+  Status error = ranges.FinalizeCoreFileSaveRanges();
+  EXPECT_TRUE(error.Success());
+  ASSERT_THAT(1, ranges.GetSize());
+  const auto searched_range = ranges.GetEntryAtIndex(0);
+  ASSERT_TRUE(searched_range);
+  ASSERT_THAT(start_addr / 2, searched_range->GetRangeBase());
+  ASSERT_THAT(end_addr, searched_range->GetRangeEnd());
+}
diff --git a/llvm/benchmarks/SandboxIRBench.cpp b/llvm/benchmarks/SandboxIRBench.cpp
index c646ba6290daff..d4601d5f53d07a 100644
--- a/llvm/benchmarks/SandboxIRBench.cpp
+++ b/llvm/benchmarks/SandboxIRBench.cpp
@@ -34,15 +34,19 @@ static std::unique_ptr parseIR(LLVMContext &C, const char *IR) {
 }
 
 enum class IR {
-  LLVM,
-  SBox,
+  LLVM,           ///< LLVM IR
+  SBoxNoTracking, ///< Sandbox IR with tracking disabled
+  SBoxTracking,   ///< Sandbox IR with tracking enabled
 };
 
 // Traits to get
llvm::BasicBlock/sandboxir::BasicBlock from IR::LLVM/IR::SBox. template struct TypeSelect {}; template <> struct TypeSelect { using BasicBlock = llvm::BasicBlock; }; -template <> struct TypeSelect { +template <> struct TypeSelect { + using BasicBlock = sandboxir::BasicBlock; +}; +template <> struct TypeSelect { using BasicBlock = sandboxir::BasicBlock; }; @@ -59,12 +63,22 @@ genIR(std::unique_ptr &LLVMM, LLVMContext &LLVMCtx, sandboxir::Function *F = Ctx.createFunction(LLVMF); sandboxir::BasicBlock *BB = &*F->begin(); + // Start tracking if we are testing with tracking enabled. + if constexpr (IRTy == IR::SBoxTracking) + Ctx.save(); + if constexpr (IRTy == IR::LLVM) return LLVMBB; else return BB; } +template static void finalize(sandboxir::Context &Ctx) { + // Accept changes if we are tracking. + if constexpr (IRTy == IR::SBoxTracking) + Ctx.accept(); +} + static std::string generateBBWalkIR(unsigned Size) { std::stringstream SS; SS << "define void @foo(i32 %v1, i32 %v2) {\n"; @@ -75,6 +89,31 @@ static std::string generateBBWalkIR(unsigned Size) { return SS.str(); } +template static void SBoxIRCreation(benchmark::State &State) { + static_assert(IRTy != IR::LLVM, "Expected SBoxTracking or SBoxNoTracking"); + LLVMContext LLVMCtx; + unsigned NumInstrs = State.range(0); + std::unique_ptr LLVMM; + std::string IRStr = generateBBWalkIR(NumInstrs); + LLVMM = parseIR(LLVMCtx, IRStr.c_str()); + llvm::Function *LLVMF = &*LLVMM->getFunction("foo"); + + for (auto _ : State) { + State.PauseTiming(); + sandboxir::Context Ctx(LLVMCtx); + if constexpr (IRTy == IR::SBoxTracking) + Ctx.save(); + State.ResumeTiming(); + + sandboxir::Function *F = Ctx.createFunction(LLVMF); + benchmark::DoNotOptimize(F); + State.PauseTiming(); + if constexpr (IRTy == IR::SBoxTracking) + Ctx.accept(); + State.ResumeTiming(); + } +} + template static void BBWalk(benchmark::State &State) { LLVMContext LLVMCtx; sandboxir::Context Ctx(LLVMCtx); @@ -132,15 +171,71 @@ template static void RAUW(benchmark::State &State) { Def1->replaceAllUsesWith(Def2); Def2->replaceAllUsesWith(Def1); } + finalize(Ctx); } +static std::string generateRUOWIR(unsigned NumOperands) { + std::stringstream SS; + auto GenOps = [&SS, NumOperands]() { + for (auto Cnt : seq(0, NumOperands)) { + SS << "i8 %arg" << Cnt; + bool IsLast = Cnt + 1 == NumOperands; + if (!IsLast) + SS << ", "; + } + }; + + SS << "define void @foo("; + GenOps(); + SS << ") {\n"; + + SS << " call void @foo("; + GenOps(); + SS << ")\n"; + SS << "ret void"; + SS << "}"; + return SS.str(); +} + +template static void RUOW(benchmark::State &State) { + LLVMContext LLVMCtx; + sandboxir::Context Ctx(LLVMCtx); + std::unique_ptr LLVMM; + unsigned NumOperands = State.range(0); + auto *BB = genIR(LLVMM, LLVMCtx, Ctx, generateRUOWIR, NumOperands); + + auto It = BB->begin(); + auto *F = BB->getParent(); + auto *Arg0 = F->getArg(0); + auto *Arg1 = F->getArg(1); + auto *Call = &*It++; + for (auto _ : State) + Call->replaceUsesOfWith(Arg0, Arg1); + finalize(Ctx); +} + +// Measure the time it takes to create Sandbox IR without/with tracking. 
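Those creation benchmarks depend on Google Benchmark's PauseTiming()/ResumeTiming() to keep per-iteration setup and teardown (Context construction, save()/accept()) out of the measured region. A minimal standalone sketch of that pattern, with a plain vector fill standing in for the timed work (BM_FillVector is hypothetical, not part of the patch):

#include <benchmark/benchmark.h>
#include <vector>

// Everything between PauseTiming() and ResumeTiming() is excluded from the
// reported time, so only the push_back loop below is measured.
static void BM_FillVector(benchmark::State &State) {
  std::vector<int> V;
  for (auto _ : State) {
    State.PauseTiming();  // Per-iteration setup, not measured.
    V.clear();
    V.reserve(1024);
    State.ResumeTiming(); // Timing resumes for the work under test.

    for (int I = 0; I != 1024; ++I)
      V.push_back(I);
    benchmark::DoNotOptimize(V.data());
  }
}
BENCHMARK(BM_FillVector);

Each Pause/Resume pair carries timer overhead of its own, so the pattern is only reliable when the measured work, here building an entire function's worth of IR, dwarfs the timer calls.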
+BENCHMARK(SBoxIRCreation) + ->Args({10}) + ->Args({100}) + ->Args({1000}); +BENCHMARK(SBoxIRCreation) + ->Args({10}) + ->Args({100}) + ->Args({1000}); + BENCHMARK(GetType); -BENCHMARK(GetType); +BENCHMARK(GetType); BENCHMARK(BBWalk)->Args({1024}); -BENCHMARK(BBWalk)->Args({1024}); +BENCHMARK(BBWalk)->Args({1024}); BENCHMARK(RAUW)->Args({512}); -BENCHMARK(RAUW)->Args({512}); +BENCHMARK(RAUW)->Args({512}); +BENCHMARK(RAUW)->Args({512}); + +BENCHMARK(RUOW)->Args({4096}); +BENCHMARK(RUOW)->Args({4096}); +BENCHMARK(RUOW)->Args({4096}); BENCHMARK_MAIN(); diff --git a/llvm/docs/DirectX/DXILResources.rst b/llvm/docs/DirectX/DXILResources.rst index a982c3a29fcc3b..ad8ede9c59fbfa 100644 --- a/llvm/docs/DirectX/DXILResources.rst +++ b/llvm/docs/DirectX/DXILResources.rst @@ -361,6 +361,12 @@ Examples: - ``i32`` - Index into the buffer +.. code-block:: llvm + + %ret = call {<4 x float>, i1} + @llvm.dx.typedBufferLoad.checkbit.v4f32.tdx.TypedBuffer_v4f32_0_0_0t( + target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i32 %index) + Texture and Typed Buffer Stores ------------------------------- diff --git a/llvm/include/llvm/ADT/DenseMap.h b/llvm/include/llvm/ADT/DenseMap.h index fb267cf5cee1c6..083d5c9388f7c8 100644 --- a/llvm/include/llvm/ADT/DenseMap.h +++ b/llvm/include/llvm/ADT/DenseMap.h @@ -120,7 +120,7 @@ class DenseMapBase : public DebugEpochBase { } const KeyT EmptyKey = getEmptyKey(); - if (std::is_trivially_destructible::value) { + if constexpr (std::is_trivially_destructible_v) { // Use a simpler loop when values don't need destruction. for (BucketT *P = getBuckets(), *E = getBucketsEnd(); P != E; ++P) P->getFirst() = EmptyKey; diff --git a/llvm/include/llvm/Analysis/MemorySSA.h b/llvm/include/llvm/Analysis/MemorySSA.h index c5eff151ca4180..09fc34af60dc3c 100644 --- a/llvm/include/llvm/Analysis/MemorySSA.h +++ b/llvm/include/llvm/Analysis/MemorySSA.h @@ -218,8 +218,8 @@ class MemoryAccess inline unsigned getID() const; MemoryAccess(LLVMContext &C, unsigned Vty, DeleteValueTy DeleteValue, - BasicBlock *BB, unsigned NumOperands) - : DerivedUser(Type::getVoidTy(C), Vty, nullptr, NumOperands, DeleteValue), + BasicBlock *BB, AllocInfo AllocInfo) + : DerivedUser(Type::getVoidTy(C), Vty, AllocInfo, DeleteValue), Block(BB) {} // Use deleteValue() to delete a generic MemoryAccess. @@ -280,8 +280,8 @@ class MemoryUseOrDef : public MemoryAccess { MemoryUseOrDef(LLVMContext &C, MemoryAccess *DMA, unsigned Vty, DeleteValueTy DeleteValue, Instruction *MI, BasicBlock *BB, - unsigned NumOperands) - : MemoryAccess(C, Vty, DeleteValue, BB, NumOperands), + AllocInfo AllocInfo) + : MemoryAccess(C, Vty, DeleteValue, BB, AllocInfo), MemoryInstruction(MI) { setDefiningAccess(DMA); } @@ -307,15 +307,16 @@ class MemoryUseOrDef : public MemoryAccess { /// MemoryUse's is exactly the set of Instructions for which /// AliasAnalysis::getModRefInfo returns "Ref". 
class MemoryUse final : public MemoryUseOrDef { + constexpr static IntrusiveOperandsAllocMarker AllocMarker{1}; + public: DECLARE_TRANSPARENT_OPERAND_ACCESSORS(MemoryAccess); MemoryUse(LLVMContext &C, MemoryAccess *DMA, Instruction *MI, BasicBlock *BB) - : MemoryUseOrDef(C, DMA, MemoryUseVal, deleteMe, MI, BB, - /*NumOperands=*/1) {} + : MemoryUseOrDef(C, DMA, MemoryUseVal, deleteMe, MI, BB, AllocMarker) {} // allocate space for exactly one operand - void *operator new(size_t S) { return User::operator new(S, 1); } + void *operator new(size_t S) { return User::operator new(S, AllocMarker); } void operator delete(void *Ptr) { User::operator delete(Ptr); } static bool classof(const Value *MA) { @@ -367,6 +368,8 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(MemoryUse, MemoryAccess) /// associated with them. This use points to the nearest reaching /// MemoryDef/MemoryPhi. class MemoryDef final : public MemoryUseOrDef { + constexpr static IntrusiveOperandsAllocMarker AllocMarker{2}; + public: friend class MemorySSA; @@ -374,12 +377,11 @@ class MemoryDef final : public MemoryUseOrDef { MemoryDef(LLVMContext &C, MemoryAccess *DMA, Instruction *MI, BasicBlock *BB, unsigned Ver) - : MemoryUseOrDef(C, DMA, MemoryDefVal, deleteMe, MI, BB, - /*NumOperands=*/2), + : MemoryUseOrDef(C, DMA, MemoryDefVal, deleteMe, MI, BB, AllocMarker), ID(Ver) {} // allocate space for exactly two operands - void *operator new(size_t S) { return User::operator new(S, 2); } + void *operator new(size_t S) { return User::operator new(S, AllocMarker); } void operator delete(void *Ptr) { User::operator delete(Ptr); } static bool classof(const Value *MA) { @@ -474,8 +476,10 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(MemoryUseOrDef, MemoryAccess) /// Because MemoryUse's do not generate new definitions, they do not have this /// issue. class MemoryPhi final : public MemoryAccess { + constexpr static HungOffOperandsAllocMarker AllocMarker{}; + // allocate space for exactly zero operands - void *operator new(size_t S) { return User::operator new(S); } + void *operator new(size_t S) { return User::operator new(S, AllocMarker); } public: void operator delete(void *Ptr) { User::operator delete(Ptr); } @@ -484,7 +488,7 @@ class MemoryPhi final : public MemoryAccess { DECLARE_TRANSPARENT_OPERAND_ACCESSORS(MemoryAccess); MemoryPhi(LLVMContext &C, BasicBlock *BB, unsigned Ver, unsigned NumPreds = 0) - : MemoryAccess(C, MemoryPhiVal, deleteMe, BB, 0), ID(Ver), + : MemoryAccess(C, MemoryPhiVal, deleteMe, BB, AllocMarker), ID(Ver), ReservedSpace(NumPreds) { allocHungoffUses(ReservedSpace); } diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h index 65514882343dbe..18ed60ebb124dc 100644 --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -1304,7 +1304,7 @@ enum NodeType { /// This corresponds to "load atomic" instruction. ATOMIC_LOAD, - /// OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) + /// OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) /// This corresponds to "store atomic" instruction. 
ATOMIC_STORE, diff --git a/llvm/include/llvm/IR/Constant.h b/llvm/include/llvm/IR/Constant.h index a82e37b7e2df23..0aefb5ecf6b7f2 100644 --- a/llvm/include/llvm/IR/Constant.h +++ b/llvm/include/llvm/IR/Constant.h @@ -41,8 +41,8 @@ class APInt; /// LLVM Constant Representation class Constant : public User { protected: - Constant(Type *ty, ValueTy vty, Use *Ops, unsigned NumOps) - : User(ty, vty, Ops, NumOps) {} + Constant(Type *ty, ValueTy vty, AllocInfo AllocInfo) + : User(ty, vty, AllocInfo) {} ~Constant() = default; diff --git a/llvm/include/llvm/IR/Constants.h b/llvm/include/llvm/IR/Constants.h index 62ccde96e5397b..3b16aa039a5087 100644 --- a/llvm/include/llvm/IR/Constants.h +++ b/llvm/include/llvm/IR/Constants.h @@ -51,6 +51,8 @@ template struct ConstantAggrKeyType; /// Since they can be in use by unrelated modules (and are never based on /// GlobalValues), it never makes sense to RAUW them. class ConstantData : public Constant { + constexpr static IntrusiveOperandsAllocMarker AllocMarker{0}; + friend class Constant; Value *handleOperandChangeImpl(Value *From, Value *To) { @@ -58,9 +60,9 @@ class ConstantData : public Constant { } protected: - explicit ConstantData(Type *Ty, ValueTy VT) : Constant(Ty, VT, nullptr, 0) {} + explicit ConstantData(Type *Ty, ValueTy VT) : Constant(Ty, VT, AllocMarker) {} - void *operator new(size_t S) { return User::operator new(S, 0); } + void *operator new(size_t S) { return User::operator new(S, AllocMarker); } public: void operator delete(void *Ptr) { User::operator delete(Ptr); } @@ -399,7 +401,8 @@ class ConstantAggregateZero final : public ConstantData { /// use operands. class ConstantAggregate : public Constant { protected: - ConstantAggregate(Type *T, ValueTy VT, ArrayRef V); + ConstantAggregate(Type *T, ValueTy VT, ArrayRef V, + AllocInfo AllocInfo); public: /// Transparently provide more efficient getOperand methods. 
@@ -425,7 +428,7 @@ class ConstantArray final : public ConstantAggregate { friend struct ConstantAggrKeyType; friend class Constant; - ConstantArray(ArrayType *T, ArrayRef Val); + ConstantArray(ArrayType *T, ArrayRef Val, AllocInfo AllocInfo); void destroyConstantImpl(); Value *handleOperandChangeImpl(Value *From, Value *To); @@ -457,7 +460,7 @@ class ConstantStruct final : public ConstantAggregate { friend struct ConstantAggrKeyType; friend class Constant; - ConstantStruct(StructType *T, ArrayRef Val); + ConstantStruct(StructType *T, ArrayRef Val, AllocInfo AllocInfo); void destroyConstantImpl(); Value *handleOperandChangeImpl(Value *From, Value *To); @@ -509,7 +512,7 @@ class ConstantVector final : public ConstantAggregate { friend struct ConstantAggrKeyType; friend class Constant; - ConstantVector(VectorType *T, ArrayRef Val); + ConstantVector(VectorType *T, ArrayRef Val, AllocInfo AllocInfo); void destroyConstantImpl(); Value *handleOperandChangeImpl(Value *From, Value *To); @@ -890,9 +893,11 @@ class ConstantTargetNone final : public ConstantData { class BlockAddress final : public Constant { friend class Constant; + constexpr static IntrusiveOperandsAllocMarker AllocMarker{2}; + BlockAddress(Function *F, BasicBlock *BB); - void *operator new(size_t S) { return User::operator new(S, 2); } + void *operator new(size_t S) { return User::operator new(S, AllocMarker); } void destroyConstantImpl(); Value *handleOperandChangeImpl(Value *From, Value *To); @@ -936,9 +941,11 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BlockAddress, Value) class DSOLocalEquivalent final : public Constant { friend class Constant; + constexpr static IntrusiveOperandsAllocMarker AllocMarker{1}; + DSOLocalEquivalent(GlobalValue *GV); - void *operator new(size_t S) { return User::operator new(S, 1); } + void *operator new(size_t S) { return User::operator new(S, AllocMarker); } void destroyConstantImpl(); Value *handleOperandChangeImpl(Value *From, Value *To); @@ -973,9 +980,11 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(DSOLocalEquivalent, Value) class NoCFIValue final : public Constant { friend class Constant; + constexpr static IntrusiveOperandsAllocMarker AllocMarker{1}; + NoCFIValue(GlobalValue *GV); - void *operator new(size_t S) { return User::operator new(S, 1); } + void *operator new(size_t S) { return User::operator new(S, AllocMarker); } void destroyConstantImpl(); Value *handleOperandChangeImpl(Value *From, Value *To); @@ -1013,10 +1022,12 @@ class ConstantPtrAuth final : public Constant { friend struct ConstantPtrAuthKeyType; friend class Constant; + constexpr static IntrusiveOperandsAllocMarker AllocMarker{4}; + ConstantPtrAuth(Constant *Ptr, ConstantInt *Key, ConstantInt *Disc, Constant *AddrDisc); - void *operator new(size_t s) { return User::operator new(s, 4); } + void *operator new(size_t s) { return User::operator new(s, AllocMarker); } void destroyConstantImpl(); Value *handleOperandChangeImpl(Value *From, Value *To); @@ -1102,8 +1113,8 @@ class ConstantExpr : public Constant { Value *handleOperandChangeImpl(Value *From, Value *To); protected: - ConstantExpr(Type *ty, unsigned Opcode, Use *Ops, unsigned NumOps) - : Constant(ty, ConstantExprVal, Ops, NumOps) { + ConstantExpr(Type *ty, unsigned Opcode, AllocInfo AllocInfo) + : Constant(ty, ConstantExprVal, AllocInfo) { // Operation type (an Instruction opcode) is stored as the SubclassData. 
setValueSubclassData(Opcode); } diff --git a/llvm/include/llvm/IR/DerivedUser.h b/llvm/include/llvm/IR/DerivedUser.h index a25d316c2d60bc..a307315864b425 100644 --- a/llvm/include/llvm/IR/DerivedUser.h +++ b/llvm/include/llvm/IR/DerivedUser.h @@ -34,9 +34,9 @@ class DerivedUser : public User { DeleteValueTy DeleteValue; public: - DerivedUser(Type *Ty, unsigned VK, Use *U, unsigned NumOps, + DerivedUser(Type *Ty, unsigned VK, AllocInfo AllocInfo, DeleteValueTy DeleteValue) - : User(Ty, VK, U, NumOps), DeleteValue(DeleteValue) {} + : User(Ty, VK, AllocInfo), DeleteValue(DeleteValue) {} }; } // end namespace llvm diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h index f7e4e976ae4c44..866c68d15e4011 100644 --- a/llvm/include/llvm/IR/Function.h +++ b/llvm/include/llvm/IR/Function.h @@ -72,6 +72,8 @@ class LLVM_ABI Function : public GlobalObject, public ilist_node { using const_arg_iterator = const Argument *; private: + constexpr static HungOffOperandsAllocMarker AllocMarker{}; + // Important things that make up a function! BasicBlockListType BasicBlocks; ///< The basic blocks @@ -171,13 +173,14 @@ class LLVM_ABI Function : public GlobalObject, public ilist_node { static Function *Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N = "", Module *M = nullptr) { - return new Function(Ty, Linkage, AddrSpace, N, M); + return new (AllocMarker) Function(Ty, Linkage, AddrSpace, N, M); } // TODO: remove this once all users have been updated to pass an AddrSpace static Function *Create(FunctionType *Ty, LinkageTypes Linkage, const Twine &N = "", Module *M = nullptr) { - return new Function(Ty, Linkage, static_cast(-1), N, M); + return new (AllocMarker) + Function(Ty, Linkage, static_cast(-1), N, M); } /// Creates a new function and attaches it to a module. 
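Function takes the other marker flavor: HungOffOperandsAllocMarker co-allocates no Use slots at all, because a function's (and a PHI's) operand list must be able to grow after construction, so it lives in a separately allocated buffer (allocHungoffUses in the MemoryPhi hunk above is the same mechanism). A toy model of why growable nodes want out-of-line operands; std::unique_ptr stands in for LLVM's manually managed Use memory:

#include <cstring>
#include <memory>

// Toy "hung-off" operand list: stored out of line so it can be regrown
// without moving the node itself, the way a PHI gains operands as
// predecessors are added. Co-allocated (intrusive) operands could not
// grow, since their count is baked into the node's allocation.
struct HungOffNode {
  std::unique_ptr<void *[]> Ops;
  unsigned NumOps = 0;   // Slots in use.
  unsigned Reserved = 0; // Slots allocated.

  void addOperand(void *V) {
    if (NumOps == Reserved)
      grow(Reserved ? 2 * Reserved : 4);
    Ops[NumOps++] = V;
  }

  void grow(unsigned NewReserved) {
    auto NewOps = std::make_unique<void *[]>(NewReserved);
    if (NumOps)
      std::memcpy(NewOps.get(), Ops.get(), NumOps * sizeof(void *));
    Ops = std::move(NewOps);
    Reserved = NewReserved;
  }
};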
diff --git a/llvm/include/llvm/IR/GlobalAlias.h b/llvm/include/llvm/IR/GlobalAlias.h index 583d66e28155d7..3db6984c4a30c3 100644 --- a/llvm/include/llvm/IR/GlobalAlias.h +++ b/llvm/include/llvm/IR/GlobalAlias.h @@ -28,6 +28,8 @@ template class SymbolTableListTraits; class GlobalAlias : public GlobalValue, public ilist_node { friend class SymbolTableListTraits; + constexpr static IntrusiveOperandsAllocMarker AllocMarker{1}; + GlobalAlias(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent); @@ -59,7 +61,7 @@ class GlobalAlias : public GlobalValue, public ilist_node { static GlobalAlias *create(const Twine &Name, GlobalValue *Aliasee); // allocate space for exactly one operand - void *operator new(size_t S) { return User::operator new(S, 1); } + void *operator new(size_t S) { return User::operator new(S, AllocMarker); } void operator delete(void *Ptr) { User::operator delete(Ptr); } /// Provide fast operand accessors diff --git a/llvm/include/llvm/IR/GlobalIFunc.h b/llvm/include/llvm/IR/GlobalIFunc.h index 8935284f32d759..0d2f152cef403e 100644 --- a/llvm/include/llvm/IR/GlobalIFunc.h +++ b/llvm/include/llvm/IR/GlobalIFunc.h @@ -34,6 +34,8 @@ template class SymbolTableListTraits; class GlobalIFunc final : public GlobalObject, public ilist_node { friend class SymbolTableListTraits; + constexpr static IntrusiveOperandsAllocMarker AllocMarker{1}; + GlobalIFunc(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Resolver, Module *Parent); @@ -48,7 +50,7 @@ class GlobalIFunc final : public GlobalObject, public ilist_node { Constant *Resolver, Module *Parent); // allocate space for exactly one operand - void *operator new(size_t S) { return User::operator new(S, 1); } + void *operator new(size_t S) { return User::operator new(S, AllocMarker); } void operator delete(void *Ptr) { User::operator delete(Ptr); } /// Provide fast operand accessors diff --git a/llvm/include/llvm/IR/GlobalObject.h b/llvm/include/llvm/IR/GlobalObject.h index b6a974d8bb9f08..08edc13d81f880 100644 --- a/llvm/include/llvm/IR/GlobalObject.h +++ b/llvm/include/llvm/IR/GlobalObject.h @@ -40,10 +40,9 @@ class GlobalObject : public GlobalValue { }; protected: - GlobalObject(Type *Ty, ValueTy VTy, Use *Ops, unsigned NumOps, - LinkageTypes Linkage, const Twine &Name, - unsigned AddressSpace = 0) - : GlobalValue(Ty, VTy, Ops, NumOps, Linkage, Name, AddressSpace) { + GlobalObject(Type *Ty, ValueTy VTy, AllocInfo AllocInfo, LinkageTypes Linkage, + const Twine &Name, unsigned AddressSpace = 0) + : GlobalValue(Ty, VTy, AllocInfo, Linkage, Name, AddressSpace) { setGlobalValueSubClassData(0); } ~GlobalObject(); diff --git a/llvm/include/llvm/IR/GlobalValue.h b/llvm/include/llvm/IR/GlobalValue.h index 53eddebdd6ae68..d9104d7af5f972 100644 --- a/llvm/include/llvm/IR/GlobalValue.h +++ b/llvm/include/llvm/IR/GlobalValue.h @@ -77,9 +77,9 @@ class GlobalValue : public Constant { }; protected: - GlobalValue(Type *Ty, ValueTy VTy, Use *Ops, unsigned NumOps, - LinkageTypes Linkage, const Twine &Name, unsigned AddressSpace) - : Constant(PointerType::get(Ty, AddressSpace), VTy, Ops, NumOps), + GlobalValue(Type *Ty, ValueTy VTy, AllocInfo AllocInfo, LinkageTypes Linkage, + const Twine &Name, unsigned AddressSpace) + : Constant(PointerType::get(Ty, AddressSpace), VTy, AllocInfo), ValueType(Ty), Visibility(DefaultVisibility), UnnamedAddrVal(unsigned(UnnamedAddr::None)), DllStorageClass(DefaultStorageClass), ThreadLocal(NotThreadLocal), diff --git 
a/llvm/include/llvm/IR/GlobalVariable.h b/llvm/include/llvm/IR/GlobalVariable.h
index 0736c300de72f5..83e484816d7d4c 100644
--- a/llvm/include/llvm/IR/GlobalVariable.h
+++ b/llvm/include/llvm/IR/GlobalVariable.h
@@ -39,6 +39,8 @@ class DIGlobalVariableExpression;
 class GlobalVariable : public GlobalObject, public ilist_node {
   friend class SymbolTableListTraits;
 
+  constexpr static IntrusiveOperandsAllocMarker AllocMarker{1};
+
   AttributeSet Attrs;
 
   // Is this a global constant?
@@ -70,24 +72,31 @@ class GlobalVariable : public GlobalObject, public ilist_node {
   GlobalVariable(const GlobalVariable &) = delete;
   GlobalVariable &operator=(const GlobalVariable &) = delete;
 
+private:
+  /// Set the number of operands on a GlobalVariable.
+  ///
+  /// GlobalVariable always allocates space for a single operand, but
+  /// doesn't always use it.
+  void setGlobalVariableNumOperands(unsigned NumOps) {
+    assert(NumOps <= 1 && "GlobalVariable can only have 0 or 1 operands");
+    NumUserOperands = NumOps;
+  }
+
+public:
   ~GlobalVariable() {
     dropAllReferences();
+
+    // Number of operands can be set to 0 after construction and
+    // initialization. Make sure that the number of operands is reset to 1,
+    // as this is needed in User::operator delete.
+    setGlobalVariableNumOperands(1);
   }
 
   // allocate space for exactly one operand
-  void *operator new(size_t s) {
-    return User::operator new(s, 1);
-  }
+  void *operator new(size_t s) { return User::operator new(s, AllocMarker); }
 
   // delete space for exactly one operand as created in the corresponding new operator
-  void operator delete(void *ptr){
-    assert(ptr != nullptr && "must not be nullptr");
-    User *Obj = static_cast(ptr);
-    // Number of operands can be set to 0 after construction and initialization. Make sure
-    // that number of operands is reset to 1, as this is needed in User::operator delete
-    Obj->setGlobalVariableNumOperands(1);
-    User::operator delete(Obj);
-  }
+  void operator delete(void *ptr) { User::operator delete(ptr); }
 
   /// Provide fast operand accessors
   DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h
index 5ed3ec46dce57d..4720533bac8598 100644
--- a/llvm/include/llvm/IR/InstrTypes.h
+++ b/llvm/include/llvm/IR/InstrTypes.h
@@ -55,24 +55,26 @@ typedef unsigned ID;
 //===----------------------------------------------------------------------===//
 
 class UnaryInstruction : public Instruction {
+  constexpr static IntrusiveOperandsAllocMarker AllocMarker{1};
+
 protected:
   UnaryInstruction(Type *Ty, unsigned iType, Value *V, BasicBlock::iterator IB)
-      : Instruction(Ty, iType, &Op<0>(), 1, IB) {
+      : Instruction(Ty, iType, AllocMarker, IB) {
     Op<0>() = V;
   }
   UnaryInstruction(Type *Ty, unsigned iType, Value *V,
                    Instruction *IB = nullptr)
-      : Instruction(Ty, iType, &Op<0>(), 1, IB) {
+      : Instruction(Ty, iType, AllocMarker, IB) {
     Op<0>() = V;
   }
   UnaryInstruction(Type *Ty, unsigned iType, Value *V, BasicBlock *IAE)
-      : Instruction(Ty, iType, &Op<0>(), 1, IAE) {
+      : Instruction(Ty, iType, AllocMarker, IAE) {
     Op<0>() = V;
   }
 
 public:
   // allocate space for exactly one operand
-  void *operator new(size_t S) { return User::operator new(S, 1); }
+  void *operator new(size_t S) { return User::operator new(S, AllocMarker); }
   void operator delete(void *Ptr) { User::operator delete(Ptr); }
 
   /// Transparently provide more efficient getOperand methods.
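The GlobalVariable destructor trick above is driven by User's storage layout: with intrusive operands, the Use array and the object live in one allocation with the Use array in front, and operator delete recomputes the block start from the live operand count. If NumUserOperands were still 0 at deletion, the wrong address would be freed; hence the reset to 1. A standalone toy of that layout arithmetic (ToyUser and friends are invented; the real logic lives in User's operator new and delete):

#include <cstdlib>

struct Use { void *Val; };
struct AllocMarker { unsigned NumOps; };

// Toy co-allocation: [Use array][object] in a single malloc block. The
// object pointer handed back points *past* the Use array, so operator
// delete must subtract NumOperands * sizeof(Use) to find the block start.
// If NumOperands were left at 0, free() would be called on an address
// that malloc never returned.
struct ToyUser {
  unsigned NumOperands;

  explicit ToyUser(AllocMarker M) : NumOperands(M.NumOps) {}

  static void *operator new(size_t S, AllocMarker M) {
    char *Block = static_cast<char *>(std::malloc(M.NumOps * sizeof(Use) + S));
    return Block + M.NumOps * sizeof(Use); // Object sits after its operands.
  }
  static void operator delete(void *Ptr) {
    ToyUser *U = static_cast<ToyUser *>(Ptr);
    std::free(static_cast<char *>(Ptr) - U->NumOperands * sizeof(Use));
  }

  Use *getOperandList() { return reinterpret_cast<Use *>(this) - NumOperands; }
};

int main() {
  AllocMarker M{1};
  ToyUser *U = new (M) ToyUser(M);
  U->getOperandList()[0].Val = nullptr;
  delete U; // Relies on NumOperands == 1 to free the whole block.
}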
@@ -186,6 +188,8 @@ class UnaryOperator : public UnaryInstruction { //===----------------------------------------------------------------------===// class BinaryOperator : public Instruction { + constexpr static IntrusiveOperandsAllocMarker AllocMarker{2}; + void AssertOK(); protected: @@ -199,7 +203,7 @@ class BinaryOperator : public Instruction { public: // allocate space for exactly two operands - void *operator new(size_t S) { return User::operator new(S, 2); } + void *operator new(size_t S) { return User::operator new(S, AllocMarker); } void operator delete(void *Ptr) { User::operator delete(Ptr); } /// Transparently provide more efficient getOperand methods. @@ -745,6 +749,8 @@ class PossiblyNonNegInst : public CastInst { /// This class is the base class for the comparison instructions. /// Abstract base class of comparison instructions. class CmpInst : public Instruction { + constexpr static IntrusiveOperandsAllocMarker AllocMarker{2}; + public: /// This enumeration lists the possible predicates for CmpInst subclasses. /// Values in the range 0-31 are reserved for FCmpInst, while values in the @@ -814,7 +820,7 @@ class CmpInst : public Instruction { public: // allocate space for exactly two operands - void *operator new(size_t S) { return User::operator new(S, 2); } + void *operator new(size_t S) { return User::operator new(S, AllocMarker); } void operator delete(void *Ptr) { User::operator delete(Ptr); } /// Construct a compare instruction, given the opcode, the predicate and @@ -2416,10 +2422,10 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CallBase, Value) //===----------------------------------------------------------------------===// class FuncletPadInst : public Instruction { private: - FuncletPadInst(const FuncletPadInst &CPI); + FuncletPadInst(const FuncletPadInst &CPI, AllocInfo AllocInfo); explicit FuncletPadInst(Instruction::FuncletPadOps Op, Value *ParentPad, - ArrayRef Args, unsigned Values, + ArrayRef Args, AllocInfo AllocInfo, const Twine &NameStr, InsertPosition InsertBefore); void init(Value *ParentPad, ArrayRef Args, const Twine &NameStr); diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h index c27572300d5063..a12d5d9d8fe945 100644 --- a/llvm/include/llvm/IR/Instruction.h +++ b/llvm/include/llvm/IR/Instruction.h @@ -1030,7 +1030,7 @@ class Instruction : public User, setValueSubclassData(Storage); } - Instruction(Type *Ty, unsigned iType, Use *Ops, unsigned NumOps, + Instruction(Type *Ty, unsigned iType, AllocInfo AllocInfo, InsertPosition InsertBefore = nullptr); private: diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h index ab3321ee755717..e89739a5552662 100644 --- a/llvm/include/llvm/IR/Instructions.h +++ b/llvm/include/llvm/IR/Instructions.h @@ -297,6 +297,8 @@ class StoreInst : public Instruction { void AssertOK(); + constexpr static IntrusiveOperandsAllocMarker AllocMarker{2}; + protected: // Note: Instruction needs to be a friend here to call cloneImpl. friend class Instruction; @@ -314,7 +316,7 @@ class StoreInst : public Instruction { InsertPosition InsertBefore = nullptr); // allocate space for exactly two operands - void *operator new(size_t S) { return User::operator new(S, 2); } + void *operator new(size_t S) { return User::operator new(S, AllocMarker); } void operator delete(void *Ptr) { User::operator delete(Ptr); } /// Return true if this is a store to a volatile memory location. 
@@ -420,6 +422,8 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(StoreInst, Value) class FenceInst : public Instruction { using OrderingField = AtomicOrderingBitfieldElementT<0>; + constexpr static IntrusiveOperandsAllocMarker AllocMarker{0}; + void Init(AtomicOrdering Ordering, SyncScope::ID SSID); protected: @@ -436,7 +440,7 @@ class FenceInst : public Instruction { InsertPosition InsertBefore = nullptr); // allocate space for exactly zero operands - void *operator new(size_t S) { return User::operator new(S, 0); } + void *operator new(size_t S) { return User::operator new(S, AllocMarker); } void operator delete(void *Ptr) { User::operator delete(Ptr); } /// Returns the ordering constraint of this fence instruction. @@ -502,6 +506,8 @@ class AtomicCmpXchgInst : public Instruction { typename Bitfield::Element; + constexpr static IntrusiveOperandsAllocMarker AllocMarker{3}; + protected: // Note: Instruction needs to be a friend here to call cloneImpl. friend class Instruction; @@ -515,7 +521,7 @@ class AtomicCmpXchgInst : public Instruction { InsertPosition InsertBefore = nullptr); // allocate space for exactly three operands - void *operator new(size_t S) { return User::operator new(S, 3); } + void *operator new(size_t S) { return User::operator new(S, AllocMarker); } void operator delete(void *Ptr) { User::operator delete(Ptr); } using VolatileField = BoolBitfieldElementT<0>; @@ -774,13 +780,15 @@ class AtomicRMWInst : public Instruction { using BinOpBitfieldElement = typename Bitfield::Element; + constexpr static IntrusiveOperandsAllocMarker AllocMarker{2}; + public: AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val, Align Alignment, AtomicOrdering Ordering, SyncScope::ID SSID, InsertPosition InsertBefore = nullptr); // allocate space for exactly two operands - void *operator new(size_t S) { return User::operator new(S, 2); } + void *operator new(size_t S) { return User::operator new(S, AllocMarker); } void operator delete(void *Ptr) { User::operator delete(Ptr); } using VolatileField = BoolBitfieldElementT<0>; @@ -924,14 +932,14 @@ class GetElementPtrInst : public Instruction { Type *SourceElementType; Type *ResultElementType; - GetElementPtrInst(const GetElementPtrInst &GEPI); + GetElementPtrInst(const GetElementPtrInst &GEPI, AllocInfo AllocInfo); /// Constructors - Create a getelementptr instruction with a base pointer an /// list of indices. The first and second ctor can optionally insert before an /// existing instruction, the third appends the new instruction to the /// specified BasicBlock. 
inline GetElementPtrInst(Type *PointeeType, Value *Ptr, - ArrayRef IdxList, unsigned Values, + ArrayRef IdxList, AllocInfo AllocInfo, const Twine &NameStr, InsertPosition InsertBefore); void init(Value *Ptr, ArrayRef IdxList, const Twine &NameStr); @@ -949,8 +957,9 @@ class GetElementPtrInst : public Instruction { InsertPosition InsertBefore = nullptr) { unsigned Values = 1 + unsigned(IdxList.size()); assert(PointeeType && "Must specify element type"); - return new (Values) GetElementPtrInst(PointeeType, Ptr, IdxList, Values, - NameStr, InsertBefore); + IntrusiveOperandsAllocMarker AllocMarker{Values}; + return new (AllocMarker) GetElementPtrInst( + PointeeType, Ptr, IdxList, AllocMarker, NameStr, InsertBefore); } static GetElementPtrInst *Create(Type *PointeeType, Value *Ptr, @@ -1124,12 +1133,11 @@ struct OperandTraits : public VariadicOperandTraits {}; GetElementPtrInst::GetElementPtrInst(Type *PointeeType, Value *Ptr, - ArrayRef IdxList, unsigned Values, - const Twine &NameStr, + ArrayRef IdxList, + AllocInfo AllocInfo, const Twine &NameStr, InsertPosition InsertBefore) - : Instruction(getGEPReturnType(Ptr, IdxList), GetElementPtr, - OperandTraits::op_end(this) - Values, - Values, InsertBefore), + : Instruction(getGEPReturnType(Ptr, IdxList), GetElementPtr, AllocInfo, + InsertBefore), SourceElementType(PointeeType), ResultElementType(getIndexedType(PointeeType, IdxList)) { init(Ptr, IdxList, NameStr); @@ -1403,26 +1411,29 @@ class FCmpInst: public CmpInst { /// hold the calling convention of the call. /// class CallInst : public CallBase { - CallInst(const CallInst &CI); + CallInst(const CallInst &CI, AllocInfo AllocInfo); /// Construct a CallInst from a range of arguments inline CallInst(FunctionType *Ty, Value *Func, ArrayRef Args, ArrayRef Bundles, const Twine &NameStr, - InsertPosition InsertBefore); + AllocInfo AllocInfo, InsertPosition InsertBefore); inline CallInst(FunctionType *Ty, Value *Func, ArrayRef Args, - const Twine &NameStr, InsertPosition InsertBefore) - : CallInst(Ty, Func, Args, std::nullopt, NameStr, InsertBefore) {} + const Twine &NameStr, AllocInfo AllocInfo, + InsertPosition InsertBefore) + : CallInst(Ty, Func, Args, std::nullopt, NameStr, AllocInfo, + InsertBefore) {} explicit CallInst(FunctionType *Ty, Value *F, const Twine &NameStr, - InsertPosition InsertBefore); + AllocInfo AllocInfo, InsertPosition InsertBefore); void init(FunctionType *FTy, Value *Func, ArrayRef Args, ArrayRef Bundles, const Twine &NameStr); void init(FunctionType *FTy, Value *Func, const Twine &NameStr); /// Compute the number of operands to allocate. - static int ComputeNumOperands(int NumArgs, int NumBundleInputs = 0) { + static unsigned ComputeNumOperands(unsigned NumArgs, + unsigned NumBundleInputs = 0) { // We need one operand for the called function, plus the input operand // counts provided. 
return 1 + NumArgs + NumBundleInputs; @@ -1437,26 +1448,29 @@ class CallInst : public CallBase { public: static CallInst *Create(FunctionType *Ty, Value *F, const Twine &NameStr = "", InsertPosition InsertBefore = nullptr) { - return new (ComputeNumOperands(0)) CallInst(Ty, F, NameStr, InsertBefore); + IntrusiveOperandsAllocMarker AllocMarker{ComputeNumOperands(0)}; + return new (AllocMarker) + CallInst(Ty, F, NameStr, AllocMarker, InsertBefore); } static CallInst *Create(FunctionType *Ty, Value *Func, ArrayRef Args, const Twine &NameStr, InsertPosition InsertBefore = nullptr) { - return new (ComputeNumOperands(Args.size())) - CallInst(Ty, Func, Args, std::nullopt, NameStr, InsertBefore); + IntrusiveOperandsAllocMarker AllocMarker{ComputeNumOperands(Args.size())}; + return new (AllocMarker) CallInst(Ty, Func, Args, std::nullopt, NameStr, + AllocMarker, InsertBefore); } static CallInst *Create(FunctionType *Ty, Value *Func, ArrayRef Args, ArrayRef Bundles = std::nullopt, const Twine &NameStr = "", InsertPosition InsertBefore = nullptr) { - const int NumOperands = - ComputeNumOperands(Args.size(), CountBundleInputs(Bundles)); - const unsigned DescriptorBytes = Bundles.size() * sizeof(BundleOpInfo); + IntrusiveOperandsAndDescriptorAllocMarker AllocMarker{ + ComputeNumOperands(unsigned(Args.size()), CountBundleInputs(Bundles)), + unsigned(Bundles.size() * sizeof(BundleOpInfo))}; - return new (NumOperands, DescriptorBytes) - CallInst(Ty, Func, Args, Bundles, NameStr, InsertBefore); + return new (AllocMarker) + CallInst(Ty, Func, Args, Bundles, NameStr, AllocMarker, InsertBefore); } static CallInst *Create(FunctionCallee Func, const Twine &NameStr = "", @@ -1561,12 +1575,11 @@ class CallInst : public CallBase { CallInst::CallInst(FunctionType *Ty, Value *Func, ArrayRef Args, ArrayRef Bundles, const Twine &NameStr, - InsertPosition InsertBefore) - : CallBase(Ty->getReturnType(), Instruction::Call, - OperandTraits::op_end(this) - - (Args.size() + CountBundleInputs(Bundles) + 1), - unsigned(Args.size() + CountBundleInputs(Bundles) + 1), + AllocInfo AllocInfo, InsertPosition InsertBefore) + : CallBase(Ty->getReturnType(), Instruction::Call, AllocInfo, InsertBefore) { + assert(AllocInfo.NumOps == + unsigned(Args.size() + CountBundleInputs(Bundles) + 1)); init(Ty, Func, Args, Bundles, NameStr); } @@ -1577,10 +1590,11 @@ CallInst::CallInst(FunctionType *Ty, Value *Func, ArrayRef Args, /// This class represents the LLVM 'select' instruction. 
/// class SelectInst : public Instruction { + constexpr static IntrusiveOperandsAllocMarker AllocMarker{3}; SelectInst(Value *C, Value *S1, Value *S2, const Twine &NameStr, InsertPosition InsertBefore) - : Instruction(S1->getType(), Instruction::Select, &Op<0>(), 3, + : Instruction(S1->getType(), Instruction::Select, AllocMarker, InsertBefore) { init(C, S1, S2); setName(NameStr); @@ -1604,7 +1618,8 @@ class SelectInst : public Instruction { const Twine &NameStr = "", InsertPosition InsertBefore = nullptr, Instruction *MDFrom = nullptr) { - SelectInst *Sel = new(3) SelectInst(C, S1, S2, NameStr, InsertBefore); + SelectInst *Sel = + new (AllocMarker) SelectInst(C, S1, S2, NameStr, InsertBefore); if (MDFrom) Sel->copyMetadata(*MDFrom); return Sel; @@ -1693,6 +1708,8 @@ class VAArgInst : public UnaryInstruction { /// element from a VectorType value /// class ExtractElementInst : public Instruction { + constexpr static IntrusiveOperandsAllocMarker AllocMarker{2}; + ExtractElementInst(Value *Vec, Value *Idx, const Twine &NameStr = "", InsertPosition InsertBefore = nullptr); @@ -1706,7 +1723,8 @@ class ExtractElementInst : public Instruction { static ExtractElementInst *Create(Value *Vec, Value *Idx, const Twine &NameStr = "", InsertPosition InsertBefore = nullptr) { - return new(2) ExtractElementInst(Vec, Idx, NameStr, InsertBefore); + return new (AllocMarker) + ExtractElementInst(Vec, Idx, NameStr, InsertBefore); } /// Return true if an extractelement instruction can be @@ -1749,6 +1767,8 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ExtractElementInst, Value) /// element into a VectorType value /// class InsertElementInst : public Instruction { + constexpr static IntrusiveOperandsAllocMarker AllocMarker{3}; + InsertElementInst(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr = "", InsertPosition InsertBefore = nullptr); @@ -1763,7 +1783,8 @@ class InsertElementInst : public Instruction { static InsertElementInst *Create(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr = "", InsertPosition InsertBefore = nullptr) { - return new(3) InsertElementInst(Vec, NewElt, Idx, NameStr, InsertBefore); + return new (AllocMarker) + InsertElementInst(Vec, NewElt, Idx, NameStr, InsertBefore); } /// Return true if an insertelement instruction can be @@ -1813,6 +1834,8 @@ constexpr int PoisonMaskElem = -1; /// For scalable vectors, all the elements of the mask must be 0 or -1. This /// requirement may be relaxed in the future. class ShuffleVectorInst : public Instruction { + constexpr static IntrusiveOperandsAllocMarker AllocMarker{2}; + SmallVector ShuffleMask; Constant *ShuffleMaskForBitcode; @@ -1834,7 +1857,7 @@ class ShuffleVectorInst : public Instruction { const Twine &NameStr = "", InsertPosition InsertBefore = nullptr); - void *operator new(size_t S) { return User::operator new(S, 2); } + void *operator new(size_t S) { return User::operator new(S, AllocMarker); } void operator delete(void *Ptr) { return User::operator delete(Ptr); } /// Swap the operands and adjust the mask to preserve the semantics @@ -2395,6 +2418,8 @@ ExtractValueInst::ExtractValueInst(Value *Agg, ArrayRef Idxs, /// value into an aggregate value. 
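// ---- Editor's aside (not part of the patch) ----
// A compilable miniature of the intrusive co-allocation pattern the
// fixed-operand classes above use. Everything is simplified: real Users keep
// Use objects, not raw pointers, and the real operators live on User.
#include <cstddef>
#include <cstdlib>

struct IntrusiveOperandsAllocMarker { const unsigned NumOps; };

struct MiniUser {
  unsigned NumOps;
  explicit MiniUser(IntrusiveOperandsAllocMarker M) : NumOps(M.NumOps) {}
  // Co-allocate NumOps pointer-sized slots *before* the object, mirroring
  // LLVM's layout where the operand list precedes the User.
  void *operator new(std::size_t Size, IntrusiveOperandsAllocMarker M) {
    char *Raw =
        static_cast<char *>(std::malloc(M.NumOps * sizeof(void *) + Size));
    return Raw + M.NumOps * sizeof(void *);
  }
  // Ordinary delete walks back over the co-allocated slots; this is why the
  // stored operand count must match what was allocated.
  void operator delete(void *Ptr) {
    MiniUser *U = static_cast<MiniUser *>(Ptr);
    std::free(static_cast<char *>(Ptr) - U->NumOps * sizeof(void *));
  }
  // Placement delete, used only if the constructor throws (compare the
  // placement deletes in the User.h hunks further down).
  void operator delete(void *Ptr, IntrusiveOperandsAllocMarker M) {
    std::free(static_cast<char *>(Ptr) - M.NumOps * sizeof(void *));
  }
};

int main() {
  constexpr IntrusiveOperandsAllocMarker AllocMarker{2};
  MiniUser *U = new (AllocMarker) MiniUser(AllocMarker); // same marker twice
  delete U;
}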
/// class InsertValueInst : public Instruction { + constexpr static IntrusiveOperandsAllocMarker AllocMarker{2}; + SmallVector Indices; InsertValueInst(const InsertValueInst &IVI); @@ -2423,7 +2448,7 @@ class InsertValueInst : public Instruction { public: // allocate space for exactly two operands - void *operator new(size_t S) { return User::operator new(S, 2); } + void *operator new(size_t S) { return User::operator new(S, AllocMarker); } void operator delete(void *Ptr) { User::operator delete(Ptr); } static InsertValueInst *Create(Value *Agg, Value *Val, @@ -2493,9 +2518,7 @@ struct OperandTraits : InsertValueInst::InsertValueInst(Value *Agg, Value *Val, ArrayRef Idxs, const Twine &NameStr, InsertPosition InsertBefore) - : Instruction(Agg->getType(), InsertValue, - OperandTraits::op_begin(this), 2, - InsertBefore) { + : Instruction(Agg->getType(), InsertValue, AllocMarker, InsertBefore) { init(Agg, Val, Idxs, NameStr); } @@ -2510,6 +2533,8 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertValueInst, Value) // scientist's overactive imagination. // class PHINode : public Instruction { + constexpr static HungOffOperandsAllocMarker AllocMarker{}; + /// The number of operands actually allocated. NumOperands is /// the number actually in use. unsigned ReservedSpace; @@ -2519,7 +2544,7 @@ class PHINode : public Instruction { explicit PHINode(Type *Ty, unsigned NumReservedValues, const Twine &NameStr = "", InsertPosition InsertBefore = nullptr) - : Instruction(Ty, Instruction::PHI, nullptr, 0, InsertBefore), + : Instruction(Ty, Instruction::PHI, AllocMarker, InsertBefore), ReservedSpace(NumReservedValues) { assert(!Ty->isTokenTy() && "PHI nodes cannot have token type!"); setName(NameStr); @@ -2545,7 +2570,8 @@ class PHINode : public Instruction { static PHINode *Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr = "", InsertPosition InsertBefore = nullptr) { - return new PHINode(Ty, NumReservedValues, NameStr, InsertBefore); + return new (AllocMarker) + PHINode(Ty, NumReservedValues, NameStr, InsertBefore); } /// Provide fast operand accessors @@ -2749,6 +2775,8 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(PHINode, Value) class LandingPadInst : public Instruction { using CleanupField = BoolBitfieldElementT<0>; + constexpr static HungOffOperandsAllocMarker AllocMarker{}; + /// The number of operands actually allocated. NumOperands is /// the number actually in use. unsigned ReservedSpace; @@ -2763,7 +2791,7 @@ class LandingPadInst : public Instruction { const Twine &NameStr, InsertPosition InsertBefore); // Allocate space for exactly zero operands. - void *operator new(size_t S) { return User::operator new(S); } + void *operator new(size_t S) { return User::operator new(S, AllocMarker); } void growOperands(unsigned Size); void init(unsigned NumReservedValues, const Twine &NameStr); @@ -2843,7 +2871,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(LandingPadInst, Value) /// does not continue in this function any longer. /// class ReturnInst : public Instruction { - ReturnInst(const ReturnInst &RI); + ReturnInst(const ReturnInst &RI, AllocInfo AllocInfo); private: // ReturnInst constructors: @@ -2859,8 +2887,8 @@ class ReturnInst : public Instruction { // // NOTE: If the Value* passed is of type void then the constructor behaves as // if it was passed NULL. 
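// ---- Editor's aside (not part of the patch) ----
// PHINode and LandingPadInst above take the other route: a
// HungOffOperandsAllocMarker carries no count because their operands live in
// a separate, growable allocation. A rough standalone analogue (no Use
// bookkeeping, unlike the real allocHungoffUses/growOperands machinery):
#include <cassert>
#include <cstring>

struct MiniPHI {
  void **Ops = nullptr;  // "hung off" storage, reallocated on demand
  unsigned Reserved = 0; // slots allocated
  unsigned Num = 0;      // slots in use
  void grow(unsigned NewCap) {
    assert(NewCap >= Num && "cannot shrink below the used count");
    void **NewOps = new void *[NewCap];
    std::memcpy(NewOps, Ops, Num * sizeof(void *));
    delete[] Ops;
    Ops = NewOps;
    Reserved = NewCap;
  }
  ~MiniPHI() { delete[] Ops; }
};

int main() {
  MiniPHI P;
  P.grow(4); // reserve space for four incoming values
}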
- explicit ReturnInst(LLVMContext &C, Value *retVal = nullptr, - InsertPosition InsertBefore = nullptr); + explicit ReturnInst(LLVMContext &C, Value *retVal, AllocInfo AllocInfo, + InsertPosition InsertBefore); protected: // Note: Instruction needs to be a friend here to call cloneImpl. @@ -2871,11 +2899,13 @@ class ReturnInst : public Instruction { public: static ReturnInst *Create(LLVMContext &C, Value *retVal = nullptr, InsertPosition InsertBefore = nullptr) { - return new(!!retVal) ReturnInst(C, retVal, InsertBefore); + IntrusiveOperandsAllocMarker AllocMarker{retVal ? 1U : 0U}; + return new (AllocMarker) ReturnInst(C, retVal, AllocMarker, InsertBefore); } static ReturnInst *Create(LLVMContext &C, BasicBlock *InsertAtEnd) { - return new (0) ReturnInst(C, nullptr, InsertAtEnd); + IntrusiveOperandsAllocMarker AllocMarker{0}; + return new (AllocMarker) ReturnInst(C, nullptr, AllocMarker, InsertAtEnd); } /// Provide fast operand accessors @@ -2923,7 +2953,7 @@ class BranchInst : public Instruction { /// [Cond, FalseDest,] TrueDest. This makes some accessors faster because /// they don't have to check for cond/uncond branchness. These are mostly /// accessed relative from op_end(). - BranchInst(const BranchInst &BI); + BranchInst(const BranchInst &BI, AllocInfo AllocInfo); // BranchInst constructors (where {B, T, F} are blocks, and C is a condition): // BranchInst(BB *B) - 'br B' // BranchInst(BB* T, BB *F, Value *C) - 'br C, T, F' @@ -2933,10 +2963,10 @@ class BranchInst : public Instruction { // BranchInst(BB* T, BB *F, Value *C, Inst *I) - 'br C, T, F', insert before I // BranchInst(BB* B, BB *I) - 'br B' insert at end // BranchInst(BB* T, BB *F, Value *C, BB *I) - 'br C, T, F', insert at end - explicit BranchInst(BasicBlock *IfTrue, - InsertPosition InsertBefore = nullptr); + explicit BranchInst(BasicBlock *IfTrue, AllocInfo AllocInfo, + InsertPosition InsertBefore); BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond, - InsertPosition InsertBefore = nullptr); + AllocInfo AllocInfo, InsertPosition InsertBefore); void AssertOK(); @@ -2976,13 +3006,16 @@ class BranchInst : public Instruction { static BranchInst *Create(BasicBlock *IfTrue, InsertPosition InsertBefore = nullptr) { - return new(1) BranchInst(IfTrue, InsertBefore); + IntrusiveOperandsAllocMarker AllocMarker{1}; + return new (AllocMarker) BranchInst(IfTrue, AllocMarker, InsertBefore); } static BranchInst *Create(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond, InsertPosition InsertBefore = nullptr) { - return new(3) BranchInst(IfTrue, IfFalse, Cond, InsertBefore); + IntrusiveOperandsAllocMarker AllocMarker{3}; + return new (AllocMarker) + BranchInst(IfTrue, IfFalse, Cond, AllocMarker, InsertBefore); } /// Transparently provide more efficient getOperand methods. @@ -3054,6 +3087,8 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BranchInst, Value) /// Multiway switch /// class SwitchInst : public Instruction { + constexpr static HungOffOperandsAllocMarker AllocMarker{}; + unsigned ReservedSpace; // Operand[0] = Value to switch on @@ -3070,7 +3105,7 @@ class SwitchInst : public Instruction { InsertPosition InsertBefore); // allocate space for exactly zero operands - void *operator new(size_t S) { return User::operator new(S); } + void *operator new(size_t S) { return User::operator new(S, AllocMarker); } void init(Value *Value, BasicBlock *Default, unsigned NumReserved); void growOperands(); @@ -3442,6 +3477,8 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(SwitchInst, Value) /// Indirect Branch Instruction. 
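// ---- Editor's aside (not part of the patch) ----
// Unlike the constexpr class-level markers elsewhere, ReturnInst's marker is
// a runtime value: `ret <val>` needs one operand and `ret void` needs none.
// The pattern, using only names from the hunk above:
//
//   IntrusiveOperandsAllocMarker AllocMarker{retVal ? 1U : 0U};
//   return new (AllocMarker) ReturnInst(C, retVal, AllocMarker, InsertBefore);
//
// The same marker is used for both the allocation and the constructor, so the
// recorded operand count always matches the allocation.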
/// class IndirectBrInst : public Instruction { + constexpr static HungOffOperandsAllocMarker AllocMarker{}; + unsigned ReservedSpace; // Operand[0] = Address to jump to @@ -3456,7 +3493,7 @@ class IndirectBrInst : public Instruction { InsertPosition InsertBefore); // allocate space for exactly zero operands - void *operator new(size_t S) { return User::operator new(S); } + void *operator new(size_t S) { return User::operator new(S, AllocMarker); } void init(Value *Address, unsigned NumDests); void growOperands(); @@ -3576,14 +3613,14 @@ class InvokeInst : public CallBase { /// The index from the end of the operand array to the unwind destination. static constexpr int UnwindDestOpEndIdx = -2; - InvokeInst(const InvokeInst &BI); + InvokeInst(const InvokeInst &BI, AllocInfo AllocInfo); /// Construct an InvokeInst given a range of arguments. /// /// Construct an InvokeInst from a range of arguments inline InvokeInst(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef Args, - ArrayRef Bundles, int NumOperands, + ArrayRef Bundles, AllocInfo AllocInfo, const Twine &NameStr, InsertPosition InsertBefore); void init(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, @@ -3591,10 +3628,11 @@ class InvokeInst : public CallBase { ArrayRef Bundles, const Twine &NameStr); /// Compute the number of operands to allocate. - static int ComputeNumOperands(int NumArgs, int NumBundleInputs = 0) { + static unsigned ComputeNumOperands(unsigned NumArgs, + size_t NumBundleInputs = 0) { // We need one operand for the called function, plus our extra operands and // the input operand counts provided. - return 1 + NumExtraOperands + NumArgs + NumBundleInputs; + return 1 + NumExtraOperands + NumArgs + unsigned(NumBundleInputs); } protected: @@ -3608,10 +3646,11 @@ class InvokeInst : public CallBase { BasicBlock *IfException, ArrayRef Args, const Twine &NameStr, InsertPosition InsertBefore = nullptr) { - int NumOperands = ComputeNumOperands(Args.size()); - return new (NumOperands) + IntrusiveOperandsAllocMarker AllocMarker{ + ComputeNumOperands(unsigned(Args.size()))}; + return new (AllocMarker) InvokeInst(Ty, Func, IfNormal, IfException, Args, std::nullopt, - NumOperands, NameStr, InsertBefore); + AllocMarker, NameStr, InsertBefore); } static InvokeInst *Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, @@ -3619,12 +3658,12 @@ class InvokeInst : public CallBase { ArrayRef Bundles = std::nullopt, const Twine &NameStr = "", InsertPosition InsertBefore = nullptr) { - int NumOperands = - ComputeNumOperands(Args.size(), CountBundleInputs(Bundles)); - unsigned DescriptorBytes = Bundles.size() * sizeof(BundleOpInfo); + IntrusiveOperandsAndDescriptorAllocMarker AllocMarker{ + ComputeNumOperands(Args.size(), CountBundleInputs(Bundles)), + unsigned(Bundles.size() * sizeof(BundleOpInfo))}; - return new (NumOperands, DescriptorBytes) - InvokeInst(Ty, Func, IfNormal, IfException, Args, Bundles, NumOperands, + return new (AllocMarker) + InvokeInst(Ty, Func, IfNormal, IfException, Args, Bundles, AllocMarker, NameStr, InsertBefore); } @@ -3709,10 +3748,9 @@ class InvokeInst : public CallBase { InvokeInst::InvokeInst(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef Args, - ArrayRef Bundles, int NumOperands, + ArrayRef Bundles, AllocInfo AllocInfo, const Twine &NameStr, InsertPosition InsertBefore) - : CallBase(Ty->getReturnType(), Instruction::Invoke, - OperandTraits::op_end(this) - NumOperands, NumOperands, + : CallBase(Ty->getReturnType(), 
Instruction::Invoke, AllocInfo, InsertBefore) { init(Ty, Func, IfNormal, IfException, Args, Bundles, NameStr); } @@ -3729,7 +3767,7 @@ class CallBrInst : public CallBase { unsigned NumIndirectDests; - CallBrInst(const CallBrInst &BI); + CallBrInst(const CallBrInst &BI, AllocInfo AllocInfo); /// Construct a CallBrInst given a range of arguments. /// @@ -3737,7 +3775,7 @@ class CallBrInst : public CallBase { inline CallBrInst(FunctionType *Ty, Value *Func, BasicBlock *DefaultDest, ArrayRef IndirectDests, ArrayRef Args, ArrayRef Bundles, - int NumOperands, const Twine &NameStr, + AllocInfo AllocInfo, const Twine &NameStr, InsertPosition InsertBefore); void init(FunctionType *FTy, Value *Func, BasicBlock *DefaultDest, @@ -3745,11 +3783,11 @@ class CallBrInst : public CallBase { ArrayRef Bundles, const Twine &NameStr); /// Compute the number of operands to allocate. - static int ComputeNumOperands(int NumArgs, int NumIndirectDests, - int NumBundleInputs = 0) { + static unsigned ComputeNumOperands(int NumArgs, int NumIndirectDests, + int NumBundleInputs = 0) { // We need one operand for the called function, plus our extra operands and // the input operand counts provided. - return 2 + NumIndirectDests + NumArgs + NumBundleInputs; + return unsigned(2 + NumIndirectDests + NumArgs + NumBundleInputs); } protected: @@ -3764,10 +3802,11 @@ class CallBrInst : public CallBase { ArrayRef IndirectDests, ArrayRef Args, const Twine &NameStr, InsertPosition InsertBefore = nullptr) { - int NumOperands = ComputeNumOperands(Args.size(), IndirectDests.size()); - return new (NumOperands) + IntrusiveOperandsAllocMarker AllocMarker{ + ComputeNumOperands(Args.size(), IndirectDests.size())}; + return new (AllocMarker) CallBrInst(Ty, Func, DefaultDest, IndirectDests, Args, std::nullopt, - NumOperands, NameStr, InsertBefore); + AllocMarker, NameStr, InsertBefore); } static CallBrInst * @@ -3775,13 +3814,14 @@ class CallBrInst : public CallBase { ArrayRef IndirectDests, ArrayRef Args, ArrayRef Bundles = std::nullopt, const Twine &NameStr = "", InsertPosition InsertBefore = nullptr) { - int NumOperands = ComputeNumOperands(Args.size(), IndirectDests.size(), - CountBundleInputs(Bundles)); - unsigned DescriptorBytes = Bundles.size() * sizeof(BundleOpInfo); + IntrusiveOperandsAndDescriptorAllocMarker AllocMarker{ + ComputeNumOperands(Args.size(), IndirectDests.size(), + CountBundleInputs(Bundles)), + unsigned(Bundles.size() * sizeof(BundleOpInfo))}; - return new (NumOperands, DescriptorBytes) + return new (AllocMarker) CallBrInst(Ty, Func, DefaultDest, IndirectDests, Args, Bundles, - NumOperands, NameStr, InsertBefore); + AllocMarker, NameStr, InsertBefore); } static CallBrInst *Create(FunctionCallee Func, BasicBlock *DefaultDest, @@ -3881,10 +3921,9 @@ class CallBrInst : public CallBase { CallBrInst::CallBrInst(FunctionType *Ty, Value *Func, BasicBlock *DefaultDest, ArrayRef IndirectDests, ArrayRef Args, - ArrayRef Bundles, int NumOperands, + ArrayRef Bundles, AllocInfo AllocInfo, const Twine &NameStr, InsertPosition InsertBefore) - : CallBase(Ty->getReturnType(), Instruction::CallBr, - OperandTraits::op_end(this) - NumOperands, NumOperands, + : CallBase(Ty->getReturnType(), Instruction::CallBr, AllocInfo, InsertBefore) { init(Ty, Func, DefaultDest, IndirectDests, Args, Bundles, NameStr); } @@ -3897,6 +3936,8 @@ CallBrInst::CallBrInst(FunctionType *Ty, Value *Func, BasicBlock *DefaultDest, /// Resume the propagation of an exception. 
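// ---- Editor's aside (not part of the patch) ----
// Why CallBrInst::ComputeNumOperands starts at 2: callbr reserves fixed slots
// for the callee and the default destination before the variable parts. With
// hypothetical counts (3 args, 2 indirect dests, no bundles):
//
//   NumOps = 2 + NumIndirectDests + NumArgs + NumBundleInputs
//          = 2 + 2 + 3 + 0 = 7
//
// Compare InvokeInst above, whose fixed slots are the callee plus its
// NumExtraOperands (the normal and unwind destinations).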
/// class ResumeInst : public Instruction { + constexpr static IntrusiveOperandsAllocMarker AllocMarker{1}; + ResumeInst(const ResumeInst &RI); explicit ResumeInst(Value *Exn, InsertPosition InsertBefore = nullptr); @@ -3909,7 +3950,7 @@ class ResumeInst : public Instruction { public: static ResumeInst *Create(Value *Exn, InsertPosition InsertBefore = nullptr) { - return new(1) ResumeInst(Exn, InsertBefore); + return new (AllocMarker) ResumeInst(Exn, InsertBefore); } /// Provide fast operand accessors @@ -3951,6 +3992,8 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ResumeInst, Value) class CatchSwitchInst : public Instruction { using UnwindDestField = BoolBitfieldElementT<0>; + constexpr static HungOffOperandsAllocMarker AllocMarker{}; + /// The number of operands actually allocated. NumOperands is /// the number actually in use. unsigned ReservedSpace; @@ -3969,7 +4012,7 @@ class CatchSwitchInst : public Instruction { InsertPosition InsertBefore); // allocate space for exactly zero operands - void *operator new(size_t S) { return User::operator new(S); } + void *operator new(size_t S) { return User::operator new(S, AllocMarker); } void init(Value *ParentPad, BasicBlock *UnwindDest, unsigned NumReserved); void growOperands(unsigned Size); @@ -4114,9 +4157,9 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CatchSwitchInst, Value) class CleanupPadInst : public FuncletPadInst { private: explicit CleanupPadInst(Value *ParentPad, ArrayRef Args, - unsigned Values, const Twine &NameStr, + AllocInfo AllocInfo, const Twine &NameStr, InsertPosition InsertBefore) - : FuncletPadInst(Instruction::CleanupPad, ParentPad, Args, Values, + : FuncletPadInst(Instruction::CleanupPad, ParentPad, Args, AllocInfo, NameStr, InsertBefore) {} public: @@ -4124,9 +4167,9 @@ class CleanupPadInst : public FuncletPadInst { ArrayRef Args = std::nullopt, const Twine &NameStr = "", InsertPosition InsertBefore = nullptr) { - unsigned Values = 1 + Args.size(); - return new (Values) - CleanupPadInst(ParentPad, Args, Values, NameStr, InsertBefore); + IntrusiveOperandsAllocMarker AllocMarker{unsigned(1 + Args.size())}; + return new (AllocMarker) + CleanupPadInst(ParentPad, Args, AllocMarker, NameStr, InsertBefore); } /// Methods for support type inquiry through isa, cast, and dyn_cast: @@ -4144,18 +4187,18 @@ class CleanupPadInst : public FuncletPadInst { class CatchPadInst : public FuncletPadInst { private: explicit CatchPadInst(Value *CatchSwitch, ArrayRef Args, - unsigned Values, const Twine &NameStr, + AllocInfo AllocInfo, const Twine &NameStr, InsertPosition InsertBefore) - : FuncletPadInst(Instruction::CatchPad, CatchSwitch, Args, Values, + : FuncletPadInst(Instruction::CatchPad, CatchSwitch, Args, AllocInfo, NameStr, InsertBefore) {} public: static CatchPadInst *Create(Value *CatchSwitch, ArrayRef Args, const Twine &NameStr = "", InsertPosition InsertBefore = nullptr) { - unsigned Values = 1 + Args.size(); - return new (Values) - CatchPadInst(CatchSwitch, Args, Values, NameStr, InsertBefore); + IntrusiveOperandsAllocMarker AllocMarker{unsigned(1 + Args.size())}; + return new (AllocMarker) + CatchPadInst(CatchSwitch, Args, AllocMarker, NameStr, InsertBefore); } /// Convenience accessors @@ -4181,6 +4224,8 @@ class CatchPadInst : public FuncletPadInst { //===----------------------------------------------------------------------===// class CatchReturnInst : public Instruction { + constexpr static IntrusiveOperandsAllocMarker AllocMarker{2}; + CatchReturnInst(const CatchReturnInst &RI); CatchReturnInst(Value *CatchPad, BasicBlock *BB, 
                  InsertPosition InsertBefore);
@@ -4197,7 +4242,7 @@
                                  InsertPosition InsertBefore = nullptr) {
     assert(CatchPad);
     assert(BB);
-    return new (2) CatchReturnInst(CatchPad, BB, InsertBefore);
+    return new (AllocMarker) CatchReturnInst(CatchPad, BB, InsertBefore);
   }

   /// Provide fast operand accessors
@@ -4257,9 +4302,9 @@ class CleanupReturnInst : public Instruction {
   using UnwindDestField = BoolBitfieldElementT<0>;

 private:
-  CleanupReturnInst(const CleanupReturnInst &RI);
-  CleanupReturnInst(Value *CleanupPad, BasicBlock *UnwindBB, unsigned Values,
-                    InsertPosition InsertBefore = nullptr);
+  CleanupReturnInst(const CleanupReturnInst &RI, AllocInfo AllocInfo);
+  CleanupReturnInst(Value *CleanupPad, BasicBlock *UnwindBB,
+                    AllocInfo AllocInfo, InsertPosition InsertBefore = nullptr);

   void init(Value *CleanupPad, BasicBlock *UnwindBB);
@@ -4277,8 +4322,9 @@ class CleanupReturnInst : public Instruction {
     unsigned Values = 1;
     if (UnwindBB)
       ++Values;
-    return new (Values)
-        CleanupReturnInst(CleanupPad, UnwindBB, Values, InsertBefore);
+    IntrusiveOperandsAllocMarker AllocMarker{Values};
+    return new (AllocMarker)
+        CleanupReturnInst(CleanupPad, UnwindBB, AllocMarker, InsertBefore);
   }

   /// Provide fast operand accessors
@@ -4350,6 +4396,8 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CleanupReturnInst, Value)
 /// end of the block cannot be reached.
 ///
 class UnreachableInst : public Instruction {
+  constexpr static IntrusiveOperandsAllocMarker AllocMarker{0};
+
 protected:
   // Note: Instruction needs to be a friend here to call cloneImpl.
   friend class Instruction;
@@ -4361,7 +4409,7 @@ class UnreachableInst : public Instruction {
                            InsertPosition InsertBefore = nullptr);

   // allocate space for exactly zero operands
-  void *operator new(size_t S) { return User::operator new(S, 0); }
+  void *operator new(size_t S) { return User::operator new(S, AllocMarker); }
   void operator delete(void *Ptr) { User::operator delete(Ptr); }

   unsigned getNumSuccessors() const { return 0; }
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index c36e98f040ab81..f1017bdd512496 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -32,6 +32,9 @@ def int_dx_handle_fromBinding
 def int_dx_typedBufferLoad
     : DefaultAttrsIntrinsic<[llvm_any_ty], [llvm_any_ty, llvm_i32_ty]>;
+def int_dx_typedBufferLoad_checkbit
+    : DefaultAttrsIntrinsic<[llvm_any_ty, llvm_i1_ty],
+                            [llvm_any_ty, llvm_i32_ty]>;
 def int_dx_typedBufferStore
     : DefaultAttrsIntrinsic<[], [llvm_any_ty, llvm_i32_ty, llvm_anyvector_ty]>;
diff --git a/llvm/include/llvm/IR/User.h b/llvm/include/llvm/IR/User.h
index 910815f236abea..39e1314bd8130b 100644
--- a/llvm/include/llvm/IR/User.h
+++ b/llvm/include/llvm/IR/User.h
@@ -43,39 +43,86 @@ struct OperandTraits;
 class User : public Value {
   friend struct HungoffOperandTraits;
+  template <class ConstantClass> friend struct ConstantAggrKeyType;

   LLVM_ATTRIBUTE_ALWAYS_INLINE static void *
   allocateFixedOperandUser(size_t, unsigned, unsigned);

 protected:
+  // Disable the default operator new, as all subclasses must use one of the
+  // custom operators below depending on how they store their operands.
+  void *operator new(size_t Size) = delete;
+
+  /// Indicates this User has operands "hung off" in another allocation.
+  struct HungOffOperandsAllocMarker {};
+
+  /// Indicates this User has operands co-allocated.
+  struct IntrusiveOperandsAllocMarker {
+    /// The number of operands for this User.
+    const unsigned NumOps;
+  };
+
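// ---- Editor's aside (not part of the patch) ----
// Deleting the default operator new above turns a forgotten allocation
// strategy into a compile error rather than a mis-sized allocation.
// `BogusUser` is hypothetical:
//
//   class BogusUser : public User { /* ... */ };
//   new BogusUser(...);               // error: use of deleted operator new
//   new (AllocMarker) BogusUser(...); // OK: strategy chosen explicitly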
+  /// Indicates this User has operands and a descriptor co-allocated.
+  struct IntrusiveOperandsAndDescriptorAllocMarker {
+    /// The number of operands for this User.
+    const unsigned NumOps;
+    /// The number of bytes to allocate for the descriptor. Must be divisible by
+    /// `sizeof(void *)`.
+    const unsigned DescBytes;
+  };
+
+  /// Information about how a User object was allocated, to be passed into the
+  /// User constructor.
+  ///
+  /// DO NOT USE DIRECTLY. Use one of the `AllocMarker` structs instead, they
+  /// can all be implicitly converted to `AllocInfo`.
+  struct AllocInfo {
+  public:
+    const unsigned NumOps : NumUserOperandsBits;
+    const bool HasHungOffUses : 1;
+    const bool HasDescriptor : 1;
+
+    AllocInfo() = delete;
+
+    constexpr AllocInfo(const HungOffOperandsAllocMarker)
+        : NumOps(0), HasHungOffUses(true), HasDescriptor(false) {}
+
+    constexpr AllocInfo(const IntrusiveOperandsAllocMarker Alloc)
+        : NumOps(Alloc.NumOps), HasHungOffUses(false), HasDescriptor(false) {}
+
+    constexpr AllocInfo(const IntrusiveOperandsAndDescriptorAllocMarker Alloc)
+        : NumOps(Alloc.NumOps), HasHungOffUses(false),
+          HasDescriptor(Alloc.DescBytes != 0) {}
+  };
+
   /// Allocate a User with an operand pointer co-allocated.
   ///
   /// This is used for subclasses which need to allocate a variable number
   /// of operands, ie, 'hung off uses'.
-  void *operator new(size_t Size);
+  void *operator new(size_t Size, HungOffOperandsAllocMarker);

   /// Allocate a User with the operands co-allocated.
   ///
   /// This is used for subclasses which have a fixed number of operands.
-  void *operator new(size_t Size, unsigned Us);
+  void *operator new(size_t Size, IntrusiveOperandsAllocMarker allocTrait);

   /// Allocate a User with the operands co-allocated. If DescBytes is non-zero
   /// then allocate an additional DescBytes bytes before the operands. These
   /// bytes can be accessed by calling getDescriptor.
-  ///
-  /// DescBytes needs to be divisible by sizeof(void *). The allocated
-  /// descriptor, if any, is aligned to sizeof(void *) bytes.
-  ///
-  /// This is used for subclasses which have a fixed number of operands.
-  void *operator new(size_t Size, unsigned Us, unsigned DescBytes);
-
-  User(Type *ty, unsigned vty, Use *, unsigned NumOps)
-      : Value(ty, vty) {
-    assert(NumOps < (1u << NumUserOperandsBits) && "Too many operands");
-    NumUserOperands = NumOps;
+  void *operator new(size_t Size,
+                     IntrusiveOperandsAndDescriptorAllocMarker allocTrait);
+
+  User(Type *ty, unsigned vty, AllocInfo AllocInfo) : Value(ty, vty) {
+    assert(AllocInfo.NumOps < (1u << NumUserOperandsBits) &&
+           "Too many operands");
+    NumUserOperands = AllocInfo.NumOps;
+    assert((!AllocInfo.HasDescriptor || !AllocInfo.HasHungOffUses) &&
+           "Cannot have both hung off uses and a descriptor");
+    HasHungOffUses = AllocInfo.HasHungOffUses;
+    HasDescriptor = AllocInfo.HasDescriptor;
     // If we have hung off uses, then the operand list should initially be
     // null.
-    assert((!HasHungOffUses || !getOperandList()) &&
+    assert((!AllocInfo.HasHungOffUses || !getOperandList()) &&
            "Error in initializing hung off uses for User");
   }
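// ---- Editor's aside (not part of the patch) ----
// A compilable miniature of the marker/AllocInfo funnel above: distinct
// marker types collapse into one AllocInfo that constructors can take. Field
// widths and names are simplified stand-ins, and only two markers are shown.
#include <cstdio>

namespace demo {
struct HungOffOperandsAllocMarker {};
struct IntrusiveOperandsAllocMarker { const unsigned NumOps; };

struct AllocInfo {
  const unsigned NumOps : 27;
  const bool HasHungOffUses : 1;
  constexpr AllocInfo(HungOffOperandsAllocMarker)
      : NumOps(0), HasHungOffUses(true) {}
  constexpr AllocInfo(IntrusiveOperandsAllocMarker M)
      : NumOps(M.NumOps), HasHungOffUses(false) {}
};

// A constructor can take AllocInfo no matter which marker the caller used.
void describe(AllocInfo AI) {
  std::printf("NumOps=%u HungOff=%d\n", unsigned(AI.NumOps),
              int(AI.HasHungOffUses));
}
} // namespace demo

int main() {
  constexpr demo::IntrusiveOperandsAllocMarker Fixed{3}; // e.g. a select
  demo::describe(Fixed);                              // implicit conversion
  demo::describe(demo::HungOffOperandsAllocMarker{}); // e.g. a phi
}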
@@ -98,7 +145,20 @@ class User : public Value {
   /// Free memory allocated for User and Use objects.
   void operator delete(void *Usr);
   /// Placement delete - required by std, called if the ctor throws.
-  void operator delete(void *Usr, unsigned) {
+  void operator delete(void *Usr, HungOffOperandsAllocMarker) {
+    // Note: If a subclass manipulates the information which is required to
+    // calculate the Usr memory pointer, e.g. NumUserOperands, the operator
+    // delete of that subclass has to restore the changed information to the
+    // original value, since the dtor of that class is not called if the ctor
+    // fails.
+    User::operator delete(Usr);
+
+#ifndef LLVM_ENABLE_EXCEPTIONS
+    llvm_unreachable("Constructor throws?");
+#endif
+  }
+  /// Placement delete - required by std, called if the ctor throws.
+  void operator delete(void *Usr, IntrusiveOperandsAllocMarker) {
     // Note: If a subclass manipulates the information which is required to calculate the
     // Usr memory pointer, e.g. NumUserOperands, the operator delete of that subclass has
     // to restore the changed information to the original value, since the dtor of that class
@@ -110,7 +170,7 @@ class User : public Value {
 #endif
   }
   /// Placement delete - required by std, called if the ctor throws.
-  void operator delete(void *Usr, unsigned, unsigned) {
+  void operator delete(void *Usr, IntrusiveOperandsAndDescriptorAllocMarker) {
     // Note: If a subclass manipulates the information which is required to calculate the
     // Usr memory pointer, e.g. NumUserOperands, the operator delete of that subclass has
     // to restore the changed information to the original value, since the dtor of that class
@@ -195,19 +255,6 @@ class User : public Value {
   /// Returns the descriptor co-allocated with this User instance.
   MutableArrayRef<uint8_t> getDescriptor();

-  /// Set the number of operands on a GlobalVariable.
-  ///
-  /// GlobalVariable always allocates space for a single operands, but
-  /// doesn't always use it.
-  ///
-  /// FIXME: As that the number of operands is used to find the start of
-  /// the allocated memory in operator delete, we need to always think we have
-  /// 1 operand before delete.
-  void setGlobalVariableNumOperands(unsigned NumOps) {
-    assert(NumOps <= 1 && "GlobalVariable can only have 0 or 1 operands");
-    NumUserOperands = NumOps;
-  }
-
   /// Subclasses with hung off uses need to manage the operand count
   /// themselves. In these instances, the operand count isn't used to find the
   /// OperandList, so there's no issue in having the operand count change.
diff --git a/llvm/include/llvm/SandboxIR/SandboxIR.h b/llvm/include/llvm/SandboxIR/SandboxIR.h
index 2fdbbbd094650f..95fe239555fb41 100644
--- a/llvm/include/llvm/SandboxIR/SandboxIR.h
+++ b/llvm/include/llvm/SandboxIR/SandboxIR.h
@@ -124,6 +124,7 @@ class ConstantAggregateZero;
 class ConstantPointerNull;
 class PoisonValue;
 class BlockAddress;
+class ConstantTokenNone;
 class Context;
 class Function;
 class Instruction;
@@ -1141,6 +1142,37 @@ class BlockAddress final : public Constant {
   }
 };

+// TODO: This should inherit from ConstantData.
+class ConstantTokenNone final : public Constant {
+  ConstantTokenNone(llvm::ConstantTokenNone *C, Context &Ctx)
+      : Constant(ClassID::ConstantTokenNone, C, Ctx) {}
+  friend class Context; // For constructor.
+
+public:
+  /// Return the ConstantTokenNone.
+  static ConstantTokenNone *get(Context &Ctx);
+
+  /// For isa/dyn_cast.
+  static bool classof(const sandboxir::Value *From) {
+    return From->getSubclassID() == ClassID::ConstantTokenNone;
+  }
+
+  unsigned getUseOperandNo(const Use &Use) const final {
+    llvm_unreachable("ConstantTokenNone has no operands!");
+  }
+
+#ifndef NDEBUG
+  void verify() const override {
+    assert(isa<llvm::ConstantTokenNone>(Val) &&
+           "Expected a ConstantTokenNone!");
+  }
+  void dumpOS(raw_ostream &OS) const override {
+    dumpCommonPrefix(OS);
+    dumpCommonSuffix(OS);
+  }
+#endif
+};
+
 /// Iterator for `Instruction`s in a `BasicBlock`.
 /// \Returns a sandboxir::Instruction & when dereferenced.
 class BBIterator {
@@ -1506,6 +1538,10 @@ class SelectInst : public SingleLLVMInstructionImpl<llvm::SelectInst> {
   static Value *create(Value *Cond, Value *True, Value *False,
                        BasicBlock *InsertAtEnd, Context &Ctx,
                        const Twine &Name = "");
+
+  const Value *getCondition() const { return getOperand(0); }
+  const Value *getTrueValue() const { return getOperand(1); }
+  const Value *getFalseValue() const { return getOperand(2); }
   Value *getCondition() { return getOperand(0); }
   Value *getTrueValue() { return getOperand(1); }
   Value *getFalseValue() { return getOperand(2); }
@@ -1513,7 +1549,16 @@
   void setCondition(Value *New) { setOperand(0, New); }
   void setTrueValue(Value *New) { setOperand(1, New); }
   void setFalseValue(Value *New) { setOperand(2, New); }
-  void swapValues() { cast<llvm::SelectInst>(Val)->swapValues(); }
+  void swapValues();
+
+  /// Return a string if the specified operands are invalid for a select
+  /// operation, otherwise return null.
+  static const char *areInvalidOperands(Value *Cond, Value *True,
+                                        Value *False) {
+    return llvm::SelectInst::areInvalidOperands(Cond->Val, True->Val,
+                                                False->Val);
+  }
+
   /// For isa/dyn_cast.
   static bool classof(const Value *From);
 };
diff --git a/llvm/include/llvm/SandboxIR/SandboxIRValues.def b/llvm/include/llvm/SandboxIR/SandboxIRValues.def
index c29e8be24ea754..bd2f533e880ac6 100644
--- a/llvm/include/llvm/SandboxIR/SandboxIRValues.def
+++ b/llvm/include/llvm/SandboxIR/SandboxIRValues.def
@@ -35,6 +35,7 @@ DEF_CONST(ConstantPointerNull, ConstantPointerNull)
 DEF_CONST(UndefValue, UndefValue)
 DEF_CONST(PoisonValue, PoisonValue)
 DEF_CONST(BlockAddress, BlockAddress)
+DEF_CONST(ConstantTokenNone, ConstantTokenNone)

 #ifndef DEF_INSTR
 #define DEF_INSTR(ID, OPCODE, CLASS)
diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp
index e32a54fa346a9a..6d035d53732957 100644
--- a/llvm/lib/IR/Constants.cpp
+++ b/llvm/lib/IR/Constants.cpp
@@ -1267,9 +1267,9 @@ static Constant *getSequenceIfElementsMatch(Constant *C,
 }

 ConstantAggregate::ConstantAggregate(Type *T, ValueTy VT,
-                                     ArrayRef<Constant *> V)
-    : Constant(T, VT, OperandTraits<ConstantAggregate>::op_end(this) - V.size(),
-               V.size()) {
+                                     ArrayRef<Constant *> V,
+                                     AllocInfo AllocInfo)
+    : Constant(T, VT, AllocInfo) {
   llvm::copy(V, op_begin());

   // Check that types match, unless this is an opaque struct.
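// ---- Editor's aside (not part of the patch) ----
// A hedged usage sketch for the sandboxir::SelectInst::areInvalidOperands
// wrapper added above: it forwards to llvm::SelectInst::areInvalidOperands on
// the wrapped values and yields a diagnostic string, or nullptr when the
// operands are valid. `Cond`, `T`, `F`, `Where` and `Ctx` are hypothetical:
//
//   if (const char *Why =
//           sandboxir::SelectInst::areInvalidOperands(Cond, T, F))
//     report_fatal_error(Why); // or bail out some other way
//   Value *Sel = sandboxir::SelectInst::create(Cond, T, F, Where, Ctx);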
@@ -1282,8 +1282,9 @@ ConstantAggregate::ConstantAggregate(Type *T, ValueTy VT, } } -ConstantArray::ConstantArray(ArrayType *T, ArrayRef V) - : ConstantAggregate(T, ConstantArrayVal, V) { +ConstantArray::ConstantArray(ArrayType *T, ArrayRef V, + AllocInfo AllocInfo) + : ConstantAggregate(T, ConstantArrayVal, V, AllocInfo) { assert(V.size() == T->getNumElements() && "Invalid initializer for constant array"); } @@ -1346,8 +1347,9 @@ StructType *ConstantStruct::getTypeForElements(ArrayRef V, return getTypeForElements(V[0]->getContext(), V, Packed); } -ConstantStruct::ConstantStruct(StructType *T, ArrayRef V) - : ConstantAggregate(T, ConstantStructVal, V) { +ConstantStruct::ConstantStruct(StructType *T, ArrayRef V, + AllocInfo AllocInfo) + : ConstantAggregate(T, ConstantStructVal, V, AllocInfo) { assert((T->isOpaque() || V.size() == T->getNumElements()) && "Invalid initializer for constant struct"); } @@ -1388,8 +1390,9 @@ Constant *ConstantStruct::get(StructType *ST, ArrayRef V) { return ST->getContext().pImpl->StructConstants.getOrCreate(ST, V); } -ConstantVector::ConstantVector(VectorType *T, ArrayRef V) - : ConstantAggregate(T, ConstantVectorVal, V) { +ConstantVector::ConstantVector(VectorType *T, ArrayRef V, + AllocInfo AllocInfo) + : ConstantAggregate(T, ConstantVectorVal, V, AllocInfo) { assert(V.size() == cast(T)->getNumElements() && "Invalid initializer for constant vector"); } @@ -1879,7 +1882,7 @@ BlockAddress *BlockAddress::get(Function *F, BasicBlock *BB) { BlockAddress::BlockAddress(Function *F, BasicBlock *BB) : Constant(PointerType::get(F->getContext(), F->getAddressSpace()), - Value::BlockAddressVal, &Op<0>(), 2) { + Value::BlockAddressVal, AllocMarker) { setOperand(0, F); setOperand(1, BB); BB->AdjustBlockAddressRefCount(1); @@ -1951,7 +1954,7 @@ DSOLocalEquivalent *DSOLocalEquivalent::get(GlobalValue *GV) { } DSOLocalEquivalent::DSOLocalEquivalent(GlobalValue *GV) - : Constant(GV->getType(), Value::DSOLocalEquivalentVal, &Op<0>(), 1) { + : Constant(GV->getType(), Value::DSOLocalEquivalentVal, AllocMarker) { setOperand(0, GV); } @@ -2009,7 +2012,7 @@ NoCFIValue *NoCFIValue::get(GlobalValue *GV) { } NoCFIValue::NoCFIValue(GlobalValue *GV) - : Constant(GV->getType(), Value::NoCFIValueVal, &Op<0>(), 1) { + : Constant(GV->getType(), Value::NoCFIValueVal, AllocMarker) { setOperand(0, GV); } @@ -2056,7 +2059,7 @@ ConstantPtrAuth *ConstantPtrAuth::getWithSameSchema(Constant *Pointer) const { ConstantPtrAuth::ConstantPtrAuth(Constant *Ptr, ConstantInt *Key, ConstantInt *Disc, Constant *AddrDisc) - : Constant(Ptr->getType(), Value::ConstantPtrAuthVal, &Op<0>(), 4) { + : Constant(Ptr->getType(), Value::ConstantPtrAuthVal, AllocMarker) { assert(Ptr->getType()->isPointerTy()); assert(Key->getBitWidth() == 32); assert(Disc->getBitWidth() == 64); @@ -2758,11 +2761,8 @@ const char *ConstantExpr::getOpcodeName() const { GetElementPtrConstantExpr::GetElementPtrConstantExpr( Type *SrcElementTy, Constant *C, ArrayRef IdxList, Type *DestTy, - std::optional InRange) - : ConstantExpr(DestTy, Instruction::GetElementPtr, - OperandTraits::op_end(this) - - (IdxList.size() + 1), - IdxList.size() + 1), + std::optional InRange, AllocInfo AllocInfo) + : ConstantExpr(DestTy, Instruction::GetElementPtr, AllocInfo), SrcElementTy(SrcElementTy), ResElementTy(GetElementPtrInst::getIndexedType(SrcElementTy, IdxList)), InRange(std::move(InRange)) { diff --git a/llvm/lib/IR/ConstantsContext.h b/llvm/lib/IR/ConstantsContext.h index bd19ec6b9dcac0..6afc86ffc73abc 100644 --- a/llvm/lib/IR/ConstantsContext.h +++ 
b/llvm/lib/IR/ConstantsContext.h @@ -44,14 +44,16 @@ namespace llvm { /// CastConstantExpr - This class is private to Constants.cpp, and is used /// behind the scenes to implement cast constant exprs. class CastConstantExpr final : public ConstantExpr { + constexpr static IntrusiveOperandsAllocMarker AllocMarker{1}; + public: CastConstantExpr(unsigned Opcode, Constant *C, Type *Ty) - : ConstantExpr(Ty, Opcode, &Op<0>(), 1) { + : ConstantExpr(Ty, Opcode, AllocMarker) { Op<0>() = C; } // allocate space for exactly one operand - void *operator new(size_t S) { return User::operator new(S, 1); } + void *operator new(size_t S) { return User::operator new(S, AllocMarker); } void operator delete(void *Ptr) { User::operator delete(Ptr); } DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); @@ -67,17 +69,19 @@ class CastConstantExpr final : public ConstantExpr { /// BinaryConstantExpr - This class is private to Constants.cpp, and is used /// behind the scenes to implement binary constant exprs. class BinaryConstantExpr final : public ConstantExpr { + constexpr static IntrusiveOperandsAllocMarker AllocMarker{2}; + public: BinaryConstantExpr(unsigned Opcode, Constant *C1, Constant *C2, unsigned Flags) - : ConstantExpr(C1->getType(), Opcode, &Op<0>(), 2) { + : ConstantExpr(C1->getType(), Opcode, AllocMarker) { Op<0>() = C1; Op<1>() = C2; SubclassOptionalData = Flags; } // allocate space for exactly two operands - void *operator new(size_t S) { return User::operator new(S, 2); } + void *operator new(size_t S) { return User::operator new(S, AllocMarker); } void operator delete(void *Ptr) { User::operator delete(Ptr); } /// Transparently provide more efficient getOperand methods. @@ -95,16 +99,18 @@ class BinaryConstantExpr final : public ConstantExpr { /// Constants.cpp, and is used behind the scenes to implement /// extractelement constant exprs. class ExtractElementConstantExpr final : public ConstantExpr { + constexpr static IntrusiveOperandsAllocMarker AllocMarker{2}; + public: ExtractElementConstantExpr(Constant *C1, Constant *C2) - : ConstantExpr(cast(C1->getType())->getElementType(), - Instruction::ExtractElement, &Op<0>(), 2) { + : ConstantExpr(cast(C1->getType())->getElementType(), + Instruction::ExtractElement, AllocMarker) { Op<0>() = C1; Op<1>() = C2; } // allocate space for exactly two operands - void *operator new(size_t S) { return User::operator new(S, 2); } + void *operator new(size_t S) { return User::operator new(S, AllocMarker); } void operator delete(void *Ptr) { User::operator delete(Ptr); } /// Transparently provide more efficient getOperand methods. @@ -122,17 +128,18 @@ class ExtractElementConstantExpr final : public ConstantExpr { /// Constants.cpp, and is used behind the scenes to implement /// insertelement constant exprs. class InsertElementConstantExpr final : public ConstantExpr { + constexpr static IntrusiveOperandsAllocMarker AllocMarker{3}; + public: InsertElementConstantExpr(Constant *C1, Constant *C2, Constant *C3) - : ConstantExpr(C1->getType(), Instruction::InsertElement, - &Op<0>(), 3) { + : ConstantExpr(C1->getType(), Instruction::InsertElement, AllocMarker) { Op<0>() = C1; Op<1>() = C2; Op<2>() = C3; } // allocate space for exactly three operands - void *operator new(size_t S) { return User::operator new(S, 3); } + void *operator new(size_t S) { return User::operator new(S, AllocMarker); } void operator delete(void *Ptr) { User::operator delete(Ptr); } /// Transparently provide more efficient getOperand methods. 
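// ---- Editor's aside (not part of the patch) ----
// The recurring shape in this file: the operand count lives in one
// class-scoped constant that feeds both allocation and construction, so the
// two can no longer drift apart. `FooConstantExpr` is hypothetical; the
// Cast/Binary/ExtractElement/InsertElement classes above are the real
// instances, differing only in the count baked into the marker:
//
//   class FooConstantExpr final : public ConstantExpr {
//     constexpr static IntrusiveOperandsAllocMarker AllocMarker{2};
//   public:
//     FooConstantExpr(Constant *A, Constant *B)
//         : ConstantExpr(A->getType(), SomeOpcode, AllocMarker) { /* ... */ }
//     void *operator new(size_t S) {
//       return User::operator new(S, AllocMarker);
//     }
//   };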
@@ -150,12 +157,14 @@ class InsertElementConstantExpr final : public ConstantExpr { /// Constants.cpp, and is used behind the scenes to implement /// shufflevector constant exprs. class ShuffleVectorConstantExpr final : public ConstantExpr { + constexpr static IntrusiveOperandsAllocMarker AllocMarker{2}; + public: ShuffleVectorConstantExpr(Constant *C1, Constant *C2, ArrayRef Mask) : ConstantExpr(VectorType::get( cast(C1->getType())->getElementType(), Mask.size(), isa(C1->getType())), - Instruction::ShuffleVector, &Op<0>(), 2) { + Instruction::ShuffleVector, AllocMarker) { assert(ShuffleVectorInst::isValidOperands(C1, C2, Mask) && "Invalid shuffle vector instruction operands!"); Op<0>() = C1; @@ -168,7 +177,7 @@ class ShuffleVectorConstantExpr final : public ConstantExpr { SmallVector ShuffleMask; Constant *ShuffleMaskForBitcode; - void *operator new(size_t S) { return User::operator new(S, 2); } + void *operator new(size_t S) { return User::operator new(S, AllocMarker); } void operator delete(void *Ptr) { return User::operator delete(Ptr); } /// Transparently provide more efficient getOperand methods. @@ -191,15 +200,17 @@ class GetElementPtrConstantExpr : public ConstantExpr { GetElementPtrConstantExpr(Type *SrcElementTy, Constant *C, ArrayRef IdxList, Type *DestTy, - std::optional InRange); + std::optional InRange, + AllocInfo AllocInfo); public: static GetElementPtrConstantExpr * Create(Type *SrcElementTy, Constant *C, ArrayRef IdxList, Type *DestTy, unsigned Flags, std::optional InRange) { - GetElementPtrConstantExpr *Result = new (IdxList.size() + 1) + IntrusiveOperandsAllocMarker AllocMarker{unsigned(IdxList.size() + 1)}; + GetElementPtrConstantExpr *Result = new (AllocMarker) GetElementPtrConstantExpr(SrcElementTy, C, IdxList, DestTy, - std::move(InRange)); + std::move(InRange), AllocMarker); Result->SubclassOptionalData = Flags; return Result; } @@ -318,7 +329,8 @@ template struct ConstantAggrKeyType { using TypeClass = typename ConstantInfo::TypeClass; ConstantClass *create(TypeClass *Ty) const { - return new (Operands.size()) ConstantClass(Ty, Operands); + User::IntrusiveOperandsAllocMarker AllocMarker{unsigned(Operands.size())}; + return new (AllocMarker) ConstantClass(Ty, Operands, AllocMarker); } }; diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp index afef8930669e84..82ff4e1bc7f5c5 100644 --- a/llvm/lib/IR/Function.cpp +++ b/llvm/lib/IR/Function.cpp @@ -402,7 +402,7 @@ Function *Function::createWithDefaultAttr(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N, Module *M) { - auto *F = new Function(Ty, Linkage, AddrSpace, N, M); + auto *F = new (AllocMarker) Function(Ty, Linkage, AddrSpace, N, M); AttrBuilder B(F->getContext()); UWTableKind UWTable = M->getUwtable(); if (UWTable != UWTableKind::None) @@ -501,8 +501,7 @@ static unsigned computeAddrSpace(unsigned AddrSpace, Module *M) { Function::Function(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &name, Module *ParentModule) - : GlobalObject(Ty, Value::FunctionVal, - OperandTraits::op_begin(this), 0, Linkage, name, + : GlobalObject(Ty, Value::FunctionVal, AllocMarker, Linkage, name, computeAddrSpace(AddrSpace, ParentModule)), NumArgs(Ty->getNumParams()), IsNewDbgInfoFormat(UseNewDbgInfoFormat) { assert(FunctionType::isValidReturnType(getReturnType()) && diff --git a/llvm/lib/IR/Globals.cpp b/llvm/lib/IR/Globals.cpp index 2bc69cdb712b0a..99f4fa50e9c433 100644 --- a/llvm/lib/IR/Globals.cpp +++ b/llvm/lib/IR/Globals.cpp @@ -442,9 +442,8 @@ 
GlobalVariable::GlobalVariable(Type *Ty, bool constant, LinkageTypes Link, Constant *InitVal, const Twine &Name, ThreadLocalMode TLMode, unsigned AddressSpace, bool isExternallyInitialized) - : GlobalObject(Ty, Value::GlobalVariableVal, - OperandTraits::op_begin(this), - InitVal != nullptr, Link, Name, AddressSpace), + : GlobalObject(Ty, Value::GlobalVariableVal, AllocMarker, Link, Name, + AddressSpace), isConstantGlobal(constant), isExternallyInitializedConstant(isExternallyInitialized) { assert(!Ty->isFunctionTy() && PointerType::isValidElementType(Ty) && @@ -454,6 +453,8 @@ GlobalVariable::GlobalVariable(Type *Ty, bool constant, LinkageTypes Link, assert(InitVal->getType() == Ty && "Initializer should be the same type as the GlobalVariable!"); Op<0>() = InitVal; + } else { + setGlobalVariableNumOperands(0); } } @@ -540,7 +541,7 @@ void GlobalVariable::setCodeModel(CodeModel::Model CM) { GlobalAlias::GlobalAlias(Type *Ty, unsigned AddressSpace, LinkageTypes Link, const Twine &Name, Constant *Aliasee, Module *ParentModule) - : GlobalValue(Ty, Value::GlobalAliasVal, &Op<0>(), 1, Link, Name, + : GlobalValue(Ty, Value::GlobalAliasVal, AllocMarker, Link, Name, AddressSpace) { setAliasee(Aliasee); if (ParentModule) @@ -597,7 +598,7 @@ const GlobalObject *GlobalAlias::getAliaseeObject() const { GlobalIFunc::GlobalIFunc(Type *Ty, unsigned AddressSpace, LinkageTypes Link, const Twine &Name, Constant *Resolver, Module *ParentModule) - : GlobalObject(Ty, Value::GlobalIFuncVal, &Op<0>(), 1, Link, Name, + : GlobalObject(Ty, Value::GlobalIFuncVal, AllocMarker, Link, Name, AddressSpace) { setResolver(Resolver); if (ParentModule) diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp index 62d88ce21657b2..b1c2b0200c8269 100644 --- a/llvm/lib/IR/Instruction.cpp +++ b/llvm/lib/IR/Instruction.cpp @@ -32,9 +32,9 @@ InsertPosition::InsertPosition(Instruction *InsertBefore) InsertPosition::InsertPosition(BasicBlock *InsertAtEnd) : InsertAt(InsertAtEnd ? InsertAtEnd->end() : InstListType::iterator()) {} -Instruction::Instruction(Type *ty, unsigned it, Use *Ops, unsigned NumOps, +Instruction::Instruction(Type *ty, unsigned it, AllocInfo AllocInfo, InsertPosition InsertBefore) - : User(ty, Value::InstructionVal + it, Ops, NumOps) { + : User(ty, Value::InstructionVal + it, AllocInfo) { // When called with an iterator, there must be a block to insert into. 
if (InstListType::iterator InsertIt = InsertBefore; InsertIt.isValid()) { BasicBlock *BB = InsertIt.getNodeParent(); diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp index 19da1f60d424d2..e95b98a6404432 100644 --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -121,8 +121,9 @@ const char *SelectInst::areInvalidOperands(Value *Op0, Value *Op1, Value *Op2) { //===----------------------------------------------------------------------===// PHINode::PHINode(const PHINode &PN) - : Instruction(PN.getType(), Instruction::PHI, nullptr, PN.getNumOperands()), + : Instruction(PN.getType(), Instruction::PHI, AllocMarker), ReservedSpace(PN.getNumOperands()) { + NumUserOperands = PN.getNumOperands(); allocHungoffUses(PN.getNumOperands()); std::copy(PN.op_begin(), PN.op_end(), op_begin()); copyIncomingBlocks(make_range(PN.block_begin(), PN.block_end())); @@ -243,14 +244,14 @@ bool PHINode::hasConstantOrUndefValue() const { LandingPadInst::LandingPadInst(Type *RetTy, unsigned NumReservedValues, const Twine &NameStr, InsertPosition InsertBefore) - : Instruction(RetTy, Instruction::LandingPad, nullptr, 0, InsertBefore) { + : Instruction(RetTy, Instruction::LandingPad, AllocMarker, InsertBefore) { init(NumReservedValues, NameStr); } LandingPadInst::LandingPadInst(const LandingPadInst &LP) - : Instruction(LP.getType(), Instruction::LandingPad, nullptr, - LP.getNumOperands()), + : Instruction(LP.getType(), Instruction::LandingPad, AllocMarker), ReservedSpace(LP.getNumOperands()) { + NumUserOperands = LP.getNumOperands(); allocHungoffUses(LP.getNumOperands()); Use *OL = getOperandList(); const Use *InOL = LP.getOperandList(); @@ -716,16 +717,16 @@ void CallInst::init(FunctionType *FTy, Value *Func, const Twine &NameStr) { } CallInst::CallInst(FunctionType *Ty, Value *Func, const Twine &Name, - InsertPosition InsertBefore) - : CallBase(Ty->getReturnType(), Instruction::Call, - OperandTraits::op_end(this) - 1, 1, InsertBefore) { + AllocInfo AllocInfo, InsertPosition InsertBefore) + : CallBase(Ty->getReturnType(), Instruction::Call, AllocInfo, + InsertBefore) { init(Ty, Func, Name); } -CallInst::CallInst(const CallInst &CI) - : CallBase(CI.Attrs, CI.FTy, CI.getType(), Instruction::Call, - OperandTraits::op_end(this) - CI.getNumOperands(), - CI.getNumOperands()) { +CallInst::CallInst(const CallInst &CI, AllocInfo AllocInfo) + : CallBase(CI.Attrs, CI.FTy, CI.getType(), Instruction::Call, AllocInfo) { + assert(getNumOperands() == CI.getNumOperands() && + "Wrong number of operands allocated"); setTailCallKind(CI.getTailCallKind()); setCallingConv(CI.getCallingConv()); @@ -774,7 +775,7 @@ void InvokeInst::init(FunctionType *FTy, Value *Fn, BasicBlock *IfNormal, const Twine &NameStr) { this->FTy = FTy; - assert((int)getNumOperands() == + assert(getNumOperands() == ComputeNumOperands(Args.size(), CountBundleInputs(Bundles)) && "NumOperands not set up?"); @@ -803,10 +804,10 @@ void InvokeInst::init(FunctionType *FTy, Value *Fn, BasicBlock *IfNormal, setName(NameStr); } -InvokeInst::InvokeInst(const InvokeInst &II) - : CallBase(II.Attrs, II.FTy, II.getType(), Instruction::Invoke, - OperandTraits::op_end(this) - II.getNumOperands(), - II.getNumOperands()) { +InvokeInst::InvokeInst(const InvokeInst &II, AllocInfo AllocInfo) + : CallBase(II.Attrs, II.FTy, II.getType(), Instruction::Invoke, AllocInfo) { + assert(getNumOperands() == II.getNumOperands() && + "Wrong number of operands allocated"); setCallingConv(II.getCallingConv()); std::copy(II.op_begin(), II.op_end(), op_begin()); 
std::copy(II.bundle_op_info_begin(), II.bundle_op_info_end(), @@ -855,9 +856,9 @@ void CallBrInst::init(FunctionType *FTy, Value *Fn, BasicBlock *Fallthrough, const Twine &NameStr) { this->FTy = FTy; - assert((int)getNumOperands() == - ComputeNumOperands(Args.size(), IndirectDests.size(), - CountBundleInputs(Bundles)) && + assert(getNumOperands() == ComputeNumOperands(Args.size(), + IndirectDests.size(), + CountBundleInputs(Bundles)) && "NumOperands not set up?"); #ifndef NDEBUG @@ -887,10 +888,11 @@ void CallBrInst::init(FunctionType *FTy, Value *Fn, BasicBlock *Fallthrough, setName(NameStr); } -CallBrInst::CallBrInst(const CallBrInst &CBI) +CallBrInst::CallBrInst(const CallBrInst &CBI, AllocInfo AllocInfo) : CallBase(CBI.Attrs, CBI.FTy, CBI.getType(), Instruction::CallBr, - OperandTraits::op_end(this) - CBI.getNumOperands(), - CBI.getNumOperands()) { + AllocInfo) { + assert(getNumOperands() == CBI.getNumOperands() && + "Wrong number of operands allocated"); setCallingConv(CBI.getCallingConv()); std::copy(CBI.op_begin(), CBI.op_end(), op_begin()); std::copy(CBI.bundle_op_info_begin(), CBI.bundle_op_info_end(), @@ -918,19 +920,19 @@ CallBrInst *CallBrInst::Create(CallBrInst *CBI, ArrayRef OpB, // ReturnInst Implementation //===----------------------------------------------------------------------===// -ReturnInst::ReturnInst(const ReturnInst &RI) +ReturnInst::ReturnInst(const ReturnInst &RI, AllocInfo AllocInfo) : Instruction(Type::getVoidTy(RI.getContext()), Instruction::Ret, - OperandTraits::op_end(this) - RI.getNumOperands(), - RI.getNumOperands()) { + AllocInfo) { + assert(getNumOperands() == RI.getNumOperands() && + "Wrong number of operands allocated"); if (RI.getNumOperands()) Op<0>() = RI.Op<0>(); SubclassOptionalData = RI.SubclassOptionalData; } -ReturnInst::ReturnInst(LLVMContext &C, Value *retVal, +ReturnInst::ReturnInst(LLVMContext &C, Value *retVal, AllocInfo AllocInfo, InsertPosition InsertBefore) - : Instruction(Type::getVoidTy(C), Instruction::Ret, - OperandTraits::op_end(this) - !!retVal, !!retVal, + : Instruction(Type::getVoidTy(C), Instruction::Ret, AllocInfo, InsertBefore) { if (retVal) Op<0>() = retVal; @@ -942,13 +944,13 @@ ReturnInst::ReturnInst(LLVMContext &C, Value *retVal, ResumeInst::ResumeInst(const ResumeInst &RI) : Instruction(Type::getVoidTy(RI.getContext()), Instruction::Resume, - OperandTraits::op_begin(this), 1) { + AllocMarker) { Op<0>() = RI.Op<0>(); } ResumeInst::ResumeInst(Value *Exn, InsertPosition InsertBefore) : Instruction(Type::getVoidTy(Exn->getContext()), Instruction::Resume, - OperandTraits::op_begin(this), 1, InsertBefore) { + AllocMarker, InsertBefore) { Op<0>() = Exn; } @@ -956,11 +958,11 @@ ResumeInst::ResumeInst(Value *Exn, InsertPosition InsertBefore) // CleanupReturnInst Implementation //===----------------------------------------------------------------------===// -CleanupReturnInst::CleanupReturnInst(const CleanupReturnInst &CRI) - : Instruction(CRI.getType(), Instruction::CleanupRet, - OperandTraits::op_end(this) - - CRI.getNumOperands(), - CRI.getNumOperands()) { +CleanupReturnInst::CleanupReturnInst(const CleanupReturnInst &CRI, + AllocInfo AllocInfo) + : Instruction(CRI.getType(), Instruction::CleanupRet, AllocInfo) { + assert(getNumOperands() == CRI.getNumOperands() && + "Wrong number of operands allocated"); setSubclassData( CRI.getSubclassData()); Op<0>() = CRI.Op<0>(); @@ -978,12 +980,10 @@ void CleanupReturnInst::init(Value *CleanupPad, BasicBlock *UnwindBB) { } CleanupReturnInst::CleanupReturnInst(Value *CleanupPad, 
BasicBlock *UnwindBB, - unsigned Values, + AllocInfo AllocInfo, InsertPosition InsertBefore) : Instruction(Type::getVoidTy(CleanupPad->getContext()), - Instruction::CleanupRet, - OperandTraits::op_end(this) - Values, - Values, InsertBefore) { + Instruction::CleanupRet, AllocInfo, InsertBefore) { init(CleanupPad, UnwindBB); } @@ -997,7 +997,7 @@ void CatchReturnInst::init(Value *CatchPad, BasicBlock *BB) { CatchReturnInst::CatchReturnInst(const CatchReturnInst &CRI) : Instruction(Type::getVoidTy(CRI.getContext()), Instruction::CatchRet, - OperandTraits::op_begin(this), 2) { + AllocMarker) { Op<0>() = CRI.Op<0>(); Op<1>() = CRI.Op<1>(); } @@ -1005,8 +1005,7 @@ CatchReturnInst::CatchReturnInst(const CatchReturnInst &CRI) CatchReturnInst::CatchReturnInst(Value *CatchPad, BasicBlock *BB, InsertPosition InsertBefore) : Instruction(Type::getVoidTy(BB->getContext()), Instruction::CatchRet, - OperandTraits::op_begin(this), 2, - InsertBefore) { + AllocMarker, InsertBefore) { init(CatchPad, BB); } @@ -1018,7 +1017,7 @@ CatchSwitchInst::CatchSwitchInst(Value *ParentPad, BasicBlock *UnwindDest, unsigned NumReservedValues, const Twine &NameStr, InsertPosition InsertBefore) - : Instruction(ParentPad->getType(), Instruction::CatchSwitch, nullptr, 0, + : Instruction(ParentPad->getType(), Instruction::CatchSwitch, AllocMarker, InsertBefore) { if (UnwindDest) ++NumReservedValues; @@ -1027,8 +1026,8 @@ CatchSwitchInst::CatchSwitchInst(Value *ParentPad, BasicBlock *UnwindDest, } CatchSwitchInst::CatchSwitchInst(const CatchSwitchInst &CSI) - : Instruction(CSI.getType(), Instruction::CatchSwitch, nullptr, - CSI.getNumOperands()) { + : Instruction(CSI.getType(), Instruction::CatchSwitch, AllocMarker) { + NumUserOperands = CSI.NumUserOperands; init(CSI.getParentPad(), CSI.getUnwindDest(), CSI.getNumOperands()); setNumHungOffUseOperands(ReservedSpace); Use *OL = getOperandList(); @@ -1093,22 +1092,19 @@ void FuncletPadInst::init(Value *ParentPad, ArrayRef Args, setName(NameStr); } -FuncletPadInst::FuncletPadInst(const FuncletPadInst &FPI) - : Instruction(FPI.getType(), FPI.getOpcode(), - OperandTraits::op_end(this) - - FPI.getNumOperands(), - FPI.getNumOperands()) { +FuncletPadInst::FuncletPadInst(const FuncletPadInst &FPI, AllocInfo AllocInfo) + : Instruction(FPI.getType(), FPI.getOpcode(), AllocInfo) { + assert(getNumOperands() == FPI.getNumOperands() && + "Wrong number of operands allocated"); std::copy(FPI.op_begin(), FPI.op_end(), op_begin()); setParentPad(FPI.getParentPad()); } FuncletPadInst::FuncletPadInst(Instruction::FuncletPadOps Op, Value *ParentPad, - ArrayRef Args, unsigned Values, + ArrayRef Args, AllocInfo AllocInfo, const Twine &NameStr, InsertPosition InsertBefore) - : Instruction(ParentPad->getType(), Op, - OperandTraits::op_end(this) - Values, Values, - InsertBefore) { + : Instruction(ParentPad->getType(), Op, AllocInfo, InsertBefore) { init(ParentPad, Args, NameStr); } @@ -1118,8 +1114,8 @@ FuncletPadInst::FuncletPadInst(Instruction::FuncletPadOps Op, Value *ParentPad, UnreachableInst::UnreachableInst(LLVMContext &Context, InsertPosition InsertBefore) - : Instruction(Type::getVoidTy(Context), Instruction::Unreachable, nullptr, - 0, InsertBefore) {} + : Instruction(Type::getVoidTy(Context), Instruction::Unreachable, + AllocMarker, InsertBefore) {} //===----------------------------------------------------------------------===// // BranchInst Implementation @@ -1131,19 +1127,18 @@ void BranchInst::AssertOK() { "May only branch on boolean predicates!"); } -BranchInst::BranchInst(BasicBlock *IfTrue, 
InsertPosition InsertBefore) +BranchInst::BranchInst(BasicBlock *IfTrue, AllocInfo AllocInfo, + InsertPosition InsertBefore) : Instruction(Type::getVoidTy(IfTrue->getContext()), Instruction::Br, - OperandTraits::op_end(this) - 1, 1, - InsertBefore) { + AllocInfo, InsertBefore) { assert(IfTrue && "Branch destination may not be null!"); Op<-1>() = IfTrue; } BranchInst::BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond, - InsertPosition InsertBefore) + AllocInfo AllocInfo, InsertPosition InsertBefore) : Instruction(Type::getVoidTy(IfTrue->getContext()), Instruction::Br, - OperandTraits::op_end(this) - 3, 3, - InsertBefore) { + AllocInfo, InsertBefore) { // Assign in order of operand index to make use-list order predictable. Op<-3>() = Cond; Op<-2>() = IfFalse; @@ -1153,10 +1148,11 @@ BranchInst::BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond, #endif } -BranchInst::BranchInst(const BranchInst &BI) +BranchInst::BranchInst(const BranchInst &BI, AllocInfo AllocInfo) : Instruction(Type::getVoidTy(BI.getContext()), Instruction::Br, - OperandTraits::op_end(this) - BI.getNumOperands(), - BI.getNumOperands()) { + AllocInfo) { + assert(getNumOperands() == BI.getNumOperands() && + "Wrong number of operands allocated"); // Assign in order of operand index to make use-list order predictable. if (BI.getNumOperands() != 1) { assert(BI.getNumOperands() == 3 && "BR can have 1 or 3 operands!"); @@ -1313,9 +1309,8 @@ StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, Align Align, StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, Align Align, AtomicOrdering Order, SyncScope::ID SSID, InsertPosition InsertBefore) - : Instruction(Type::getVoidTy(val->getContext()), Store, - OperandTraits::op_begin(this), - OperandTraits::operands(this), InsertBefore) { + : Instruction(Type::getVoidTy(val->getContext()), Store, AllocMarker, + InsertBefore) { Op<0>() = val; Op<1>() = addr; setVolatile(isVolatile); @@ -1356,8 +1351,7 @@ AtomicCmpXchgInst::AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal, InsertPosition InsertBefore) : Instruction( StructType::get(Cmp->getType(), Type::getInt1Ty(Cmp->getContext())), - AtomicCmpXchg, OperandTraits::op_begin(this), - OperandTraits::operands(this), InsertBefore) { + AtomicCmpXchg, AllocMarker, InsertBefore) { Init(Ptr, Cmp, NewVal, Alignment, SuccessOrdering, FailureOrdering, SSID); } @@ -1389,9 +1383,7 @@ void AtomicRMWInst::Init(BinOp Operation, Value *Ptr, Value *Val, AtomicRMWInst::AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val, Align Alignment, AtomicOrdering Ordering, SyncScope::ID SSID, InsertPosition InsertBefore) - : Instruction(Val->getType(), AtomicRMW, - OperandTraits::op_begin(this), - OperandTraits::operands(this), InsertBefore) { + : Instruction(Val->getType(), AtomicRMW, AllocMarker, InsertBefore) { Init(Operation, Ptr, Val, Alignment, Ordering, SSID); } @@ -1448,7 +1440,7 @@ StringRef AtomicRMWInst::getOperationName(BinOp Op) { FenceInst::FenceInst(LLVMContext &C, AtomicOrdering Ordering, SyncScope::ID SSID, InsertPosition InsertBefore) - : Instruction(Type::getVoidTy(C), Fence, nullptr, 0, InsertBefore) { + : Instruction(Type::getVoidTy(C), Fence, AllocMarker, InsertBefore) { setOrdering(Ordering); setSyncScopeID(SSID); } @@ -1466,13 +1458,13 @@ void GetElementPtrInst::init(Value *Ptr, ArrayRef IdxList, setName(Name); } -GetElementPtrInst::GetElementPtrInst(const GetElementPtrInst &GEPI) - : Instruction(GEPI.getType(), GetElementPtr, - OperandTraits::op_end(this) - - GEPI.getNumOperands(), - 
GEPI.getNumOperands()), +GetElementPtrInst::GetElementPtrInst(const GetElementPtrInst &GEPI, + AllocInfo AllocInfo) + : Instruction(GEPI.getType(), GetElementPtr, AllocInfo), SourceElementType(GEPI.SourceElementType), ResultElementType(GEPI.ResultElementType) { + assert(getNumOperands() == GEPI.getNumOperands() && + "Wrong number of operands allocated"); std::copy(GEPI.op_begin(), GEPI.op_end(), op_begin()); SubclassOptionalData = GEPI.SubclassOptionalData; } @@ -1606,9 +1598,8 @@ bool GetElementPtrInst::collectOffset( ExtractElementInst::ExtractElementInst(Value *Val, Value *Index, const Twine &Name, InsertPosition InsertBef) - : Instruction( - cast(Val->getType())->getElementType(), ExtractElement, - OperandTraits::op_begin(this), 2, InsertBef) { + : Instruction(cast(Val->getType())->getElementType(), + ExtractElement, AllocMarker, InsertBef) { assert(isValidOperands(Val, Index) && "Invalid extractelement instruction operands!"); Op<0>() = Val; @@ -1629,9 +1620,7 @@ bool ExtractElementInst::isValidOperands(const Value *Val, const Value *Index) { InsertElementInst::InsertElementInst(Value *Vec, Value *Elt, Value *Index, const Twine &Name, InsertPosition InsertBef) - : Instruction(Vec->getType(), InsertElement, - OperandTraits::op_begin(this), 3, - InsertBef) { + : Instruction(Vec->getType(), InsertElement, AllocMarker, InsertBef) { assert(isValidOperands(Vec, Elt, Index) && "Invalid insertelement instruction operands!"); Op<0>() = Vec; @@ -1679,8 +1668,7 @@ ShuffleVectorInst::ShuffleVectorInst(Value *V1, Value *V2, Value *Mask, : Instruction( VectorType::get(cast(V1->getType())->getElementType(), cast(Mask->getType())->getElementCount()), - ShuffleVector, OperandTraits::op_begin(this), - OperandTraits::operands(this), InsertBefore) { + ShuffleVector, AllocMarker, InsertBefore) { assert(isValidOperands(V1, V2, Mask) && "Invalid shuffle vector instruction operands!"); @@ -1698,8 +1686,7 @@ ShuffleVectorInst::ShuffleVectorInst(Value *V1, Value *V2, ArrayRef Mask, : Instruction( VectorType::get(cast(V1->getType())->getElementType(), Mask.size(), isa(V1->getType())), - ShuffleVector, OperandTraits::op_begin(this), - OperandTraits::operands(this), InsertBefore) { + ShuffleVector, AllocMarker, InsertBefore) { assert(isValidOperands(V1, V2, Mask) && "Invalid shuffle vector instruction operands!"); Op<0>() = V1; @@ -2464,9 +2451,8 @@ void InsertValueInst::init(Value *Agg, Value *Val, ArrayRef Idxs, } InsertValueInst::InsertValueInst(const InsertValueInst &IVI) - : Instruction(IVI.getType(), InsertValue, - OperandTraits::op_begin(this), 2), - Indices(IVI.Indices) { + : Instruction(IVI.getType(), InsertValue, AllocMarker), + Indices(IVI.Indices) { Op<0>() = IVI.getOperand(0); Op<1>() = IVI.getOperand(1); SubclassOptionalData = IVI.SubclassOptionalData; @@ -2565,8 +2551,7 @@ void UnaryOperator::AssertOK() { BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2, Type *Ty, const Twine &Name, InsertPosition InsertBefore) - : Instruction(Ty, iType, OperandTraits::op_begin(this), - OperandTraits::operands(this), InsertBefore) { + : Instruction(Ty, iType, AllocMarker, InsertBefore) { Op<0>() = S1; Op<1>() = S2; setName(Name); @@ -3427,8 +3412,7 @@ AddrSpaceCastInst::AddrSpaceCastInst(Value *S, Type *Ty, const Twine &Name, CmpInst::CmpInst(Type *ty, OtherOps op, Predicate predicate, Value *LHS, Value *RHS, const Twine &Name, InsertPosition InsertBefore, Instruction *FlagsSource) - : Instruction(ty, op, OperandTraits::op_begin(this), - OperandTraits::operands(this), InsertBefore) { + : 
Instruction(ty, op, AllocMarker, InsertBefore) { Op<0>() = LHS; Op<1>() = RHS; setPredicate((Predicate)predicate); @@ -3918,12 +3902,12 @@ void SwitchInst::init(Value *Value, BasicBlock *Default, unsigned NumReserved) { SwitchInst::SwitchInst(Value *Value, BasicBlock *Default, unsigned NumCases, InsertPosition InsertBefore) : Instruction(Type::getVoidTy(Value->getContext()), Instruction::Switch, - nullptr, 0, InsertBefore) { + AllocMarker, InsertBefore) { init(Value, Default, 2+NumCases*2); } SwitchInst::SwitchInst(const SwitchInst &SI) - : Instruction(SI.getType(), Instruction::Switch, nullptr, 0) { + : Instruction(SI.getType(), Instruction::Switch, AllocMarker) { init(SI.getCondition(), SI.getDefaultDest(), SI.getNumOperands()); setNumHungOffUseOperands(SI.getNumOperands()); Use *OL = getOperandList(); @@ -4125,13 +4109,14 @@ void IndirectBrInst::growOperands() { IndirectBrInst::IndirectBrInst(Value *Address, unsigned NumCases, InsertPosition InsertBefore) : Instruction(Type::getVoidTy(Address->getContext()), - Instruction::IndirectBr, nullptr, 0, InsertBefore) { + Instruction::IndirectBr, AllocMarker, InsertBefore) { init(Address, NumCases); } IndirectBrInst::IndirectBrInst(const IndirectBrInst &IBI) : Instruction(Type::getVoidTy(IBI.getContext()), Instruction::IndirectBr, - nullptr, IBI.getNumOperands()) { + AllocMarker) { + NumUserOperands = IBI.NumUserOperands; allocHungoffUses(IBI.getNumOperands()); Use *OL = getOperandList(); const Use *InOL = IBI.getOperandList(); @@ -4185,7 +4170,8 @@ FreezeInst::FreezeInst(Value *S, const Twine &Name, InsertPosition InsertBefore) // unit that uses these classes. GetElementPtrInst *GetElementPtrInst::cloneImpl() const { - return new (getNumOperands()) GetElementPtrInst(*this); + IntrusiveOperandsAllocMarker AllocMarker{getNumOperands()}; + return new (AllocMarker) GetElementPtrInst(*this, AllocMarker); } UnaryOperator *UnaryOperator::cloneImpl() const { @@ -4305,10 +4291,13 @@ AddrSpaceCastInst *AddrSpaceCastInst::cloneImpl() const { CallInst *CallInst::cloneImpl() const { if (hasOperandBundles()) { - unsigned DescriptorBytes = getNumOperandBundles() * sizeof(BundleOpInfo); - return new(getNumOperands(), DescriptorBytes) CallInst(*this); + IntrusiveOperandsAndDescriptorAllocMarker AllocMarker{ + getNumOperands(), + getNumOperandBundles() * unsigned(sizeof(BundleOpInfo))}; + return new (AllocMarker) CallInst(*this, AllocMarker); } - return new(getNumOperands()) CallInst(*this); + IntrusiveOperandsAllocMarker AllocMarker{getNumOperands()}; + return new (AllocMarker) CallInst(*this, AllocMarker); } SelectInst *SelectInst::cloneImpl() const { @@ -4331,18 +4320,20 @@ ShuffleVectorInst *ShuffleVectorInst::cloneImpl() const { return new ShuffleVectorInst(getOperand(0), getOperand(1), getShuffleMask()); } -PHINode *PHINode::cloneImpl() const { return new PHINode(*this); } +PHINode *PHINode::cloneImpl() const { return new (AllocMarker) PHINode(*this); } LandingPadInst *LandingPadInst::cloneImpl() const { return new LandingPadInst(*this); } ReturnInst *ReturnInst::cloneImpl() const { - return new(getNumOperands()) ReturnInst(*this); + IntrusiveOperandsAllocMarker AllocMarker{getNumOperands()}; + return new (AllocMarker) ReturnInst(*this, AllocMarker); } BranchInst *BranchInst::cloneImpl() const { - return new(getNumOperands()) BranchInst(*this); + IntrusiveOperandsAllocMarker AllocMarker{getNumOperands()}; + return new (AllocMarker) BranchInst(*this, AllocMarker); } SwitchInst *SwitchInst::cloneImpl() const { return new SwitchInst(*this); } @@ -4353,28 
+4344,37 @@ IndirectBrInst *IndirectBrInst::cloneImpl() const { InvokeInst *InvokeInst::cloneImpl() const { if (hasOperandBundles()) { - unsigned DescriptorBytes = getNumOperandBundles() * sizeof(BundleOpInfo); - return new(getNumOperands(), DescriptorBytes) InvokeInst(*this); + IntrusiveOperandsAndDescriptorAllocMarker AllocMarker{ + getNumOperands(), + getNumOperandBundles() * unsigned(sizeof(BundleOpInfo))}; + return new (AllocMarker) InvokeInst(*this, AllocMarker); } - return new(getNumOperands()) InvokeInst(*this); + IntrusiveOperandsAllocMarker AllocMarker{getNumOperands()}; + return new (AllocMarker) InvokeInst(*this, AllocMarker); } CallBrInst *CallBrInst::cloneImpl() const { if (hasOperandBundles()) { - unsigned DescriptorBytes = getNumOperandBundles() * sizeof(BundleOpInfo); - return new (getNumOperands(), DescriptorBytes) CallBrInst(*this); + IntrusiveOperandsAndDescriptorAllocMarker AllocMarker{ + getNumOperands(), + getNumOperandBundles() * unsigned(sizeof(BundleOpInfo))}; + return new (AllocMarker) CallBrInst(*this, AllocMarker); } - return new (getNumOperands()) CallBrInst(*this); + IntrusiveOperandsAllocMarker AllocMarker{getNumOperands()}; + return new (AllocMarker) CallBrInst(*this, AllocMarker); } -ResumeInst *ResumeInst::cloneImpl() const { return new (1) ResumeInst(*this); } +ResumeInst *ResumeInst::cloneImpl() const { + return new (AllocMarker) ResumeInst(*this); +} CleanupReturnInst *CleanupReturnInst::cloneImpl() const { - return new (getNumOperands()) CleanupReturnInst(*this); + IntrusiveOperandsAllocMarker AllocMarker{getNumOperands()}; + return new (AllocMarker) CleanupReturnInst(*this, AllocMarker); } CatchReturnInst *CatchReturnInst::cloneImpl() const { - return new (getNumOperands()) CatchReturnInst(*this); + return new (AllocMarker) CatchReturnInst(*this); } CatchSwitchInst *CatchSwitchInst::cloneImpl() const { @@ -4382,7 +4382,8 @@ CatchSwitchInst *CatchSwitchInst::cloneImpl() const { } FuncletPadInst *FuncletPadInst::cloneImpl() const { - return new (getNumOperands()) FuncletPadInst(*this); + IntrusiveOperandsAllocMarker AllocMarker{getNumOperands()}; + return new (AllocMarker) FuncletPadInst(*this, AllocMarker); } UnreachableInst *UnreachableInst::cloneImpl() const { diff --git a/llvm/lib/IR/User.cpp b/llvm/lib/IR/User.cpp index 00dd9c72c469cb..b0aa785deb9afd 100644 --- a/llvm/lib/IR/User.cpp +++ b/llvm/lib/IR/User.cpp @@ -145,10 +145,7 @@ void *User::allocateFixedOperandUser(size_t Size, unsigned Us, ::operator new(Size + sizeof(Use) * Us + DescBytesToAllocate)); Use *Start = reinterpret_cast(Storage + DescBytesToAllocate); Use *End = Start + Us; - User *Obj = reinterpret_cast(End); - Obj->NumUserOperands = Us; - Obj->HasHungOffUses = false; - Obj->HasDescriptor = DescBytes != 0; + User *Obj = reinterpret_cast(End); for (; Start != End; Start++) new (Start) Use(Obj); @@ -160,22 +157,21 @@ void *User::allocateFixedOperandUser(size_t Size, unsigned Us, return Obj; } -void *User::operator new(size_t Size, unsigned Us) { - return allocateFixedOperandUser(Size, Us, 0); +void *User::operator new(size_t Size, IntrusiveOperandsAllocMarker allocTrait) { + return allocateFixedOperandUser(Size, allocTrait.NumOps, 0); } -void *User::operator new(size_t Size, unsigned Us, unsigned DescBytes) { - return allocateFixedOperandUser(Size, Us, DescBytes); +void *User::operator new(size_t Size, + IntrusiveOperandsAndDescriptorAllocMarker allocTrait) { + return allocateFixedOperandUser(Size, allocTrait.NumOps, + allocTrait.DescBytes); } -void *User::operator new(size_t Size) { 
+void *User::operator new(size_t Size, HungOffOperandsAllocMarker) { // Allocate space for a single Use* void *Storage = ::operator new(Size + sizeof(Use *)); Use **HungOffOperandList = static_cast(Storage); User *Obj = reinterpret_cast(HungOffOperandList + 1); - Obj->NumUserOperands = 0; - Obj->HasHungOffUses = true; - Obj->HasDescriptor = false; *HungOffOperandList = nullptr; return Obj; } diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 6ebf262379c2fb..8f151a99b11709 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -1181,8 +1181,8 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, // Enable contextual profiling instrumentation. const bool IsCtxProfGen = !IsPGOInstrGen && IsPreLink && PGOCtxProfLoweringPass::isCtxIRPGOInstrEnabled(); - const bool IsCtxProfUse = !UseCtxProfile.empty() && !PGOOpt && - Phase == ThinOrFullLTOPhase::ThinLTOPreLink; + const bool IsCtxProfUse = + !UseCtxProfile.empty() && Phase == ThinOrFullLTOPhase::ThinLTOPreLink; if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen || IsCtxProfUse) @@ -1673,7 +1673,7 @@ PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) { // In pre-link, for ctx prof use, we stop here with an instrumented IR. We let // thinlto use the contextual info to perform imports; then use the contextual // profile in the post-thinlink phase. - if (!UseCtxProfile.empty() && !PGOOpt) { + if (!UseCtxProfile.empty()) { addRequiredLTOPreLinkPasses(MPM); return MPM; } diff --git a/llvm/lib/SandboxIR/SandboxIR.cpp b/llvm/lib/SandboxIR/SandboxIR.cpp index 18fdcda15a1a91..05d05f7ed10fb9 100644 --- a/llvm/lib/SandboxIR/SandboxIR.cpp +++ b/llvm/lib/SandboxIR/SandboxIR.cpp @@ -662,6 +662,12 @@ Value *SelectInst::create(Value *Cond, Value *True, Value *False, return createCommon(Cond, True, False, Name, Builder, Ctx); } +void SelectInst::swapValues() { + Ctx.getTracker().emplaceIfTracking(getOperandUse(1), + getOperandUse(2)); + cast(Val)->swapValues(); +} + bool SelectInst::classof(const Value *From) { return From->getSubclassID() == ClassID::Select; } @@ -2515,6 +2521,11 @@ BasicBlock *BlockAddress::getBasicBlock() const { Ctx.getValue(cast(Val)->getBasicBlock())); } +ConstantTokenNone *ConstantTokenNone::get(Context &Ctx) { + auto *LLVMC = llvm::ConstantTokenNone::get(Ctx.LLVMCtx); + return cast(Ctx.getOrCreateConstant(LLVMC)); +} + FunctionType *Function::getFunctionType() const { return cast( Ctx.getType(cast(Val)->getFunctionType())); @@ -2615,6 +2626,10 @@ Value *Context::getOrCreateValueInternal(llvm::Value *LLVMV, llvm::User *U) { It->second = std::unique_ptr( new BlockAddress(cast(C), *this)); return It->second.get(); + case llvm::Value::ConstantTokenNoneVal: + It->second = std::unique_ptr( + new ConstantTokenNone(cast(C), *this)); + return It->second.get(); case llvm::Value::ConstantAggregateZeroVal: { auto *CAZ = cast(C); It->second = std::unique_ptr( diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 516d0cf33aaeb0..47da9d577cd827 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -16615,7 +16615,7 @@ bool AArch64TargetLowering::shouldSinkOperands( static bool createTblShuffleMask(unsigned SrcWidth, unsigned DstWidth, unsigned NumElts, bool IsLittleEndian, SmallVectorImpl &Mask) { - if (DstWidth % 8 != 0 || DstWidth <= 16 || DstWidth >= 64) + if (DstWidth 
% 8 != 0 || DstWidth <= 16 || DstWidth > 64) return false; assert(DstWidth % SrcWidth == 0 && @@ -16649,7 +16649,7 @@ static Value *createTblShuffleForZExt(IRBuilderBase &Builder, Value *Op, return nullptr; auto *FirstEltZero = Builder.CreateInsertElement( - PoisonValue::get(SrcTy), Builder.getInt8(0), uint64_t(0)); + PoisonValue::get(SrcTy), Builder.getIntN(SrcWidth, 0), uint64_t(0)); Value *Result = Builder.CreateShuffleVector(Op, FirstEltZero, Mask); Result = Builder.CreateBitCast(Result, DstTy); if (DstTy != ZExtTy) @@ -16670,7 +16670,7 @@ static Value *createTblShuffleForSExt(IRBuilderBase &Builder, Value *Op, return nullptr; auto *FirstEltZero = Builder.CreateInsertElement( - PoisonValue::get(SrcTy), Builder.getInt8(0), uint64_t(0)); + PoisonValue::get(SrcTy), Builder.getIntN(SrcWidth, 0), uint64_t(0)); return Builder.CreateShuffleVector(Op, FirstEltZero, Mask); } @@ -16847,6 +16847,9 @@ bool AArch64TargetLowering::optimizeExtendOrTruncateConversion( return false; } + if (DstTy->getScalarSizeInBits() >= 64) + return false; + IRBuilder<> Builder(ZExt); Value *Result = createTblShuffleForZExt( Builder, ZExt->getOperand(0), cast(ZExt->getType()), @@ -16859,8 +16862,10 @@ bool AArch64TargetLowering::optimizeExtendOrTruncateConversion( } auto *UIToFP = dyn_cast(I); - if (UIToFP && SrcTy->getElementType()->isIntegerTy(8) && - DstTy->getElementType()->isFloatTy()) { + if (UIToFP && ((SrcTy->getElementType()->isIntegerTy(8) && + DstTy->getElementType()->isFloatTy()) || + (SrcTy->getElementType()->isIntegerTy(16) && + DstTy->getElementType()->isDoubleTy()))) { IRBuilder<> Builder(I); Value *ZExt = createTblShuffleForZExt( Builder, I->getOperand(0), FixedVectorType::getInteger(DstTy), diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td index 759a58ed3930e3..902ab37bf741ed 100644 --- a/llvm/lib/Target/DirectX/DXIL.td +++ b/llvm/lib/Target/DirectX/DXIL.td @@ -719,6 +719,15 @@ def BufferStore : DXILOp<69, bufferStore> { let stages = [Stages]; } +def CheckAccessFullyMapped : DXILOp<71, checkAccessFullyMapped> { + let Doc = "checks whether a Sample, Gather, or Load operation " + "accessed mapped tiles in a tiled resource"; + let arguments = [OverloadTy]; + let result = Int1Ty; + let overloads = [Overloads]; + let stages = [Stages]; +} + def ThreadId : DXILOp<93, threadId> { let Doc = "Reads the thread ID"; let LLVMIntrinsic = int_dx_thread_id; diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp index 3b2a5f5061eb83..7719d6b1079110 100644 --- a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp +++ b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp @@ -386,6 +386,7 @@ static Error makeOpError(dxil::OpCode OpCode, Twine Msg) { Expected DXILOpBuilder::tryCreateOp(dxil::OpCode OpCode, ArrayRef Args, + const Twine &Name, Type *RetTy) { const OpCodeProperty *Prop = getOpCodeProperty(OpCode); @@ -451,12 +452,12 @@ Expected DXILOpBuilder::tryCreateOp(dxil::OpCode OpCode, OpArgs.push_back(IRB.getInt32(llvm::to_underlying(OpCode))); OpArgs.append(Args.begin(), Args.end()); - return IRB.CreateCall(DXILFn, OpArgs); + return IRB.CreateCall(DXILFn, OpArgs, Name); } CallInst *DXILOpBuilder::createOp(dxil::OpCode OpCode, ArrayRef Args, - Type *RetTy) { - Expected Result = tryCreateOp(OpCode, Args, RetTy); + const Twine &Name, Type *RetTy) { + Expected Result = tryCreateOp(OpCode, Args, Name, RetTy); if (Error E = Result.takeError()) llvm_unreachable("Invalid arguments for operation"); return *Result; diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.h 
b/llvm/lib/Target/DirectX/DXILOpBuilder.h index a68f0c43f67afb..037ae3822cfb90 100644 --- a/llvm/lib/Target/DirectX/DXILOpBuilder.h +++ b/llvm/lib/Target/DirectX/DXILOpBuilder.h @@ -39,11 +39,12 @@ class DXILOpBuilder { /// Create a call instruction for the given DXIL op. The arguments /// must be valid for an overload of the operation. CallInst *createOp(dxil::OpCode Op, ArrayRef Args, - Type *RetTy = nullptr); + const Twine &Name = "", Type *RetTy = nullptr); /// Try to create a call instruction for the given DXIL op. Fails if the /// overload is invalid. Expected tryCreateOp(dxil::OpCode Op, ArrayRef Args, + const Twine &Name = "", Type *RetTy = nullptr); /// Get a `%dx.types.ResRet` type with the given element type. diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp index f968cab1dccf1e..3ee3ee05563c24 100644 --- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp +++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp @@ -118,7 +118,7 @@ class OpLowerer { Args.append(CI->arg_begin(), CI->arg_end()); Expected OpCall = - OpBuilder.tryCreateOp(DXILOp, Args, F.getReturnType()); + OpBuilder.tryCreateOp(DXILOp, Args, CI->getName(), F.getReturnType()); if (Error E = OpCall.takeError()) return E; @@ -198,7 +198,7 @@ class OpLowerer { ConstantInt::get(Int32Ty, Binding.RecordID), CI->getArgOperand(3), CI->getArgOperand(4)}; Expected OpCall = - OpBuilder.tryCreateOp(OpCode::CreateHandle, Args); + OpBuilder.tryCreateOp(OpCode::CreateHandle, Args, CI->getName()); if (Error E = OpCall.takeError()) return E; @@ -233,15 +233,16 @@ class OpLowerer { Binding.LowerBound, UpperBound, Binding.Space, RI.getResourceClass()); std::array BindArgs{ResBind, CI->getArgOperand(3), CI->getArgOperand(4)}; - Expected OpBind = - OpBuilder.tryCreateOp(OpCode::CreateHandleFromBinding, BindArgs); + Expected OpBind = OpBuilder.tryCreateOp( + OpCode::CreateHandleFromBinding, BindArgs, CI->getName()); if (Error E = OpBind.takeError()) return E; std::array AnnotateArgs{ *OpBind, OpBuilder.getResProps(Props.first, Props.second)}; - Expected OpAnnotate = - OpBuilder.tryCreateOp(OpCode::AnnotateHandle, AnnotateArgs); + Expected OpAnnotate = OpBuilder.tryCreateOp( + OpCode::AnnotateHandle, AnnotateArgs, + CI->hasName() ? CI->getName() + "_annot" : Twine()); if (Error E = OpAnnotate.takeError()) return E; @@ -265,16 +266,54 @@ class OpLowerer { /// Replace uses of \c Intrin with the values in the `dx.ResRet` of \c Op. /// Since we expect to be post-scalarization, make an effort to avoid vectors. - Error replaceResRetUses(CallInst *Intrin, CallInst *Op) { + Error replaceResRetUses(CallInst *Intrin, CallInst *Op, bool HasCheckBit) { IRBuilder<> &IRB = OpBuilder.getIRB(); + Instruction *OldResult = Intrin; Type *OldTy = Intrin->getType(); + if (HasCheckBit) { + auto *ST = cast(OldTy); + + Value *CheckOp = nullptr; + Type *Int32Ty = IRB.getInt32Ty(); + for (Use &U : make_early_inc_range(OldResult->uses())) { + if (auto *EVI = dyn_cast(U.getUser())) { + ArrayRef Indices = EVI->getIndices(); + assert(Indices.size() == 1); + // We're only interested in uses of the check bit for now. + if (Indices[0] != 1) + continue; + if (!CheckOp) { + Value *NewEVI = IRB.CreateExtractValue(Op, 4); + Expected OpCall = OpBuilder.tryCreateOp( + OpCode::CheckAccessFullyMapped, {NewEVI}, + OldResult->hasName() ? 
OldResult->getName() + "_check" + : Twine(), + Int32Ty); + if (Error E = OpCall.takeError()) + return E; + CheckOp = *OpCall; + } + EVI->replaceAllUsesWith(CheckOp); + EVI->eraseFromParent(); + } + } + + OldResult = cast( + IRB.CreateExtractValue(Op, 0, OldResult->getName())); + OldTy = ST->getElementType(0); + } + // For scalars, we just extract the first element. if (!isa(OldTy)) { Value *EVI = IRB.CreateExtractValue(Op, 0); - Intrin->replaceAllUsesWith(EVI); - Intrin->eraseFromParent(); + OldResult->replaceAllUsesWith(EVI); + OldResult->eraseFromParent(); + if (OldResult != Intrin) { + assert(Intrin->use_empty() && "Intrinsic still has uses?"); + Intrin->eraseFromParent(); + } return Error::success(); } @@ -283,7 +322,7 @@ class OpLowerer { // The users of the operation should all be scalarized, so we attempt to // replace the extractelements with extractvalues directly. - for (Use &U : make_early_inc_range(Intrin->uses())) { + for (Use &U : make_early_inc_range(OldResult->uses())) { if (auto *EEI = dyn_cast(U.getUser())) { if (auto *IndexOp = dyn_cast(EEI->getIndexOperand())) { size_t IndexVal = IndexOp->getZExtValue(); @@ -331,7 +370,7 @@ class OpLowerer { // If we still have uses, then we're not fully scalarized and need to // recreate the vector. This should only happen for things like exported // functions from libraries. - if (!Intrin->use_empty()) { + if (!OldResult->use_empty()) { for (int I = 0, E = N; I != E; ++I) if (!Extracts[I]) Extracts[I] = IRB.CreateExtractValue(Op, I); @@ -339,14 +378,19 @@ class OpLowerer { Value *Vec = UndefValue::get(OldTy); for (int I = 0, E = N; I != E; ++I) Vec = IRB.CreateInsertElement(Vec, Extracts[I], I); - Intrin->replaceAllUsesWith(Vec); + OldResult->replaceAllUsesWith(Vec); + } + + OldResult->eraseFromParent(); + if (OldResult != Intrin) { + assert(Intrin->use_empty() && "Intrinsic still has uses?"); + Intrin->eraseFromParent(); } - Intrin->eraseFromParent(); return Error::success(); } - [[nodiscard]] bool lowerTypedBufferLoad(Function &F) { + [[nodiscard]] bool lowerTypedBufferLoad(Function &F, bool HasCheckBit) { IRBuilder<> &IRB = OpBuilder.getIRB(); Type *Int32Ty = IRB.getInt32Ty(); @@ -358,14 +402,17 @@ class OpLowerer { Value *Index0 = CI->getArgOperand(1); Value *Index1 = UndefValue::get(Int32Ty); - Type *NewRetTy = OpBuilder.getResRetType(CI->getType()->getScalarType()); + Type *OldTy = CI->getType(); + if (HasCheckBit) + OldTy = cast(OldTy)->getElementType(0); + Type *NewRetTy = OpBuilder.getResRetType(OldTy->getScalarType()); std::array Args{Handle, Index0, Index1}; - Expected OpCall = - OpBuilder.tryCreateOp(OpCode::BufferLoad, Args, NewRetTy); + Expected OpCall = OpBuilder.tryCreateOp( + OpCode::BufferLoad, Args, CI->getName(), NewRetTy); if (Error E = OpCall.takeError()) return E; - if (Error E = replaceResRetUses(CI, *OpCall)) + if (Error E = replaceResRetUses(CI, *OpCall, HasCheckBit)) return E; return Error::success(); @@ -405,7 +452,7 @@ class OpLowerer { std::array Args{Handle, Index0, Index1, Data0, Data1, Data2, Data3, Mask}; Expected OpCall = - OpBuilder.tryCreateOp(OpCode::BufferStore, Args); + OpBuilder.tryCreateOp(OpCode::BufferStore, Args, CI->getName()); if (Error E = OpCall.takeError()) return E; @@ -434,7 +481,10 @@ class OpLowerer { HasErrors |= lowerHandleFromBinding(F); break; case Intrinsic::dx_typedBufferLoad: - HasErrors |= lowerTypedBufferLoad(F); + HasErrors |= lowerTypedBufferLoad(F, /*HasCheckBit=*/false); + break; + case Intrinsic::dx_typedBufferLoad_checkbit: + HasErrors |= lowerTypedBufferLoad(F, 
/*HasCheckBit=*/true); break; case Intrinsic::dx_typedBufferStore: HasErrors |= lowerTypedBufferStore(F); diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp index 1205ad4c6b008f..082546c4dd72f8 100644 --- a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp @@ -139,16 +139,21 @@ #include "NVPTX.h" #include "NVPTXTargetMachine.h" #include "NVPTXUtilities.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/PtrUseVisitor.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/IntrinsicsNVPTX.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include #include @@ -217,7 +222,8 @@ INITIALIZE_PASS_END(NVPTXLowerArgs, "nvptx-lower-args", // pointer in parameter AS. // For "escapes" (to memory, a function call, or a ptrtoint), cast the OldUse to // generic using cvta.param. -static void convertToParamAS(Use *OldUse, Value *Param, bool GridConstant) { +static void convertToParamAS(Use *OldUse, Value *Param, bool HasCvtaParam, + bool IsGridConstant) { Instruction *I = dyn_cast(OldUse->getUser()); assert(I && "OldUse must be in an instruction"); struct IP { @@ -228,7 +234,8 @@ static void convertToParamAS(Use *OldUse, Value *Param, bool GridConstant) { SmallVector ItemsToConvert = {{OldUse, I, Param}}; SmallVector InstructionsToDelete; - auto CloneInstInParamAS = [GridConstant](const IP &I) -> Value * { + auto CloneInstInParamAS = [HasCvtaParam, + IsGridConstant](const IP &I) -> Value * { if (auto *LI = dyn_cast(I.OldInstruction)) { LI->setOperand(0, I.NewParam); return LI; @@ -252,8 +259,25 @@ static void convertToParamAS(Use *OldUse, Value *Param, bool GridConstant) { // Just pass through the argument, the old ASC is no longer needed. return I.NewParam; } + if (auto *MI = dyn_cast(I.OldInstruction)) { + if (MI->getRawSource() == I.OldUse->get()) { + // convert to memcpy/memmove from param space. 
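+        // E.g. a memcpy whose source is the byval argument is recreated below
+        // as a memcpy that reads its source directly out of the param address
+        // space, so no local copy of the argument is needed; dereferenceable
+        // bytes from the original call are carried over.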
+        IRBuilder<> Builder(I.OldInstruction);
+        Intrinsic::ID ID = MI->getIntrinsicID();
+
+        CallInst *B = Builder.CreateMemTransferInst(
+            ID, MI->getRawDest(), MI->getDestAlign(), I.NewParam,
+            MI->getSourceAlign(), MI->getLength(), MI->isVolatile());
+        for (unsigned I : {0, 1})
+          if (uint64_t Bytes = MI->getParamDereferenceableBytes(I))
+            B->addDereferenceableParamAttr(I, Bytes);
+        return B;
+      }
+      // We may be able to handle other cases if the argument is
+      // __grid_constant__
-    if (GridConstant) {
+    if (HasCvtaParam) {
       auto GetParamAddrCastToGeneric =
           [](Value *Addr, Instruction *OriginalUser) -> Value * {
         PointerType *ReturnTy =
@@ -269,24 +293,44 @@ static void convertToParamAS(Use *OldUse, Value *Param, bool GridConstant) {
             OriginalUser->getIterator());
         return CvtToGenCall;
       };
-
-      if (auto *CI = dyn_cast<CallInst>(I.OldInstruction)) {
-        I.OldUse->set(GetParamAddrCastToGeneric(I.NewParam, CI));
-        return CI;
+      auto *ParamInGenericAS =
+          GetParamAddrCastToGeneric(I.NewParam, I.OldInstruction);
+
+      // phi/select could use generic arg pointers w/o __grid_constant__
+      if (auto *PHI = dyn_cast<PHINode>(I.OldInstruction)) {
+        for (auto [Idx, V] : enumerate(PHI->incoming_values())) {
+          if (V.get() == I.OldUse->get())
+            PHI->setIncomingValue(Idx, ParamInGenericAS);
+        }
       }
-      if (auto *SI = dyn_cast<StoreInst>(I.OldInstruction)) {
-        // byval address is being stored, cast it to generic
-        if (SI->getValueOperand() == I.OldUse->get())
-          SI->setOperand(0, GetParamAddrCastToGeneric(I.NewParam, SI));
-        return SI;
+      if (auto *SI = dyn_cast<SelectInst>(I.OldInstruction)) {
+        if (SI->getTrueValue() == I.OldUse->get())
+          SI->setTrueValue(ParamInGenericAS);
+        if (SI->getFalseValue() == I.OldUse->get())
+          SI->setFalseValue(ParamInGenericAS);
       }
-      if (auto *PI = dyn_cast<PtrToIntInst>(I.OldInstruction)) {
-        if (PI->getPointerOperand() == I.OldUse->get())
-          PI->setOperand(0, GetParamAddrCastToGeneric(I.NewParam, PI));
-        return PI;
+
+      // Escapes or writes can only use generic param pointers if
+      // __grid_constant__ is in effect.
+      if (IsGridConstant) {
+        if (auto *CI = dyn_cast<CallInst>(I.OldInstruction)) {
+          I.OldUse->set(ParamInGenericAS);
+          return CI;
+        }
+        if (auto *SI = dyn_cast<StoreInst>(I.OldInstruction)) {
+          // byval address is being stored, cast it to generic
+          if (SI->getValueOperand() == I.OldUse->get())
+            SI->setOperand(0, ParamInGenericAS);
+          return SI;
+        }
+        if (auto *PI = dyn_cast<PtrToIntInst>(I.OldInstruction)) {
+          if (PI->getPointerOperand() == I.OldUse->get())
+            PI->setOperand(0, ParamInGenericAS);
+          return PI;
+        }
+        // TODO: If we allow stores, we should allow memcpy/memset to
+        // parameter, too.
      }
-      llvm_unreachable(
-          "Instruction unsupported even for grid_constant argument");
     }

     llvm_unreachable("Unsupported instruction");
@@ -409,49 +453,110 @@ static void adjustByValArgAlignment(Argument *Arg, Value *ArgInParamAS,
   }
 }

+namespace {
+struct ArgUseChecker : PtrUseVisitor<ArgUseChecker> {
+  using Base = PtrUseVisitor<ArgUseChecker>;
+
+  bool IsGridConstant;
+  // Set of phi/select instructions using the Arg
+  SmallPtrSet<Instruction *, 4> Conditionals;
+
+  ArgUseChecker(const DataLayout &DL, bool IsGridConstant)
+      : PtrUseVisitor<ArgUseChecker>(DL), IsGridConstant(IsGridConstant) {}
+
+  PtrInfo visitArgPtr(Argument &A) {
+    assert(A.getType()->isPointerTy());
+    IntegerType *IntIdxTy = cast<IntegerType>(DL.getIndexType(A.getType()));
+    IsOffsetKnown = false;
+    Offset = APInt(IntIdxTy->getBitWidth(), 0);
+    PI.reset();
+    Conditionals.clear();
+
+    LLVM_DEBUG(dbgs() << "Checking Argument " << A << "\n");
+    // Enqueue the uses of this pointer.
+    enqueueUsers(A);
+
+    // Visit all the uses off the worklist until it is empty.
+    // Note that unlike PtrUseVisitor we intentionally do not track offsets.
+    // We're only interested in how we use the pointer.
+    while (!(Worklist.empty() || PI.isAborted())) {
+      UseToVisit ToVisit = Worklist.pop_back_val();
+      U = ToVisit.UseAndIsOffsetKnown.getPointer();
+      Instruction *I = cast<Instruction>(U->getUser());
+      if (isa<PHINode>(I) || isa<SelectInst>(I))
+        Conditionals.insert(I);
+      LLVM_DEBUG(dbgs() << "Processing " << *I << "\n");
+      Base::visit(I);
+    }
+    if (PI.isEscaped())
+      LLVM_DEBUG(dbgs() << "Argument pointer escaped: " << *PI.getEscapingInst()
+                        << "\n");
+    else if (PI.isAborted())
+      LLVM_DEBUG(dbgs() << "Pointer use needs a copy: " << *PI.getAbortingInst()
+                        << "\n");
+    LLVM_DEBUG(dbgs() << "Traversed " << Conditionals.size()
+                      << " conditionals\n");
+    return PI;
+  }
+
+  void visitStoreInst(StoreInst &SI) {
+    // Storing the pointer escapes it.
+    if (U->get() == SI.getValueOperand())
+      return PI.setEscapedAndAborted(&SI);
+    // Writes to the pointer are UB w/ __grid_constant__, but do not force a
+    // copy.
+    if (!IsGridConstant)
+      return PI.setAborted(&SI);
+  }
+
+  void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
+    // ASCs to param space are no-ops and do not need a copy
+    if (ASC.getDestAddressSpace() != ADDRESS_SPACE_PARAM)
+      return PI.setEscapedAndAborted(&ASC);
+    Base::visitAddrSpaceCastInst(ASC);
+  }
+
+  void visitPtrToIntInst(PtrToIntInst &I) {
+    if (IsGridConstant)
+      return;
+    Base::visitPtrToIntInst(I);
+  }
+  void visitPHINodeOrSelectInst(Instruction &I) {
+    assert(isa<PHINode>(I) || isa<SelectInst>(I));
+  }
+  // PHI and select just pass through the pointers.
+  void visitPHINode(PHINode &PN) { enqueueUsers(PN); }
+  void visitSelectInst(SelectInst &SI) { enqueueUsers(SI); }
+
+  void visitMemTransferInst(MemTransferInst &II) {
+    if (*U == II.getRawDest() && !IsGridConstant)
+      PI.setAborted(&II);
+    // memcpy/memmove are OK when the pointer is the source. We can convert
+    // them to AS-specific memcpy.
+  }
+
+  void visitMemSetInst(MemSetInst &II) {
+    if (!IsGridConstant)
+      PI.setAborted(&II);
+  }
+}; // struct ArgUseChecker
+} // namespace
+
 void NVPTXLowerArgs::handleByValParam(const NVPTXTargetMachine &TM,
                                       Argument *Arg) {
-  bool IsGridConstant = isParamGridConstant(*Arg);
   Function *Func = Arg->getParent();
+  bool HasCvtaParam = TM.getSubtargetImpl(*Func)->hasCvtaParam();
+  bool IsGridConstant = HasCvtaParam && isParamGridConstant(*Arg);
+  const DataLayout &DL = Func->getDataLayout();
   BasicBlock::iterator FirstInst = Func->getEntryBlock().begin();
   Type *StructType = Arg->getParamByValType();
   assert(StructType && "Missing byval type");

-  auto AreSupportedUsers = [&](Value *Start) {
-    SmallVector<Value *> ValuesToCheck = {Start};
-    auto IsSupportedUse = [IsGridConstant](Value *V) -> bool {
-      if (isa<LoadInst>(V) || isa<GetElementPtrInst>(V) || isa<BitCastInst>(V))
-        return true;
-      // ASC to param space are OK, too -- we'll just strip them.
-      if (auto *ASC = dyn_cast<AddrSpaceCastInst>(V)) {
-        if (ASC->getDestAddressSpace() == ADDRESS_SPACE_PARAM)
-          return true;
-      }
-      // Simple calls and stores are supported for grid_constants
-      // writes to these pointers are undefined behaviour
-      if (IsGridConstant &&
-          (isa<CallInst>(V) || isa<StoreInst>(V) || isa<PtrToIntInst>(V)))
-        return true;
-      return false;
-    };
-
-    while (!ValuesToCheck.empty()) {
-      Value *V = ValuesToCheck.pop_back_val();
-      if (!IsSupportedUse(V)) {
-        LLVM_DEBUG(dbgs() << "Need a "
-                          << (isParamGridConstant(*Arg) ?
"cast " : "copy ") - << "of " << *Arg << " because of " << *V << "\n"); - (void)Arg; - return false; - } - if (!isa(V) && !isa(V) && !isa(V) && - !isa(V)) - llvm::append_range(ValuesToCheck, V->users()); - } - return true; - }; - - if (llvm::all_of(Arg->users(), AreSupportedUsers)) { + ArgUseChecker AUC(DL, IsGridConstant); + ArgUseChecker::PtrInfo PI = AUC.visitArgPtr(*Arg); + bool ArgUseIsReadOnly = !(PI.isEscaped() || PI.isAborted()); + // Easy case, accessing parameter directly is fine. + if (ArgUseIsReadOnly && AUC.Conditionals.empty()) { // Convert all loads and intermediate operations to use parameter AS and // skip creation of a local copy of the argument. SmallVector UsesToUpdate; @@ -462,7 +567,7 @@ void NVPTXLowerArgs::handleByValParam(const NVPTXTargetMachine &TM, Arg, PointerType::get(StructType, ADDRESS_SPACE_PARAM), Arg->getName(), FirstInst); for (Use *U : UsesToUpdate) - convertToParamAS(U, ArgInParamAS, IsGridConstant); + convertToParamAS(U, ArgInParamAS, HasCvtaParam, IsGridConstant); LLVM_DEBUG(dbgs() << "No need to copy or cast " << *Arg << "\n"); const auto *TLI = @@ -473,13 +578,17 @@ void NVPTXLowerArgs::handleByValParam(const NVPTXTargetMachine &TM, return; } - const DataLayout &DL = Func->getDataLayout(); + // We can't access byval arg directly and need a pointer. on sm_70+ we have + // ability to take a pointer to the argument without making a local copy. + // However, we're still not allowed to write to it. If the user specified + // `__grid_constant__` for the argument, we'll consider escaped pointer as + // read-only. unsigned AS = DL.getAllocaAddrSpace(); - if (isParamGridConstant(*Arg)) { - // Writes to a grid constant are undefined behaviour. We do not need a - // temporary copy. When a pointer might have escaped, conservatively replace - // all of its uses (which might include a device function call) with a cast - // to the generic address space. + if (HasCvtaParam && (ArgUseIsReadOnly || IsGridConstant)) { + LLVM_DEBUG(dbgs() << "Using non-copy pointer to " << *Arg << "\n"); + // Replace all argument pointer uses (which might include a device function + // call) with a cast to the generic address space using cvta.param + // instruction, which avoids a local copy. IRBuilder<> IRB(&Func->getEntryBlock().front()); // Cast argument to param address space @@ -500,6 +609,7 @@ void NVPTXLowerArgs::handleByValParam(const NVPTXTargetMachine &TM, // Do not replace Arg in the cast to param space CastToParam->setOperand(0, Arg); } else { + LLVM_DEBUG(dbgs() << "Creating a local copy of " << *Arg << "\n"); // Otherwise we have to create a temporary copy. 
    AllocaInst *AllocA = new AllocaInst(StructType, AS, Arg->getName(), FirstInst);
diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
index 0591782e8148b9..457f10f1d64a26 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -94,6 +94,7 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
   bool hasDotInstructions() const {
     return SmVersion >= 61 && PTXVersion >= 50;
   }
+  bool hasCvtaParam() const { return SmVersion >= 70 && PTXVersion >= 77; }
   unsigned int getFullSmVersion() const { return FullSmVersion; }
   unsigned int getSmVersion() const { return getFullSmVersion() / 10; }
   // GPUs with "a" suffix include architecture-accelerated features that
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index ff4c0e9bbd50e7..02585c9f603736 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -3535,7 +3535,21 @@ bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) {
   return selectVSplat(N, SplatVal);
 }

-bool RISCVDAGToDAGISel::selectFPImm(SDValue N, SDValue &Imm) {
+bool RISCVDAGToDAGISel::selectScalarFPAsInt(SDValue N, SDValue &Imm) {
+  // Allow bitcasts from XLenVT -> FP.
+  if (N.getOpcode() == ISD::BITCAST &&
+      N.getOperand(0).getValueType() == Subtarget->getXLenVT()) {
+    Imm = N.getOperand(0);
+    return true;
+  }
+  // Allow moves from XLenVT to FP.
+  if (N.getOpcode() == RISCVISD::FMV_H_X ||
+      N.getOpcode() == RISCVISD::FMV_W_X_RV64) {
+    Imm = N.getOperand(0);
+    return true;
+  }
+
+  // Otherwise, look for FP constants that can be materialized with a scalar int.
   ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N.getNode());
   if (!CFP)
     return false;
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index 1d120c13442d51..2e738d8d25a6dc 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -140,7 +140,7 @@ class RISCVDAGToDAGISel : public SelectionDAGISel {
   // Matches the splat of a value which can be extended or truncated, such that
   // only the bottom 8 bits are preserved.
bool selectLow8BitsVSplat(SDValue N, SDValue &SplatVal); - bool selectFPImm(SDValue N, SDValue &Imm); + bool selectScalarFPAsInt(SDValue N, SDValue &Imm); bool selectRVVSimm5(SDValue N, unsigned Width, SDValue &Imm); template bool selectRVVSimm5(SDValue N, SDValue &Imm) { diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 4554163d4551dc..8112b5eb144da9 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1128,6 +1128,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, continue; SetCommonVFPActions(VT); SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs); + SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs); } } @@ -1137,6 +1138,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, continue; SetCommonVFPActions(VT); SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs); + SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs); SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs); } } @@ -14480,6 +14482,13 @@ struct NodeExtensionHelper { if (Source.getValueType() == NarrowVT) return Source; + // vfmadd_vl -> vfwmadd_vl can take bf16 operands + if (Source.getValueType().getVectorElementType() == MVT::bf16) { + assert(Root->getSimpleValueType(0).getVectorElementType() == MVT::f32 && + Root->getOpcode() == RISCVISD::VFMADD_VL); + return Source; + } + unsigned ExtOpc = getExtOpc(*SupportsExt); // If we need an extension, we should be changing the type. @@ -15731,7 +15740,7 @@ static SDValue performVFMADD_VLCombine(SDNode *N, return V; if (N->getValueType(0).getVectorElementType() == MVT::f32 && - !Subtarget.hasVInstructionsF16()) + !Subtarget.hasVInstructionsF16() && !Subtarget.hasStdExtZvfbfwma()) return SDValue(); // FIXME: Ignore strict opcodes for now. diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 13212c2aea5dde..02f65ffcea6454 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -2918,7 +2918,7 @@ RISCVInstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI, // if any possible. if (MO.getTargetFlags() == RISCVII::MO_PCREL_LO && (MI.getMF()->getTarget().getFunctionSections() || F.hasComdat() || - F.hasSection())) + F.hasSection() || F.getSectionPrefix())) return outliner::InstrType::Illegal; } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 430e09fd834ba7..fe7de9d7bc79aa 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -236,7 +236,8 @@ def VLOpFrag : PatFrag<(ops), (XLenVT (VLOp (XLenVT AVL:$vl)))>; // This must be kept in sync with RISCV::VLMaxSentinel. def VLMax : OutPatFrag<(ops), (XLenVT -1)>; -def SelectFPImm : ComplexPattern; +def SelectScalarFPAsInt : ComplexPattern; // List of EEW. 
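// (EEW = effective element width: the element size in bits actually used by a
// vector load/store, which may differ from the current SEW setting.)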
defvar EEWList = [8, 16, 32, 64]; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td index f12f82cb159529..b54cdcbd1b0e9c 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td @@ -1374,7 +1374,7 @@ foreach fvti = !listconcat(AllFloatVectors, AllBFloatVectors) in { fvti.AVL, fvti.Log2SEW)>; def : Pat<(fvti.Vector (vselect (fvti.Mask V0), - (SplatFPOp (SelectFPImm (XLenVT GPR:$imm))), + (SplatFPOp (SelectScalarFPAsInt (XLenVT GPR:$imm))), fvti.RegClass:$rs2)), (!cast("PseudoVMERGE_VXM_"#fvti.LMul.MX) (fvti.Vector (IMPLICIT_DEF)), diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td index 699536b1869692..a27c3a416816e2 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -2009,13 +2009,18 @@ multiclass VPatWidenFPMulAccVL_VV_VF { } } -multiclass VPatWidenFPMulAccVL_VV_VF_RM { - foreach vtiToWti = AllWidenableFloatVectors in { +multiclass VPatWidenFPMulAccVL_VV_VF_RM vtiToWtis = + AllWidenableFloatVectors> { + foreach vtiToWti = vtiToWtis in { defvar vti = vtiToWti.Vti; defvar wti = vtiToWti.Wti; defvar suffix = vti.LMul.MX # "_E" # vti.SEW; let Predicates = !listconcat(GetVTypePredicates.Predicates, - GetVTypePredicates.Predicates) in { + GetVTypePredicates.Predicates, + !if(!eq(vti.Scalar, bf16), + [HasStdExtZvfbfwma], + [])) in { def : Pat<(vop (vti.Vector vti.RegClass:$rs1), (vti.Vector vti.RegClass:$rs2), (wti.Vector wti.RegClass:$rd), (vti.Mask V0), @@ -2451,6 +2456,8 @@ defm : VPatFPMulAccVL_VV_VF_RM; // 13.7. Vector Widening Floating-Point Fused Multiply-Add Instructions defm : VPatWidenFPMulAccVL_VV_VF_RM; +defm : VPatWidenFPMulAccVL_VV_VF_RM; defm : VPatWidenFPMulAccVL_VV_VF_RM; defm : VPatWidenFPMulAccVL_VV_VF_RM; defm : VPatWidenFPMulAccVL_VV_VF_RM; @@ -2568,7 +2575,7 @@ foreach fvti = !listconcat(AllFloatVectors, AllBFloatVectors) in { GPR:$vl, fvti.Log2SEW)>; def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask V0), - (SplatFPOp (SelectFPImm (XLenVT GPR:$imm))), + (SplatFPOp (SelectScalarFPAsInt (XLenVT GPR:$imm))), fvti.RegClass:$rs2, fvti.RegClass:$passthru, VLOpFrag)), @@ -2612,7 +2619,7 @@ foreach fvti = !listconcat(AllFloatVectors, AllBFloatVectors) in { (!cast("PseudoVMV_V_I_"#fvti.LMul.MX) $passthru, 0, GPR:$vl, fvti.Log2SEW, TU_MU)>; def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl - fvti.Vector:$passthru, (fvti.Scalar (SelectFPImm (XLenVT GPR:$imm))), VLOpFrag)), + fvti.Vector:$passthru, (fvti.Scalar (SelectScalarFPAsInt (XLenVT GPR:$imm))), VLOpFrag)), (!cast("PseudoVMV_V_X_"#fvti.LMul.MX) $passthru, GPR:$imm, GPR:$vl, fvti.Log2SEW, TU_MU)>; } @@ -2933,7 +2940,7 @@ foreach vti = NoGroupFloatVectors in { VLOpFrag)), (PseudoVMV_S_X $passthru, (XLenVT X0), GPR:$vl, vti.Log2SEW)>; def : Pat<(vti.Vector (riscv_vfmv_s_f_vl (vti.Vector vti.RegClass:$passthru), - (vti.Scalar (SelectFPImm (XLenVT GPR:$imm))), + (vti.Scalar (SelectScalarFPAsInt (XLenVT GPR:$imm))), VLOpFrag)), (PseudoVMV_S_X $passthru, GPR:$imm, GPR:$vl, vti.Log2SEW)>; def : Pat<(vti.Vector (riscv_vfmv_s_f_vl (vti.Vector vti.RegClass:$passthru), diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index e809e15eacf696..2b5e7c47279284 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -1033,6 +1033,21 @@ 
RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                    TTI::CastContextHint::None, CostKind);
    break;
  }
+
+  // vp compare
+  case Intrinsic::vp_icmp:
+  case Intrinsic::vp_fcmp: {
+    Intrinsic::ID IID = ICA.getID();
+    std::optional<unsigned> FOp = VPIntrinsic::getFunctionalOpcodeForVP(IID);
+    auto *UI = dyn_cast<VPCmpIntrinsic>(ICA.getInst());
+
+    // We can only handle vp_cmp intrinsics with underlying instructions.
+    if (!UI)
+      break;
+    assert(FOp);
+    return getCmpSelInstrCost(*FOp, ICA.getArgTypes()[0], ICA.getReturnType(),
+                              UI->getPredicate(), CostKind);
+  }
  }

  if (ST->hasVInstructions() && RetTy->isVectorTy()) {
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 5cc084f3ab1387..1875a8fd4c4404 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -96,6 +96,10 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
       setOperationAction(ISD::STORE, T, Custom);
     }
   }
+  if (Subtarget->hasFP16()) {
+    setOperationAction(ISD::LOAD, MVT::v8f16, Custom);
+    setOperationAction(ISD::STORE, MVT::v8f16, Custom);
+  }
   if (Subtarget->hasReferenceTypes()) {
     // We need custom load and store lowering for both externref, funcref and
     // Other. The MVT::Other here represents tables of reference types.
@@ -208,6 +212,9 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
                   MVT::v2f64})
     setOperationAction(ISD::BUILD_VECTOR, T, Custom);

+  if (Subtarget->hasFP16())
+    setOperationAction(ISD::BUILD_VECTOR, MVT::f16, Custom);
+
   // We have custom shuffle lowering to expose the shuffle mask
   for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32,
                  MVT::v2i64, MVT::v2f64})
@@ -2055,6 +2062,18 @@ static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG) {

 SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
                                                      SelectionDAG &DAG) const {
+  MVT VT = Op.getSimpleValueType();
+  if (VT == MVT::v8f16) {
+    // BUILD_VECTOR can't handle FP16 operands since Wasm doesn't have a scalar
+    // FP16 type, so cast them to I16s.
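+    // E.g. (v8f16 (build_vector f16:a, ..., f16:h)) is emitted as
+    // (v8f16 (bitcast (v8i16 (build_vector (i16 (bitcast a)), ...,
+    //                                      (i16 (bitcast h)))))).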
+ MVT IVT = VT.changeVectorElementType(MVT::i16); + SmallVector NewOps; + for (unsigned I = 0, E = Op.getNumOperands(); I < E; ++I) + NewOps.push_back(DAG.getBitcast(MVT::i16, Op.getOperand(I))); + SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(), IVT, NewOps); + return DAG.getBitcast(VT, Res); + } + if (auto ConvertLow = LowerConvertLow(Op, DAG)) return ConvertLow; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index 9d17d90f530541..b652ee98cef107 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -189,7 +189,7 @@ defm LOAD_V128_A64 : } // Def load patterns from WebAssemblyInstrMemory.td for vector types -foreach vec = StdVecs in { +foreach vec = AllVecs in { defm : LoadPat; } @@ -390,7 +390,7 @@ defm STORE_V128_A64 : } // Def store patterns from WebAssemblyInstrMemory.td for vector types -foreach vec = StdVecs in { +foreach vec = AllVecs in { defm : StorePat; } @@ -684,7 +684,7 @@ defm EXTRACT_LANE_F16x8 : HALF_PRECISION_I<(outs F32:$dst), (ins V128:$vec, vec_i8imm_op:$idx), (outs), (ins vec_i8imm_op:$idx), [(set (f32 F32:$dst), (int_wasm_extract_lane_f16x8 - (v8f16 V128:$vec), (i32 LaneIdx16:$idx)))], + (v8f16 V128:$vec), (i32 LaneIdx8:$idx)))], "f16x8.extract_lane\t$dst, $vec, $idx", "f16x8.extract_lane\t$idx", 0x121>; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index d0794cb9bfde3d..c3b919921f23b3 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -29851,17 +29851,103 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget, DAG.getNode(Opc, dl, ExtVT, R, Amt)); } - // Constant ISD::SRA/SRL can be performed efficiently on vXi8 vectors as we - // extend to vXi16 to perform a MUL scale effectively as a MUL_LOHI. + // Constant ISD::SRA/SRL can be performed efficiently on vXi8 vectors by using + // vXi16 vector operations. if (ConstantAmt && (Opc == ISD::SRA || Opc == ISD::SRL) && (VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget.hasInt256()) || (VT == MVT::v64i8 && Subtarget.hasBWI())) && !Subtarget.hasXOP()) { int NumElts = VT.getVectorNumElements(); + MVT VT16 = MVT::getVectorVT(MVT::i16, NumElts / 2); + // We can do this extra fast if each pair of i8 elements is shifted by the + // same amount by doing this SWAR style: use a shift to move the valid bits + // to the right position, mask out any bits which crossed from one element + // to the other. + if (Opc == ISD::SRL || Opc == ISD::SHL) { + APInt UndefElts; + SmallVector AmtBits; + if (getTargetConstantBitsFromNode(Amt, /*EltSizeInBits=*/8, UndefElts, + AmtBits, /*AllowWholeUndefs=*/true, + /*AllowPartialUndefs=*/false)) { + // This optimized lowering is only valid if the elements in a pair can + // be treated identically. + bool SameShifts = true; + SmallVector AmtBits16(NumElts / 2); + APInt UndefElts16 = APInt::getZero(AmtBits16.size()); + for (unsigned SrcI = 0, E = AmtBits.size(); SrcI != E; SrcI += 2) { + unsigned DstI = SrcI / 2; + // Both elements are undef? Make a note and keep going. + if (UndefElts[SrcI] && UndefElts[SrcI + 1]) { + AmtBits16[DstI] = APInt::getZero(16); + UndefElts16.setBit(DstI); + continue; + } + // Even element is undef? We will shift it by the same shift amount as + // the odd element. + if (UndefElts[SrcI]) { + AmtBits16[DstI] = AmtBits[SrcI + 1].zext(16); + continue; + } + // Odd element is undef? 
We will shift it by the same shift amount as
+        // the even element.
+        if (UndefElts[SrcI + 1]) {
+          AmtBits16[DstI] = AmtBits[SrcI].zext(16);
+          continue;
+        }
+        // Both elements are equal.
+        if (AmtBits[SrcI] == AmtBits[SrcI + 1]) {
+          AmtBits16[DstI] = AmtBits[SrcI].zext(16);
+          continue;
+        }
+        // One of the provisional i16 elements will not have the same shift
+        // amount. Let's bail.
+        SameShifts = false;
+        break;
+      }
+
+      // We are only dealing with identical pairs and the operation is a
+      // logical shift.
+      if (SameShifts) {
+        // Cast the operand to vXi16.
+        SDValue R16 = DAG.getBitcast(VT16, R);
+        // Create our new vector of shift amounts.
+        SDValue Amt16 = getConstVector(AmtBits16, UndefElts16, VT16, DAG, dl);
+        // Perform the actual shift.
+        SDValue ShiftedR = DAG.getNode(Opc, dl, VT16, R16, Amt16);
+        // Now we need to construct a mask which will "drop" bits that get
+        // shifted past the LSB/MSB. For a logical shift left, it will look
+        // like:
+        //   MaskLowBits = (0xff << Amt16) & 0xff;
+        //   MaskHighBits = MaskLowBits << 8;
+        //   Mask = MaskLowBits | MaskHighBits;
+        //
+        // This masking ensures that bits cannot migrate from one i8 to
+        // another. The construction of this mask will be constant folded.
+        // The mask for a logical right shift is nearly identical, the only
+        // difference is that 0xff is shifted right instead of left.
+        SDValue Cst255 = DAG.getConstant(0xff, dl, MVT::i16);
+        SDValue Splat255 = DAG.getSplat(VT16, dl, Cst255);
+        // The mask for the low bits is most simply expressed as an 8-bit
+        // field of all ones which is shifted in the exact same way the data
+        // is shifted but masked with 0xff.
+        SDValue MaskLowBits = DAG.getNode(Opc, dl, VT16, Splat255, Amt16);
+        MaskLowBits = DAG.getNode(ISD::AND, dl, VT16, MaskLowBits, Splat255);
+        SDValue Cst8 = DAG.getConstant(8, dl, MVT::i16);
+        SDValue Splat8 = DAG.getSplat(VT16, dl, Cst8);
+        // The mask for the high bits is the same as the mask for the low
+        // bits but shifted up by 8.
+        SDValue MaskHighBits = DAG.getNode(ISD::SHL, dl, VT16, MaskLowBits, Splat8);
+        SDValue Mask = DAG.getNode(ISD::OR, dl, VT16, MaskLowBits, MaskHighBits);
+        // Finally, we mask the shifted vector with the SWAR mask.
+        SDValue Masked = DAG.getNode(ISD::AND, dl, VT16, ShiftedR, Mask);
+        return DAG.getBitcast(VT, Masked);
+      }
+    }
+  }

    SDValue Cst8 = DAG.getTargetConstant(8, dl, MVT::i8);
-    // Extend constant shift amount to vXi16 (it doesn't matter if the type
-    // isn't legal).
+    // Extend to vXi16 to perform a MUL scale effectively as a MUL_LOHI (it
+    // doesn't matter if the type isn't legal).
     MVT ExVT = MVT::getVectorVT(MVT::i16, NumElts);
     Amt = DAG.getZExtOrTrunc(Amt, dl, ExVT);

     Amt = DAG.getNode(ISD::SUB, dl, ExVT, DAG.getConstant(8, dl, ExVT), Amt);
@@ -29885,7 +29971,6 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
     }
   }

-    MVT VT16 = MVT::getVectorVT(MVT::i16, NumElts / 2);
     SDValue LoA = DAG.getBuildVector(VT16, dl, LoAmt);
     SDValue HiA = DAG.getBuildVector(VT16, dl, HiAmt);
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 640a7bf3d672f0..3b6b154b9660cf 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1182,13 +1182,23 @@ class LoopVectorizationCostModel {
                                InstructionCost Cost) {
     assert(VF.isVector() && "Expected VF >=2");
     /// Broadcast this decision to all instructions inside the group.
-    /// But the cost will be assigned to one instruction only.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 640a7bf3d672f0..3b6b154b9660cf 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1182,13 +1182,22 @@ class LoopVectorizationCostModel {
                                         InstructionCost Cost) {
     assert(VF.isVector() && "Expected VF >=2");
     /// Broadcast this decision to all instructions inside the group.
-    /// But the cost will be assigned to one instruction only.
+    /// When interleaving, the cost will only be assigned to one instruction,
+    /// the insert position. For other cases, add the appropriate fraction of
+    /// the total cost to each instruction. This ensures accurate costs are
+    /// used, even if the insert position instruction is not used.
+    InstructionCost InsertPosCost = Cost;
+    InstructionCost OtherMemberCost = 0;
+    if (W != CM_Interleave)
+      OtherMemberCost = InsertPosCost = Cost / Grp->getNumMembers();
     for (unsigned Idx = 0; Idx < Grp->getFactor(); ++Idx) {
       if (auto *I = Grp->getMember(Idx)) {
         if (Grp->getInsertPos() == I)
-          WideningDecisions[std::make_pair(I, VF)] = std::make_pair(W, Cost);
+          WideningDecisions[std::make_pair(I, VF)] =
+              std::make_pair(W, InsertPosCost);
         else
-          WideningDecisions[std::make_pair(I, VF)] = std::make_pair(W, 0);
+          WideningDecisions[std::make_pair(I, VF)] =
+              std::make_pair(W, OtherMemberCost);
       }
     }
   }
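
As a model of the new cost bookkeeping, here is a simplified sketch with invented names and plain unsigned costs rather than the LLVM classes: interleaving still charges the whole group cost at the insert position, while every other widening decision now splits the cost evenly across the group's members, so a member's cost stays meaningful even when the insert position instruction is later discarded.

    #include <vector>

    // Simplified sketch of the cost split above; not the LLVM classes.
    enum WideningKind { CM_Interleave, CM_Widen };

    std::vector<unsigned> splitGroupCost(WideningKind W, unsigned Cost,
                                         unsigned NumMembers,
                                         unsigned InsertPos) {
      unsigned InsertPosCost = Cost; // interleave: whole cost at insert pos
      unsigned OtherMemberCost = 0;
      if (W != CM_Interleave)        // otherwise: an even share per member
        OtherMemberCost = InsertPosCost = Cost / NumMembers;
      std::vector<unsigned> PerMember(NumMembers, OtherMemberCost);
      PerMember[InsertPos] = InsertPosCost;
      return PerMember;
    }

For example, splitGroupCost(CM_Widen, 8, 4, 0) yields {2, 2, 2, 2}, whereas splitGroupCost(CM_Interleave, 8, 4, 0) yields {8, 0, 0, 0}.
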
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index c89a50fc7bd429..00d9f2909d71e2 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -4819,12 +4819,6 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
   // representation is better than just gather.
   auto CheckForShuffledLoads = [&, &TTI = *TTI](Align CommonAlignment,
                                                 bool ProfitableGatherPointers) {
-    // FIXME: The following code has not been updated for non-power-of-2
-    // vectors. The splitting logic here does not cover the original
-    // vector if the vector factor is not a power of two. FIXME
-    if (!has_single_bit(VL.size()))
-      return false;
-
     // Compare masked gather cost and loads + insert subvector costs.
     TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
     auto [ScalarGEPCost, VectorGEPCost] =
@@ -4874,6 +4868,13 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
     constexpr unsigned ListLimit = 4;
     if (!TryRecursiveCheck || VL.size() < ListLimit)
       return MaskedGatherCost - GatherCost >= -SLPCostThreshold;
+
+    // FIXME: The following code has not been updated for non-power-of-2
+    // vectors. The splitting logic here does not cover the original
+    // vector if the vector factor is not a power of two. FIXME
+    if (!has_single_bit(VL.size()))
+      return false;
+
     unsigned Sz = DL->getTypeSizeInBits(ScalarTy);
     unsigned MinVF = getMinVF(2 * Sz);
     DemandedElts.clearAllBits();
diff --git a/llvm/test/Analysis/CostModel/RISCV/cmp.ll b/llvm/test/Analysis/CostModel/RISCV/cmp.ll
new file mode 100644
index 00000000000000..40938e000b64ec
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/RISCV/cmp.ll
@@ -0,0 +1,660 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v,+f -passes="print<cost-model>" -cost-kind=throughput --type-based-intrinsic-cost=true 2>&1 -disable-output | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+f -passes="print<cost-model>" -cost-kind=throughput --type-based-intrinsic-cost=true 2>&1 -disable-output | FileCheck %s --check-prefixes=CHECK,RV64
+
+define void @icmp() {
+; RV32-LABEL: 'icmp'
+; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = icmp slt <2 x i1> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = icmp slt <2 x i8> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = icmp slt <2 x i16> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = icmp slt <2 x i32> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = icmp slt <2 x i64> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call <2 x i1> @llvm.vp.icmp.v2i1(<2 x i1> undef, <2 x i1> undef, metadata !"slt", <2 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call <2 x i1> @llvm.vp.icmp.v2i8(<2 x i8> undef, <2 x i8> undef, metadata !"slt", <2 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call <2 x i1> @llvm.vp.icmp.v2i16(<2 x i16> undef, <2 x i16> undef, metadata !"slt", <2 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call <2 x i1> @llvm.vp.icmp.v2i32(<2 x i32> undef, <2 x i32> undef, metadata !"slt", <2 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = call <2 x i1> @llvm.vp.icmp.v2i64(<2 x i64> undef, <2 x i64> undef, metadata !"slt", <2 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = icmp slt <4 x i1> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = icmp slt <4 x i8> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = icmp slt <4 x i16> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = icmp slt <4 x i32> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = icmp slt <4 x i64> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = call <4 x i1> @llvm.vp.icmp.v4i1(<4 x i1> undef, <4 x i1> undef, metadata !"slt", <4 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = call <4 x i1> @llvm.vp.icmp.v4i8(<4 x i8> undef, <4 x i8> undef, metadata !"slt", <4 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = call <4 x i1> @llvm.vp.icmp.v4i16(<4 x i16> undef, <4 x i16> undef, metadata !"slt", <4 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an
estimated cost of 1 for instruction: %19 = call <4 x i1> @llvm.vp.icmp.v4i32(<4 x i32> undef, <4 x i32> undef, metadata !"slt", <4 x i1> undef, i32 undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %20 = call <4 x i1> @llvm.vp.icmp.v4i64(<4 x i64> undef, <4 x i64> undef, metadata !"slt", <4 x i1> undef, i32 undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = icmp slt <8 x i1> undef, undef +; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = icmp slt <8 x i8> undef, undef +; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = icmp slt <8 x i16> undef, undef +; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %24 = icmp slt <8 x i32> undef, undef +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %25 = icmp slt <8 x i64> undef, undef +; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = call <8 x i1> @llvm.vp.icmp.v8i1(<8 x i1> undef, <8 x i1> undef, metadata !"slt", <8 x i1> undef, i32 undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = call <8 x i1> @llvm.vp.icmp.v8i8(<8 x i8> undef, <8 x i8> undef, metadata !"slt", <8 x i1> undef, i32 undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %28 = call <8 x i1> @llvm.vp.icmp.v8i16(<8 x i16> undef, <8 x i16> undef, metadata !"slt", <8 x i1> undef, i32 undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %29 = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> undef, <8 x i32> undef, metadata !"slt", <8 x i1> undef, i32 undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %30 = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> undef, <8 x i64> undef, metadata !"slt", <8 x i1> undef, i32 undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %31 = icmp slt <16 x i1> undef, undef +; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %32 = icmp slt <16 x i8> undef, undef +; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %33 = icmp slt <16 x i16> undef, undef +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %34 = icmp slt <16 x i32> undef, undef +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %35 = icmp slt <16 x i64> undef, undef +; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %36 = call <16 x i1> @llvm.vp.icmp.v16i1(<16 x i1> undef, <16 x i1> undef, metadata !"slt", <16 x i1> undef, i32 undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %37 = call <16 x i1> @llvm.vp.icmp.v16i8(<16 x i8> undef, <16 x i8> undef, metadata !"slt", <16 x i1> undef, i32 undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %38 = call <16 x i1> @llvm.vp.icmp.v16i16(<16 x i16> undef, <16 x i16> undef, metadata !"slt", <16 x i1> undef, i32 undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %39 = call <16 x i1> @llvm.vp.icmp.v16i32(<16 x i32> undef, <16 x i32> undef, metadata !"slt", <16 x i1> undef, i32 undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %40 = call <16 x i1> @llvm.vp.icmp.v16i64(<16 x i64> undef, <16 x i64> undef, metadata !"slt", <16 x i1> undef, i32 undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %41 = icmp slt <32 x i1> undef, undef +; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %42 = icmp 
slt <32 x i8> undef, undef +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %43 = icmp slt <32 x i16> undef, undef +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %44 = icmp slt <32 x i32> undef, undef +; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %45 = icmp slt <32 x i64> undef, undef +; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %46 = call <32 x i1> @llvm.vp.icmp.v32i1(<32 x i1> undef, <32 x i1> undef, metadata !"slt", <32 x i1> undef, i32 undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %47 = call <32 x i1> @llvm.vp.icmp.v32i8(<32 x i8> undef, <32 x i8> undef, metadata !"slt", <32 x i1> undef, i32 undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %48 = call <32 x i1> @llvm.vp.icmp.v32i16(<32 x i16> undef, <32 x i16> undef, metadata !"slt", <32 x i1> undef, i32 undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %49 = call <32 x i1> @llvm.vp.icmp.v32i32(<32 x i32> undef, <32 x i32> undef, metadata !"slt", <32 x i1> undef, i32 undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %50 = call <32 x i1> @llvm.vp.icmp.v32i64(<32 x i64> undef, <32 x i64> undef, metadata !"slt", <32 x i1> undef, i32 undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %51 = icmp slt <64 x i1> undef, undef +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %52 = icmp slt <64 x i8> undef, undef +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %53 = icmp slt <64 x i16> undef, undef +; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %54 = icmp slt <64 x i32> undef, undef +; RV32-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %55 = icmp slt <64 x i64> undef, undef +; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %56 = call <64 x i1> @llvm.vp.icmp.v64i1(<64 x i1> undef, <64 x i1> undef, metadata !"slt", <64 x i1> undef, i32 undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %57 = call <64 x i1> @llvm.vp.icmp.v64i8(<64 x i8> undef, <64 x i8> undef, metadata !"slt", <64 x i1> undef, i32 undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %58 = call <64 x i1> @llvm.vp.icmp.v64i16(<64 x i16> undef, <64 x i16> undef, metadata !"slt", <64 x i1> undef, i32 undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %59 = call <64 x i1> @llvm.vp.icmp.v64i32(<64 x i32> undef, <64 x i32> undef, metadata !"slt", <64 x i1> undef, i32 undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %60 = call <64 x i1> @llvm.vp.icmp.v64i64(<64 x i64> undef, <64 x i64> undef, metadata !"slt", <64 x i1> undef, i32 undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %61 = icmp slt <128 x i1> undef, undef +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %62 = icmp slt <128 x i8> undef, undef +; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %63 = icmp slt <128 x i16> undef, undef +; RV32-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %64 = icmp slt <128 x i32> undef, undef +; RV32-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %65 = icmp slt <128 x i64> undef, undef +; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %66 = call <128 x i1> @llvm.vp.icmp.v128i1(<128 x i1> undef, 
<128 x i1> undef, metadata !"slt", <128 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %67 = call <128 x i1> @llvm.vp.icmp.v128i8(<128 x i8> undef, <128 x i8> undef, metadata !"slt", <128 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %68 = call <128 x i1> @llvm.vp.icmp.v128i16(<128 x i16> undef, <128 x i16> undef, metadata !"slt", <128 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %69 = call <128 x i1> @llvm.vp.icmp.v128i32(<128 x i32> undef, <128 x i32> undef, metadata !"slt", <128 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %70 = call <128 x i1> @llvm.vp.icmp.v128i64(<128 x i64> undef, <128 x i64> undef, metadata !"slt", <128 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %71 = icmp slt <256 x i1> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %72 = icmp slt <256 x i8> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %73 = icmp slt <256 x i16> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %74 = icmp slt <256 x i32> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %75 = icmp slt <256 x i64> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %76 = call <256 x i1> @llvm.vp.icmp.v256i1(<256 x i1> undef, <256 x i1> undef, metadata !"slt", <256 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %77 = call <256 x i1> @llvm.vp.icmp.v256i8(<256 x i8> undef, <256 x i8> undef, metadata !"slt", <256 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %78 = call <256 x i1> @llvm.vp.icmp.v256i16(<256 x i16> undef, <256 x i16> undef, metadata !"slt", <256 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %79 = call <256 x i1> @llvm.vp.icmp.v256i32(<256 x i32> undef, <256 x i32> undef, metadata !"slt", <256 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %80 = call <256 x i1> @llvm.vp.icmp.v256i64(<256 x i64> undef, <256 x i64> undef, metadata !"slt", <256 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %81 = icmp slt <vscale x 1 x i1> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %82 = icmp slt <vscale x 1 x i8> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %83 = icmp slt <vscale x 1 x i16> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %84 = icmp slt <vscale x 1 x i32> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %85 = icmp slt <vscale x 1 x i64> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %86 = call <vscale x 1 x i1> @llvm.vp.icmp.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, metadata !"slt", <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %87 = call <vscale x 1 x i1> @llvm.vp.icmp.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef, metadata !"slt", <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %88 = call <vscale x 1 x i1> @llvm.vp.icmp.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i16> undef, metadata !"slt", <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %89 = call <vscale x 1 x i1> @llvm.vp.icmp.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i32> undef, metadata !"slt", <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %90 = call <vscale x 1 x i1> @llvm.vp.icmp.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> undef, metadata !"slt", <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %91 = icmp slt <vscale x 2 x i1> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %92 = icmp slt <vscale x 2 x i8> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %93 = icmp slt <vscale x 2 x i16> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %94 = icmp slt <vscale x 2 x i32> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %95 = icmp slt <vscale x 2 x i64> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %96 = call <vscale x 2 x i1> @llvm.vp.icmp.nxv2i1(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, metadata !"slt", <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %97 = call <vscale x 2 x i1> @llvm.vp.icmp.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, metadata !"slt", <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %98 = call <vscale x 2 x i1> @llvm.vp.icmp.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, metadata !"slt", <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %99 = call <vscale x 2 x i1> @llvm.vp.icmp.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef, metadata !"slt", <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %100 = call <vscale x 2 x i1> @llvm.vp.icmp.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, metadata !"slt", <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %101 = icmp slt <vscale x 4 x i1> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %102 = icmp slt <vscale x 4 x i8> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %103 = icmp slt <vscale x 4 x i16> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %104 = icmp slt <vscale x 4 x i32> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %105 = icmp slt <vscale x 4 x i64> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %106 = call <vscale x 4 x i1> @llvm.vp.icmp.nxv4i1(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, metadata !"slt", <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %107 = call <vscale x 4 x i1> @llvm.vp.icmp.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef, metadata !"slt", <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %108 = call <vscale x 4 x i1> @llvm.vp.icmp.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, metadata !"slt", <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %109 = call <vscale x 4 x i1> @llvm.vp.icmp.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, metadata !"slt", <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %110 = call <vscale x 4 x i1> @llvm.vp.icmp.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef, metadata !"slt", <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %111 = icmp slt <vscale x 8 x i1> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %112 = icmp slt <vscale x 8 x i8> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %113 = icmp slt <vscale x 8 x i16> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %114 = icmp slt <vscale x 8 x i32> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %115 = icmp slt <vscale x 8 x i64> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %116 = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i1(<vscale x 8 x i1> undef, <vscale x 8 x i1> undef, metadata !"slt", <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %117 = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef, metadata !"slt", <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %118 = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, metadata !"slt", <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %119 = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, metadata !"slt", <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %120 = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef, metadata !"slt", <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %121 = icmp slt <vscale x 16 x i1> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %122 = icmp slt <vscale x 16 x i8> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %123 = icmp slt <vscale x 16 x i16> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %124 = icmp slt <vscale x 16 x i32> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %125 = icmp slt <vscale x 16 x i64> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %126 = call <vscale x 16 x i1> @llvm.vp.icmp.nxv16i1(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, metadata !"slt", <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %127 = call <vscale x 16 x i1> @llvm.vp.icmp.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, metadata !"slt", <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %128 = call <vscale x 16 x i1> @llvm.vp.icmp.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, metadata !"slt", <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %129 = call <vscale x 16 x i1> @llvm.vp.icmp.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef, metadata !"slt", <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %130 = call <vscale x 16 x i1> @llvm.vp.icmp.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i64> undef, metadata !"slt", <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %131 = icmp slt <vscale x 32 x i1> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %132 = icmp slt <vscale x 32 x i8> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %133 = icmp slt <vscale x 32 x i16> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %134 = icmp slt <vscale x 32 x i32> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %135 = icmp slt <vscale x 32 x i64> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %136 = call <vscale x 32 x i1> @llvm.vp.icmp.nxv32i1(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, metadata !"slt", <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %137 = call <vscale x 32 x i1> @llvm.vp.icmp.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i8> undef, metadata !"slt", <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %138 = call <vscale x 32 x i1> @llvm.vp.icmp.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i16> undef, metadata !"slt", <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %139 = call <vscale x 32 x i1> @llvm.vp.icmp.nxv32i32(<vscale x 32 x i32> undef, <vscale x 32 x i32> undef, metadata !"slt", <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %140 = call <vscale x 32 x i1> @llvm.vp.icmp.nxv32i64(<vscale x 32 x i64> undef, <vscale x 32 x i64> undef, metadata !"slt", <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %141 = icmp slt <vscale x 64 x i1> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %142 = icmp slt <vscale x 64 x i8> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %143 = icmp slt <vscale x 64 x i16> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %144 = icmp slt <vscale x 64 x i32> undef, undef
+; RV32-NEXT: Cost Model: Invalid cost for instruction: %145 = icmp slt <vscale x 64 x i64> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %146 = call <vscale x 64 x i1> @llvm.vp.icmp.nxv64i1(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, metadata !"slt", <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %147 = call <vscale x 64 x i1> @llvm.vp.icmp.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i8> undef, metadata !"slt", <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %148 = call <vscale x 64 x i1> @llvm.vp.icmp.nxv64i16(<vscale x 64 x i16> undef, <vscale x 64 x i16> undef, metadata !"slt", <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %149 = call <vscale x 64 x i1> @llvm.vp.icmp.nxv64i32(<vscale x 64 x i32> undef, <vscale x 64 x i32> undef, metadata !"slt", <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Invalid cost for instruction: %150 = call <vscale x 64 x i1> @llvm.vp.icmp.nxv64i64(<vscale x 64 x i64> undef, <vscale x 64 x i64> undef, metadata !"slt", <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %151 = icmp slt <vscale x 128 x i1> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %152 = icmp slt <vscale x 128 x i8> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %153 = icmp slt <vscale x 128 x i16> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %154 = icmp slt <vscale x 128 x i32> undef, undef
+; RV32-NEXT: Cost Model: Invalid cost for instruction: %155 = icmp slt <vscale x 128 x i64> undef, undef
+; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %156 = call <vscale x 128 x i1> @llvm.vp.icmp.nxv128i1(<vscale x 128 x i1> undef, <vscale x 128 x i1> undef, metadata !"slt", <vscale x 128 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %157 = call <vscale x 128 x i1> @llvm.vp.icmp.nxv128i8(<vscale x 128 x i8> undef, <vscale x 128 x i8> undef, metadata !"slt", <vscale x 128 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %158 = call <vscale x 128 x i1> @llvm.vp.icmp.nxv128i16(<vscale x 128 x i16> undef, <vscale x 128 x i16> undef, metadata !"slt", <vscale x 128 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %159 = call <vscale x 128 x i1> @llvm.vp.icmp.nxv128i32(<vscale x 128 x i32> undef, <vscale x 128 x i32> undef, metadata !"slt", <vscale x 128 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Invalid cost for instruction: %160 = call <vscale x 128 x i1> @llvm.vp.icmp.nxv128i64(<vscale x 128 x i64> undef, <vscale x 128 x i64> undef, metadata !"slt", <vscale x 128 x i1> undef, i32 undef)
+; RV32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RV64-LABEL: 'icmp'
+; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = icmp slt <2 x i1> undef, undef
+; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = icmp slt <2 x i8> undef, undef
+; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = icmp slt <2 x i16> undef, undef
+; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = icmp slt <2 x i32> undef, undef
+; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = icmp slt <2 x i64> undef, undef
+; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call <2 x i1> @llvm.vp.icmp.v2i1(<2 x i1> undef, <2 x i1> undef, metadata !"slt", <2 x i1> undef, i32 undef)
+; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call <2 x i1> @llvm.vp.icmp.v2i8(<2 x i8> undef, <2 x i8> undef, metadata !"slt", <2 x i1> undef, i32 undef)
+; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call <2 x i1> @llvm.vp.icmp.v2i16(<2 x i16> undef, <2 x i16> undef, metadata !"slt", <2 x i1> undef, i32 undef)
+; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call <2 x i1> @llvm.vp.icmp.v2i32(<2 x i32> undef, <2 x i32> undef, metadata !"slt", <2 x i1> undef, i32 undef)
+; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = call <2 x i1> @llvm.vp.icmp.v2i64(<2 x i64> undef, <2 x i64> undef,
metadata !"slt", <2 x i1> undef, i32 undef) +; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = icmp slt <4 x i1> undef, undef +; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = icmp slt <4 x i8> undef, undef +; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = icmp slt <4 x i16> undef, undef +; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = icmp slt <4 x i32> undef, undef +; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = icmp slt <4 x i64> undef, undef +; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = call <4 x i1> @llvm.vp.icmp.v4i1(<4 x i1> undef, <4 x i1> undef, metadata !"slt", <4 x i1> undef, i32 undef) +; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = call <4 x i1> @llvm.vp.icmp.v4i8(<4 x i8> undef, <4 x i8> undef, metadata !"slt", <4 x i1> undef, i32 undef) +; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = call <4 x i1> @llvm.vp.icmp.v4i16(<4 x i16> undef, <4 x i16> undef, metadata !"slt", <4 x i1> undef, i32 undef) +; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = call <4 x i1> @llvm.vp.icmp.v4i32(<4 x i32> undef, <4 x i32> undef, metadata !"slt", <4 x i1> undef, i32 undef) +; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %20 = call <4 x i1> @llvm.vp.icmp.v4i64(<4 x i64> undef, <4 x i64> undef, metadata !"slt", <4 x i1> undef, i32 undef) +; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = icmp slt <8 x i1> undef, undef +; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = icmp slt <8 x i8> undef, undef +; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = icmp slt <8 x i16> undef, undef +; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %24 = icmp slt <8 x i32> undef, undef +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %25 = icmp slt <8 x i64> undef, undef +; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = call <8 x i1> @llvm.vp.icmp.v8i1(<8 x i1> undef, <8 x i1> undef, metadata !"slt", <8 x i1> undef, i32 undef) +; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = call <8 x i1> @llvm.vp.icmp.v8i8(<8 x i8> undef, <8 x i8> undef, metadata !"slt", <8 x i1> undef, i32 undef) +; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %28 = call <8 x i1> @llvm.vp.icmp.v8i16(<8 x i16> undef, <8 x i16> undef, metadata !"slt", <8 x i1> undef, i32 undef) +; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %29 = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> undef, <8 x i32> undef, metadata !"slt", <8 x i1> undef, i32 undef) +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %30 = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> undef, <8 x i64> undef, metadata !"slt", <8 x i1> undef, i32 undef) +; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %31 = icmp slt <16 x i1> undef, undef +; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %32 = icmp slt <16 x i8> undef, undef +; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %33 = icmp slt <16 x i16> undef, undef +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %34 = icmp slt <16 x i32> undef, undef +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %35 = icmp 
slt <16 x i64> undef, undef +; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %36 = call <16 x i1> @llvm.vp.icmp.v16i1(<16 x i1> undef, <16 x i1> undef, metadata !"slt", <16 x i1> undef, i32 undef) +; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %37 = call <16 x i1> @llvm.vp.icmp.v16i8(<16 x i8> undef, <16 x i8> undef, metadata !"slt", <16 x i1> undef, i32 undef) +; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %38 = call <16 x i1> @llvm.vp.icmp.v16i16(<16 x i16> undef, <16 x i16> undef, metadata !"slt", <16 x i1> undef, i32 undef) +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %39 = call <16 x i1> @llvm.vp.icmp.v16i32(<16 x i32> undef, <16 x i32> undef, metadata !"slt", <16 x i1> undef, i32 undef) +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %40 = call <16 x i1> @llvm.vp.icmp.v16i64(<16 x i64> undef, <16 x i64> undef, metadata !"slt", <16 x i1> undef, i32 undef) +; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %41 = icmp slt <32 x i1> undef, undef +; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %42 = icmp slt <32 x i8> undef, undef +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %43 = icmp slt <32 x i16> undef, undef +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %44 = icmp slt <32 x i32> undef, undef +; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %45 = icmp slt <32 x i64> undef, undef +; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %46 = call <32 x i1> @llvm.vp.icmp.v32i1(<32 x i1> undef, <32 x i1> undef, metadata !"slt", <32 x i1> undef, i32 undef) +; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %47 = call <32 x i1> @llvm.vp.icmp.v32i8(<32 x i8> undef, <32 x i8> undef, metadata !"slt", <32 x i1> undef, i32 undef) +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %48 = call <32 x i1> @llvm.vp.icmp.v32i16(<32 x i16> undef, <32 x i16> undef, metadata !"slt", <32 x i1> undef, i32 undef) +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %49 = call <32 x i1> @llvm.vp.icmp.v32i32(<32 x i32> undef, <32 x i32> undef, metadata !"slt", <32 x i1> undef, i32 undef) +; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %50 = call <32 x i1> @llvm.vp.icmp.v32i64(<32 x i64> undef, <32 x i64> undef, metadata !"slt", <32 x i1> undef, i32 undef) +; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %51 = icmp slt <64 x i1> undef, undef +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %52 = icmp slt <64 x i8> undef, undef +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %53 = icmp slt <64 x i16> undef, undef +; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %54 = icmp slt <64 x i32> undef, undef +; RV64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %55 = icmp slt <64 x i64> undef, undef +; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %56 = call <64 x i1> @llvm.vp.icmp.v64i1(<64 x i1> undef, <64 x i1> undef, metadata !"slt", <64 x i1> undef, i32 undef) +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %57 = call <64 x i1> @llvm.vp.icmp.v64i8(<64 x i8> undef, <64 x i8> undef, metadata !"slt", <64 x i1> undef, i32 undef) +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %58 = call <64 x i1> 
@llvm.vp.icmp.v64i16(<64 x i16> undef, <64 x i16> undef, metadata !"slt", <64 x i1> undef, i32 undef) +; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %59 = call <64 x i1> @llvm.vp.icmp.v64i32(<64 x i32> undef, <64 x i32> undef, metadata !"slt", <64 x i1> undef, i32 undef) +; RV64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %60 = call <64 x i1> @llvm.vp.icmp.v64i64(<64 x i64> undef, <64 x i64> undef, metadata !"slt", <64 x i1> undef, i32 undef) +; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %61 = icmp slt <128 x i1> undef, undef +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %62 = icmp slt <128 x i8> undef, undef +; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %63 = icmp slt <128 x i16> undef, undef +; RV64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %64 = icmp slt <128 x i32> undef, undef +; RV64-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %65 = icmp slt <128 x i64> undef, undef +; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %66 = call <128 x i1> @llvm.vp.icmp.v128i1(<128 x i1> undef, <128 x i1> undef, metadata !"slt", <128 x i1> undef, i32 undef) +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %67 = call <128 x i1> @llvm.vp.icmp.v128i8(<128 x i8> undef, <128 x i8> undef, metadata !"slt", <128 x i1> undef, i32 undef) +; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %68 = call <128 x i1> @llvm.vp.icmp.v128i16(<128 x i16> undef, <128 x i16> undef, metadata !"slt", <128 x i1> undef, i32 undef) +; RV64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %69 = call <128 x i1> @llvm.vp.icmp.v128i32(<128 x i32> undef, <128 x i32> undef, metadata !"slt", <128 x i1> undef, i32 undef) +; RV64-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %70 = call <128 x i1> @llvm.vp.icmp.v128i64(<128 x i64> undef, <128 x i64> undef, metadata !"slt", <128 x i1> undef, i32 undef) +; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %71 = icmp slt <256 x i1> undef, undef +; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %72 = icmp slt <256 x i8> undef, undef +; RV64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %73 = icmp slt <256 x i16> undef, undef +; RV64-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %74 = icmp slt <256 x i32> undef, undef +; RV64-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %75 = icmp slt <256 x i64> undef, undef +; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %76 = call <256 x i1> @llvm.vp.icmp.v256i1(<256 x i1> undef, <256 x i1> undef, metadata !"slt", <256 x i1> undef, i32 undef) +; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %77 = call <256 x i1> @llvm.vp.icmp.v256i8(<256 x i8> undef, <256 x i8> undef, metadata !"slt", <256 x i1> undef, i32 undef) +; RV64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %78 = call <256 x i1> @llvm.vp.icmp.v256i16(<256 x i16> undef, <256 x i16> undef, metadata !"slt", <256 x i1> undef, i32 undef) +; RV64-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %79 = call <256 x i1> @llvm.vp.icmp.v256i32(<256 x i32> undef, <256 x i32> undef, metadata !"slt", <256 x i1> undef, i32 undef) +; RV64-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %80 = call <256 x i1> @llvm.vp.icmp.v256i64(<256 x i64> undef, <256 
x i64> undef, metadata !"slt", <256 x i1> undef, i32 undef)
+; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %81 = icmp slt <vscale x 1 x i1> undef, undef
+; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %82 = icmp slt <vscale x 1 x i8> undef, undef
+; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %83 = icmp slt <vscale x 1 x i16> undef, undef
+; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %84 = icmp slt <vscale x 1 x i32> undef, undef
+; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %85 = icmp slt <vscale x 1 x i64> undef, undef
+; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %86 = call <vscale x 1 x i1> @llvm.vp.icmp.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, metadata !"slt", <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %87 = call <vscale x 1 x i1> @llvm.vp.icmp.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef, metadata !"slt", <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %88 = call <vscale x 1 x i1> @llvm.vp.icmp.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i16> undef, metadata !"slt", <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %89 = call <vscale x 1 x i1> @llvm.vp.icmp.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i32> undef, metadata !"slt", <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %90 = call <vscale x 1 x i1> @llvm.vp.icmp.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> undef, metadata !"slt", <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %91 = icmp slt <vscale x 2 x i1> undef, undef
+; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %92 = icmp slt <vscale x 2 x i8> undef, undef
+; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %93 = icmp slt <vscale x 2 x i16> undef, undef
+; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %94 = icmp slt <vscale x 2 x i32> undef, undef
+; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %95 = icmp slt <vscale x 2 x i64> undef, undef
+; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %96 = call <vscale x 2 x i1> @llvm.vp.icmp.nxv2i1(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, metadata !"slt", <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %97 = call <vscale x 2 x i1> @llvm.vp.icmp.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, metadata !"slt", <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %98 = call <vscale x 2 x i1> @llvm.vp.icmp.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, metadata !"slt", <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %99 = call <vscale x 2 x i1> @llvm.vp.icmp.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef, metadata !"slt", <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %100 = call <vscale x 2 x i1> @llvm.vp.icmp.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, metadata !"slt", <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %101 = icmp slt <vscale x 4 x i1> undef, undef
+; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %102 = icmp slt <vscale x 4 x i8> undef, undef
+; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %103 = icmp slt <vscale x 4 x i16> undef, undef
+; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %104 = icmp slt <vscale x 4 x i32> undef, undef
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %105 = icmp slt <vscale x 4 x i64> undef, undef
+; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %106 = call <vscale x 4 x i1> @llvm.vp.icmp.nxv4i1(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, metadata !"slt", <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %107 = call <vscale x 4 x i1> @llvm.vp.icmp.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef, metadata !"slt", <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %108 = call <vscale x 4 x i1> @llvm.vp.icmp.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, metadata !"slt", <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %109 = call <vscale x 4 x i1> @llvm.vp.icmp.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, metadata !"slt", <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %110 = call <vscale x 4 x i1> @llvm.vp.icmp.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef, metadata !"slt", <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %111 = icmp slt <vscale x 8 x i1> undef, undef
+; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %112 = icmp slt <vscale x 8 x i8> undef, undef
+; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %113 = icmp slt <vscale x 8 x i16> undef, undef
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %114 = icmp slt <vscale x 8 x i32> undef, undef
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %115 = icmp slt <vscale x 8 x i64> undef, undef
+; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %116 = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i1(<vscale x 8 x i1> undef, <vscale x 8 x i1> undef, metadata !"slt", <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %117 = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef, metadata !"slt", <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %118 = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, metadata !"slt", <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %119 = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, metadata !"slt", <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %120 = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef, metadata !"slt", <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %121 = icmp slt <vscale x 16 x i1> undef, undef
+; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %122 = icmp slt <vscale x 16 x i8> undef, undef
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %123 = icmp slt <vscale x 16 x i16> undef, undef
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %124 = icmp slt <vscale x 16 x i32> undef, undef
+; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %125 = icmp slt <vscale x 16 x i64> undef, undef
+; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %126 = call <vscale x 16 x i1> @llvm.vp.icmp.nxv16i1(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, metadata !"slt", <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %127 = call <vscale x 16 x i1> @llvm.vp.icmp.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, metadata !"slt", <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %128 = call <vscale x 16 x i1> @llvm.vp.icmp.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, metadata !"slt", <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %129 = call <vscale x 16 x i1> @llvm.vp.icmp.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef, metadata !"slt", <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %130 = call <vscale x 16 x i1> @llvm.vp.icmp.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i64> undef, metadata !"slt", <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %131 = icmp slt <vscale x 32 x i1> undef, undef
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %132 = icmp slt <vscale x 32 x i8> undef, undef
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %133 = icmp slt <vscale x 32 x i16> undef, undef
+; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %134 = icmp slt <vscale x 32 x i32> undef, undef
+; RV64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %135 = icmp slt <vscale x 32 x i64> undef, undef
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %136 = call <vscale x 32 x i1> @llvm.vp.icmp.nxv32i1(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, metadata !"slt", <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %137 = call <vscale x 32 x i1> @llvm.vp.icmp.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i8> undef, metadata !"slt", <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %138 = call <vscale x 32 x i1> @llvm.vp.icmp.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i16> undef, metadata !"slt", <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %139 = call <vscale x 32 x i1> @llvm.vp.icmp.nxv32i32(<vscale x 32 x i32> undef, <vscale x 32 x i32> undef, metadata !"slt", <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %140 = call <vscale x 32 x i1> @llvm.vp.icmp.nxv32i64(<vscale x 32 x i64> undef, <vscale x 32 x i64> undef, metadata !"slt", <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %141 = icmp slt <vscale x 64 x i1> undef, undef
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %142 = icmp slt <vscale x 64 x i8> undef, undef
+; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %143 = icmp slt <vscale x 64 x i16> undef, undef
+; RV64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %144 = icmp slt <vscale x 64 x i32> undef, undef
+; RV64-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %145 = icmp slt <vscale x 64 x i64> undef, undef
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %146 = call <vscale x 64 x i1> @llvm.vp.icmp.nxv64i1(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, metadata !"slt", <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %147 = call <vscale x 64 x i1> @llvm.vp.icmp.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i8> undef, metadata !"slt", <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %148 = call <vscale x 64 x i1> @llvm.vp.icmp.nxv64i16(<vscale x 64 x i16> undef, <vscale x 64 x i16> undef, metadata !"slt", <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %149 = call <vscale x 64 x i1> @llvm.vp.icmp.nxv64i32(<vscale x 64 x i32> undef, <vscale x 64 x i32> undef, metadata !"slt", <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %150 = call <vscale x 64 x i1> @llvm.vp.icmp.nxv64i64(<vscale x 64 x i64> undef, <vscale x 64 x i64> undef, metadata !"slt", <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %151 = icmp slt <vscale x 128 x i1> undef, undef
+; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %152 = icmp slt <vscale x 128 x i8> undef, undef
+; RV64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %153 = icmp slt <vscale x 128 x i16> undef, undef
+; RV64-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %154 = icmp slt <vscale x 128 x i32> undef, undef
+; RV64-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %155 = icmp slt <vscale x 128 x i64> undef, undef
+; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %156 = call <vscale x 128 x i1> @llvm.vp.icmp.nxv128i1(<vscale x 128 x i1> undef, <vscale x 128 x i1> undef, metadata !"slt", <vscale x 128 x i1> undef, i32 undef)
+; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %157 = call <vscale x 128 x i1> @llvm.vp.icmp.nxv128i8(<vscale x 128 x i8> undef, <vscale x 128 x i8> undef, metadata !"slt", <vscale x 128 x i1> undef, i32 undef)
+; RV64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %158 = call <vscale x 128 x i1> @llvm.vp.icmp.nxv128i16(<vscale x 128 x i16> undef, <vscale x 128 x i16> undef, metadata !"slt", <vscale x 128 x i1> undef, i32 undef)
+; RV64-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %159 = call <vscale x 128 x i1> @llvm.vp.icmp.nxv128i32(<vscale x 128 x i32> undef, <vscale x 128 x i32> undef, metadata !"slt", <vscale x 128 x i1> undef, i32 undef)
+; RV64-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %160 = call <vscale x 128 x i1> @llvm.vp.icmp.nxv128i64(<vscale x 128 x i64> undef, <vscale x 128 x i64> undef, metadata !"slt", <vscale x 128 x i1> undef, i32 undef)
+; RV64-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  icmp slt <2 x i1> undef, undef
+  icmp slt <2 x i8> undef, undef
+  icmp slt <2 x i16> undef, undef
+  icmp slt <2 x i32> undef, undef
+  icmp slt <2 x i64> undef, undef
+  call <2 x i1> @llvm.vp.icmp.v2i1(<2 x i1>
undef, <2 x i1> undef, metadata !"slt", <2 x i1> undef, i32 undef) + call <2 x i1> @llvm.vp.icmp.v2i8(<2 x i8> undef, <2 x i8> undef, metadata !"slt", <2 x i1> undef, i32 undef) + call <2 x i1> @llvm.vp.icmp.v2i16(<2 x i16> undef, <2 x i16> undef, metadata !"slt", <2 x i1> undef, i32 undef) + call <2 x i1> @llvm.vp.icmp.v2i32(<2 x i32> undef, <2 x i32> undef, metadata !"slt", <2 x i1> undef, i32 undef) + call <2 x i1> @llvm.vp.icmp.v2i64(<2 x i64> undef, <2 x i64> undef, metadata !"slt", <2 x i1> undef, i32 undef) + + icmp slt <4 x i1> undef, undef + icmp slt <4 x i8> undef, undef + icmp slt <4 x i16> undef, undef + icmp slt <4 x i32> undef, undef + icmp slt <4 x i64> undef, undef + call <4 x i1> @llvm.vp.icmp.v4i1(<4 x i1> undef, <4 x i1> undef, metadata !"slt", <4 x i1> undef, i32 undef) + call <4 x i1> @llvm.vp.icmp.v4i8(<4 x i8> undef, <4 x i8> undef, metadata !"slt", <4 x i1> undef, i32 undef) + call <4 x i1> @llvm.vp.icmp.v4i16(<4 x i16> undef, <4 x i16> undef, metadata !"slt", <4 x i1> undef, i32 undef) + call <4 x i1> @llvm.vp.icmp.v4i32(<4 x i32> undef, <4 x i32> undef, metadata !"slt", <4 x i1> undef, i32 undef) + call <4 x i1> @llvm.vp.icmp.v4i64(<4 x i64> undef, <4 x i64> undef, metadata !"slt", <4 x i1> undef, i32 undef) + + icmp slt <8 x i1> undef, undef + icmp slt <8 x i8> undef, undef + icmp slt <8 x i16> undef, undef + icmp slt <8 x i32> undef, undef + icmp slt <8 x i64> undef, undef + call <8 x i1> @llvm.vp.icmp.v8i1(<8 x i1> undef, <8 x i1> undef, metadata !"slt", <8 x i1> undef, i32 undef) + call <8 x i1> @llvm.vp.icmp.v8i8(<8 x i8> undef, <8 x i8> undef, metadata !"slt", <8 x i1> undef, i32 undef) + call <8 x i1> @llvm.vp.icmp.v8i16(<8 x i16> undef, <8 x i16> undef, metadata !"slt", <8 x i1> undef, i32 undef) + call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> undef, <8 x i32> undef, metadata !"slt", <8 x i1> undef, i32 undef) + call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> undef, <8 x i64> undef, metadata !"slt", <8 x i1> undef, i32 undef) + + icmp slt <16 x i1> undef, undef + icmp slt <16 x i8> undef, undef + icmp slt <16 x i16> undef, undef + icmp slt <16 x i32> undef, undef + icmp slt <16 x i64> undef, undef + call <16 x i1> @llvm.vp.icmp.v16i1(<16 x i1> undef, <16 x i1> undef, metadata !"slt", <16 x i1> undef, i32 undef) + call <16 x i1> @llvm.vp.icmp.v16i8(<16 x i8> undef, <16 x i8> undef, metadata !"slt", <16 x i1> undef, i32 undef) + call <16 x i1> @llvm.vp.icmp.v16i16(<16 x i16> undef, <16 x i16> undef, metadata !"slt", <16 x i1> undef, i32 undef) + call <16 x i1> @llvm.vp.icmp.v16i32(<16 x i32> undef, <16 x i32> undef, metadata !"slt", <16 x i1> undef, i32 undef) + call <16 x i1> @llvm.vp.icmp.v16i64(<16 x i64> undef, <16 x i64> undef, metadata !"slt", <16 x i1> undef, i32 undef) + + icmp slt <32 x i1> undef, undef + icmp slt <32 x i8> undef, undef + icmp slt <32 x i16> undef, undef + icmp slt <32 x i32> undef, undef + icmp slt <32 x i64> undef, undef + call <32 x i1> @llvm.vp.icmp.v32i1(<32 x i1> undef, <32 x i1> undef, metadata !"slt", <32 x i1> undef, i32 undef) + call <32 x i1> @llvm.vp.icmp.v32i8(<32 x i8> undef, <32 x i8> undef, metadata !"slt", <32 x i1> undef, i32 undef) + call <32 x i1> @llvm.vp.icmp.v32i16(<32 x i16> undef, <32 x i16> undef, metadata !"slt", <32 x i1> undef, i32 undef) + call <32 x i1> @llvm.vp.icmp.v32i32(<32 x i32> undef, <32 x i32> undef, metadata !"slt", <32 x i1> undef, i32 undef) + call <32 x i1> @llvm.vp.icmp.v32i64(<32 x i64> undef, <32 x i64> undef, metadata !"slt", <32 x i1> undef, i32 undef) + + icmp slt <64 x i1> undef, undef + 
icmp slt <64 x i8> undef, undef
+  icmp slt <64 x i16> undef, undef
+  icmp slt <64 x i32> undef, undef
+  icmp slt <64 x i64> undef, undef
+  call <64 x i1> @llvm.vp.icmp.v64i1(<64 x i1> undef, <64 x i1> undef, metadata !"slt", <64 x i1> undef, i32 undef)
+  call <64 x i1> @llvm.vp.icmp.v64i8(<64 x i8> undef, <64 x i8> undef, metadata !"slt", <64 x i1> undef, i32 undef)
+  call <64 x i1> @llvm.vp.icmp.v64i16(<64 x i16> undef, <64 x i16> undef, metadata !"slt", <64 x i1> undef, i32 undef)
+  call <64 x i1> @llvm.vp.icmp.v64i32(<64 x i32> undef, <64 x i32> undef, metadata !"slt", <64 x i1> undef, i32 undef)
+  call <64 x i1> @llvm.vp.icmp.v64i64(<64 x i64> undef, <64 x i64> undef, metadata !"slt", <64 x i1> undef, i32 undef)
+
+  icmp slt <128 x i1> undef, undef
+  icmp slt <128 x i8> undef, undef
+  icmp slt <128 x i16> undef, undef
+  icmp slt <128 x i32> undef, undef
+  icmp slt <128 x i64> undef, undef
+  call <128 x i1> @llvm.vp.icmp.v128i1(<128 x i1> undef, <128 x i1> undef, metadata !"slt", <128 x i1> undef, i32 undef)
+  call <128 x i1> @llvm.vp.icmp.v128i8(<128 x i8> undef, <128 x i8> undef, metadata !"slt", <128 x i1> undef, i32 undef)
+  call <128 x i1> @llvm.vp.icmp.v128i16(<128 x i16> undef, <128 x i16> undef, metadata !"slt", <128 x i1> undef, i32 undef)
+  call <128 x i1> @llvm.vp.icmp.v128i32(<128 x i32> undef, <128 x i32> undef, metadata !"slt", <128 x i1> undef, i32 undef)
+  call <128 x i1> @llvm.vp.icmp.v128i64(<128 x i64> undef, <128 x i64> undef, metadata !"slt", <128 x i1> undef, i32 undef)
+
+  icmp slt <256 x i1> undef, undef
+  icmp slt <256 x i8> undef, undef
+  icmp slt <256 x i16> undef, undef
+  icmp slt <256 x i32> undef, undef
+  icmp slt <256 x i64> undef, undef
+  call <256 x i1> @llvm.vp.icmp.v256i1(<256 x i1> undef, <256 x i1> undef, metadata !"slt", <256 x i1> undef, i32 undef)
+  call <256 x i1> @llvm.vp.icmp.v256i8(<256 x i8> undef, <256 x i8> undef, metadata !"slt", <256 x i1> undef, i32 undef)
+  call <256 x i1> @llvm.vp.icmp.v256i16(<256 x i16> undef, <256 x i16> undef, metadata !"slt", <256 x i1> undef, i32 undef)
+  call <256 x i1> @llvm.vp.icmp.v256i32(<256 x i32> undef, <256 x i32> undef, metadata !"slt", <256 x i1> undef, i32 undef)
+  call <256 x i1> @llvm.vp.icmp.v256i64(<256 x i64> undef, <256 x i64> undef, metadata !"slt", <256 x i1> undef, i32 undef)
+
+  icmp slt <vscale x 1 x i1> undef, undef
+  icmp slt <vscale x 1 x i8> undef, undef
+  icmp slt <vscale x 1 x i16> undef, undef
+  icmp slt <vscale x 1 x i32> undef, undef
+  icmp slt <vscale x 1 x i64> undef, undef
+  call <vscale x 1 x i1> @llvm.vp.icmp.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, metadata !"slt", <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 1 x i1> @llvm.vp.icmp.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef, metadata !"slt", <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 1 x i1> @llvm.vp.icmp.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i16> undef, metadata !"slt", <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 1 x i1> @llvm.vp.icmp.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i32> undef, metadata !"slt", <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 1 x i1> @llvm.vp.icmp.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> undef, metadata !"slt", <vscale x 1 x i1> undef, i32 undef)
+
+  icmp slt <vscale x 2 x i1> undef, undef
+  icmp slt <vscale x 2 x i8> undef, undef
+  icmp slt <vscale x 2 x i16> undef, undef
+  icmp slt <vscale x 2 x i32> undef, undef
+  icmp slt <vscale x 2 x i64> undef, undef
+  call <vscale x 2 x i1> @llvm.vp.icmp.nxv2i1(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, metadata !"slt", <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 2 x i1> @llvm.vp.icmp.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, metadata !"slt", <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 2 x i1> @llvm.vp.icmp.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, metadata !"slt", <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 2 x i1> @llvm.vp.icmp.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef, metadata !"slt", <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 2 x i1> @llvm.vp.icmp.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, metadata !"slt", <vscale x 2 x i1> undef, i32 undef)
+
+  icmp slt <vscale x 4 x i1> undef, undef
+  icmp slt <vscale x 4 x i8> undef, undef
+  icmp slt <vscale x 4 x i16> undef, undef
+  icmp slt <vscale x 4 x i32> undef, undef
+  icmp slt <vscale x 4 x i64> undef, undef
+  call <vscale x 4 x i1> @llvm.vp.icmp.nxv4i1(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, metadata !"slt", <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 4 x i1> @llvm.vp.icmp.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef, metadata !"slt", <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 4 x i1> @llvm.vp.icmp.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, metadata !"slt", <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 4 x i1> @llvm.vp.icmp.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, metadata !"slt", <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 4 x i1> @llvm.vp.icmp.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef, metadata !"slt", <vscale x 4 x i1> undef, i32 undef)
+
+  icmp slt <vscale x 8 x i1> undef, undef
+  icmp slt <vscale x 8 x i8> undef, undef
+  icmp slt <vscale x 8 x i16> undef, undef
+  icmp slt <vscale x 8 x i32> undef, undef
+  icmp slt <vscale x 8 x i64> undef, undef
+  call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i1(<vscale x 8 x i1> undef, <vscale x 8 x i1> undef, metadata !"slt", <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef, metadata !"slt", <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, metadata !"slt", <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, metadata !"slt", <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef, metadata !"slt", <vscale x 8 x i1> undef, i32 undef)
+
+  icmp slt <vscale x 16 x i1> undef, undef
+  icmp slt <vscale x 16 x i8> undef, undef
+  icmp slt <vscale x 16 x i16> undef, undef
+  icmp slt <vscale x 16 x i32> undef, undef
+  icmp slt <vscale x 16 x i64> undef, undef
+  call <vscale x 16 x i1> @llvm.vp.icmp.nxv16i1(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, metadata !"slt", <vscale x 16 x i1> undef, i32 undef)
+  call <vscale x 16 x i1> @llvm.vp.icmp.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, metadata !"slt", <vscale x 16 x i1> undef, i32 undef)
+  call <vscale x 16 x i1> @llvm.vp.icmp.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, metadata !"slt", <vscale x 16 x i1> undef, i32 undef)
+  call <vscale x 16 x i1> @llvm.vp.icmp.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef, metadata !"slt", <vscale x 16 x i1> undef, i32 undef)
+  call <vscale x 16 x i1> @llvm.vp.icmp.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i64> undef, metadata !"slt", <vscale x 16 x i1> undef, i32 undef)
+
+  icmp slt <vscale x 32 x i1> undef, undef
+  icmp slt <vscale x 32 x i8> undef, undef
+  icmp slt <vscale x 32 x i16> undef, undef
+  icmp slt <vscale x 32 x i32> undef, undef
+  icmp slt <vscale x 32 x i64> undef, undef
+  call <vscale x 32 x i1> @llvm.vp.icmp.nxv32i1(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, metadata !"slt", <vscale x 32 x i1> undef, i32 undef)
+  call <vscale x 32 x i1> @llvm.vp.icmp.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i8> undef, metadata !"slt", <vscale x 32 x i1> undef, i32 undef)
+  call <vscale x 32 x i1> @llvm.vp.icmp.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i16> undef, metadata !"slt", <vscale x 32 x i1> undef, i32 undef)
+  call <vscale x 32 x i1> @llvm.vp.icmp.nxv32i32(<vscale x 32 x i32> undef, <vscale x 32 x i32> undef, metadata !"slt", <vscale x 32 x i1> undef, i32 undef)
+  call <vscale x 32 x i1> @llvm.vp.icmp.nxv32i64(<vscale x 32 x i64> undef, <vscale x 32 x i64> undef, metadata !"slt", <vscale x 32 x i1> undef, i32 undef)
+
+  icmp slt <vscale x 64 x i1> undef, undef
+  icmp slt <vscale x 64 x i8> undef, undef
+  icmp slt <vscale x 64 x i16> undef, undef
+  icmp slt <vscale x 64 x i32> undef, undef
+  icmp slt <vscale x 64 x i64> undef, undef
+  call <vscale x 64 x i1> @llvm.vp.icmp.nxv64i1(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, metadata !"slt", <vscale x 64 x i1> undef, i32 undef)
+  call <vscale x 64 x i1> @llvm.vp.icmp.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i8> undef, metadata !"slt", <vscale x 64 x i1> undef, i32 undef)
+  call <vscale x 64 x i1> @llvm.vp.icmp.nxv64i16(<vscale x 64 x i16> undef, <vscale x 64 x i16> undef, metadata !"slt", <vscale x 64 x i1> undef, i32 undef)
+  call <vscale x 64 x i1> @llvm.vp.icmp.nxv64i32(<vscale x 64 x i32> undef, <vscale x 64 x i32> undef, metadata !"slt", <vscale x 64 x i1> undef, i32 undef)
+  call <vscale x 64 x i1> @llvm.vp.icmp.nxv64i64(<vscale x 64 x i64> undef, <vscale x 64 x i64> undef, metadata !"slt", <vscale x 64 x i1> undef, i32 undef)
+
+  icmp slt <vscale x 128 x i1> undef, undef
+  icmp slt <vscale x 128 x i8> undef, undef
+  icmp slt <vscale x 128 x i16> undef, undef
+  icmp slt <vscale x 128 x i32> undef, undef
+  icmp slt <vscale x 128 x i64> undef, undef
+  call <vscale x 128 x i1> @llvm.vp.icmp.nxv128i1(<vscale x 128 x i1> undef, <vscale x 128 x i1> undef, metadata !"slt", <vscale x 128 x i1> undef, i32 undef)
+  call <vscale x 128 x i1> @llvm.vp.icmp.nxv128i8(<vscale x 128 x i8> undef, <vscale x 128 x i8> undef, metadata !"slt", <vscale x 128 x i1> undef, i32 undef)
+  call <vscale x 128 x i1> @llvm.vp.icmp.nxv128i16(<vscale x 128 x i16> undef, <vscale x 128 x i16> undef, metadata !"slt", <vscale x 128 x i1> undef, i32 undef)
+  call <vscale x 128 x i1> @llvm.vp.icmp.nxv128i32(<vscale x 128 x i32> undef, <vscale x 128 x i32> undef, metadata !"slt", <vscale x 128 x i1> undef, i32 undef)
+  call <vscale x 128 x i1> @llvm.vp.icmp.nxv128i64(<vscale x 128 x i64> undef, <vscale x 128 x i64> undef, metadata !"slt", <vscale x 128 x i1> undef, i32 undef)
+
+  ret void
+}
+
+define void @fcmp() {
+; CHECK-LABEL: 'fcmp'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = fcmp olt <2 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = fcmp olt <2 x double> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <2 x i1> @llvm.vp.fcmp.v2f32(<2 x float> undef, <2 x float> undef, metadata !"olt", <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <2 x i1> @llvm.vp.fcmp.v2f64(<2 x double> undef, <2 x double> undef, metadata
!"olt", <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = fcmp olt <4 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %6 = fcmp olt <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call <4 x i1> @llvm.vp.fcmp.v4f32(<4 x float> undef, <4 x float> undef, metadata !"olt", <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = call <4 x i1> @llvm.vp.fcmp.v4f64(<4 x double> undef, <4 x double> undef, metadata !"olt", <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = fcmp olt <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %10 = fcmp olt <8 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %11 = call <8 x i1> @llvm.vp.fcmp.v8f32(<8 x float> undef, <8 x float> undef, metadata !"olt", <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %12 = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> undef, <8 x double> undef, metadata !"olt", <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %13 = fcmp olt <16 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %14 = fcmp olt <16 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %15 = call <16 x i1> @llvm.vp.fcmp.v16f32(<16 x float> undef, <16 x float> undef, metadata !"olt", <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %16 = call <16 x i1> @llvm.vp.fcmp.v16f64(<16 x double> undef, <16 x double> undef, metadata !"olt", <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %17 = fcmp olt <32 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %18 = fcmp olt <32 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %19 = call <32 x i1> @llvm.vp.fcmp.v32f32(<32 x float> undef, <32 x float> undef, metadata !"olt", <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %20 = call <32 x i1> @llvm.vp.fcmp.v32f64(<32 x double> undef, <32 x double> undef, metadata !"olt", <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %21 = fcmp olt <64 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %22 = fcmp olt <64 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %23 = call <64 x i1> @llvm.vp.fcmp.v64f32(<64 x float> undef, <64 x float> undef, metadata !"olt", <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %24 = call <64 x i1> @llvm.vp.fcmp.v64f64(<64 x double> undef, <64 x double> undef, metadata !"olt", <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %25 = fcmp olt <128 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %26 = fcmp olt <128 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %27 = call <128 x i1> @llvm.vp.fcmp.v128f32(<128 x float> undef, <128 x float> 
undef, metadata !"olt", <128 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %28 = call <128 x i1> @llvm.vp.fcmp.v128f64(<128 x double> undef, <128 x double> undef, metadata !"olt", <128 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %29 = fcmp olt <256 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %30 = fcmp olt <256 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %31 = call <256 x i1> @llvm.vp.fcmp.v256f32(<256 x float> undef, <256 x float> undef, metadata !"olt", <256 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %32 = call <256 x i1> @llvm.vp.fcmp.v256f64(<256 x double> undef, <256 x double> undef, metadata !"olt", <256 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %33 = fcmp olt <vscale x 1 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %34 = fcmp olt <vscale x 1 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %35 = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef, metadata !"olt", <vscale x 1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %36 = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef, metadata !"olt", <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %37 = fcmp olt <vscale x 2 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %38 = fcmp olt <vscale x 2 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %39 = call <vscale x 2 x i1> @llvm.vp.fcmp.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, metadata !"olt", <vscale x 2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %40 = call <vscale x 2 x i1> @llvm.vp.fcmp.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, metadata !"olt", <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %41 = fcmp olt <vscale x 4 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %42 = fcmp olt <vscale x 4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %43 = call <vscale x 4 x i1> @llvm.vp.fcmp.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, metadata !"olt", <vscale x 4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %44 = call <vscale x 4 x i1> @llvm.vp.fcmp.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, metadata !"olt", <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %45 = fcmp olt <vscale x 8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %46 = fcmp olt <vscale x 8 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %47 = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, metadata !"olt", <vscale x 8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %48 = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, metadata !"olt", <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %49 = fcmp olt <vscale x 16 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %50 = fcmp olt <vscale x 16 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %51 = call <vscale x 16 x i1> @llvm.vp.fcmp.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, metadata !"olt", <vscale x 16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %52 = call <vscale x 16 x i1> @llvm.vp.fcmp.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x double> undef, metadata !"olt", <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %53 = fcmp olt <vscale x 32 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %54 = fcmp olt <vscale x 32 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %55 = call <vscale x 32 x i1> @llvm.vp.fcmp.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x float> undef, metadata !"olt", <vscale x 32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %56 = call <vscale x 32 x i1> @llvm.vp.fcmp.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x double> undef, metadata !"olt", <vscale x 32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %57 = fcmp olt <vscale x 64 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %58 = fcmp olt <vscale x 64 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %59 = call <vscale x 64 x i1> @llvm.vp.fcmp.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x float> undef, metadata !"olt", <vscale x 64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %60 = call <vscale x 64 x i1> @llvm.vp.fcmp.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x double> undef, metadata !"olt", <vscale x 64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %61 = fcmp olt <vscale x 128 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %62 = fcmp olt <vscale x 128 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %63 = call <vscale x 128 x i1> @llvm.vp.fcmp.nxv128f32(<vscale x 128 x float> undef, <vscale x 128 x float> undef, metadata !"olt", <vscale x 128 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %64 = call <vscale x 128 x i1> @llvm.vp.fcmp.nxv128f64(<vscale x 128 x double> undef, <vscale x 128 x double> undef, metadata !"olt", <vscale x 128 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +;
+ fcmp olt <2 x float> undef, undef + fcmp olt <2 x double> undef, undef + call <2 x i1> @llvm.vp.fcmp.v2float(<2 x float> undef, <2 x float> undef, metadata !"olt", <2 x i1> undef, i32 undef) + call <2 x i1> @llvm.vp.fcmp.v2double(<2 x double> undef, <2 x double> undef, metadata !"olt", <2 x i1> undef, i32 undef) + +
fcmp olt <4 x float> undef, undef + fcmp olt <4 x double> undef, undef + call <4 x i1> @llvm.vp.fcmp.v4float(<4 x float> undef, <4 x float> undef, metadata !"olt", <4 x i1> undef, i32 undef) + call <4 x i1> @llvm.vp.fcmp.v4double(<4 x double> undef, <4 x double> undef, metadata !"olt", <4 x i1> undef, i32 undef) + +
fcmp olt <8 x float> undef, undef + fcmp olt <8 x double> undef, undef + call <8 x i1> @llvm.vp.fcmp.v8float(<8 x float> undef, <8 x float> undef, metadata !"olt", <8 x i1> undef, i32 undef) + call <8 x i1> @llvm.vp.fcmp.v8double(<8 x double> undef, <8 x double> undef, metadata !"olt", <8 x i1> undef, i32 undef) + +
fcmp olt <16 x float> undef, undef + fcmp olt <16 x double> undef, undef + call <16 x i1> @llvm.vp.fcmp.v16float(<16 x float> undef, <16 x float> undef, metadata !"olt", <16 x i1> undef, i32 undef) + call <16 x i1> @llvm.vp.fcmp.v16double(<16 x double> undef, <16 x double> undef, metadata !"olt", <16 x i1> undef, i32 undef) + +
fcmp olt <32 x float> undef, undef + fcmp olt <32 x double> undef, undef + call <32 x i1> @llvm.vp.fcmp.v32float(<32 x float> undef, <32 x float> undef, metadata !"olt", <32 x i1> undef, i32 undef) + call <32 x i1> @llvm.vp.fcmp.v32double(<32 x double> undef, <32 x double> undef, metadata !"olt", <32 x i1> undef, i32 undef) + +
fcmp olt <64 x float> undef, undef + fcmp olt <64 x double> undef, undef + call <64 x i1> @llvm.vp.fcmp.v64float(<64 x float> undef, <64 x float> undef, metadata !"olt", <64 x i1> undef, i32 undef) + call <64 x i1> @llvm.vp.fcmp.v64double(<64 x double> undef, <64 x double> undef, metadata !"olt", <64 x i1> undef, i32 undef) + +
fcmp olt <128 x float> undef, undef + fcmp olt <128 x double> undef, undef + call <128 x i1> @llvm.vp.fcmp.v128float(<128 x float> undef, <128 x float> undef, metadata !"olt", <128 x i1> undef, i32 undef) + call <128 x i1> @llvm.vp.fcmp.v128double(<128 x double> undef, <128 x double> undef, metadata !"olt", <128 x i1> undef, i32 undef) + +
fcmp olt <256 x float> undef, undef + fcmp olt <256 x double> undef, undef + call <256 x i1> @llvm.vp.fcmp.v256float(<256 x float> undef, <256 x float> undef, metadata !"olt", <256 x i1> undef, i32 undef) + call <256 x i1> @llvm.vp.fcmp.v256double(<256 x double> undef, <256 x double> undef, metadata !"olt", <256 x i1> undef, i32 undef) + +
fcmp olt <vscale x 1 x float> undef, undef + fcmp olt <vscale x 1 x double> undef, undef + call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1float(<vscale x 1 x float> undef, <vscale x 1 x float> undef, metadata !"olt", <vscale x 1 x i1> undef, i32 undef) + call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1double(<vscale x 1 x double> undef, <vscale x 1 x double> undef, metadata !"olt", <vscale x 1 x i1> undef, i32 undef) + +
fcmp olt <vscale x 2 x float> undef, undef + fcmp olt <vscale x 2 x double> undef, undef + call <vscale x 2 x i1> @llvm.vp.fcmp.nxv2float(<vscale x 2 x float> undef, <vscale x 2 x float> undef, metadata !"olt", <vscale x 2 x i1> undef, i32 undef) + call <vscale x 2 x i1> @llvm.vp.fcmp.nxv2double(<vscale x 2 x double> undef, <vscale x 2 x double> undef, metadata !"olt", <vscale x 2 x i1> undef, i32 undef) + +
fcmp olt <vscale x 4 x float> undef, undef + fcmp olt <vscale x 4 x double> undef, undef + call <vscale x 4 x i1> @llvm.vp.fcmp.nxv4float(<vscale x 4 x float> undef, <vscale x 4 x float> undef, metadata !"olt", <vscale x 4 x i1> undef, i32 undef) + call <vscale x 4 x i1> @llvm.vp.fcmp.nxv4double(<vscale x 4 x double> undef, <vscale x 4 x double> undef, metadata !"olt", <vscale x 4 x i1> undef, i32 undef) + +
fcmp olt <vscale x 8 x float> undef, undef + fcmp olt <vscale x 8 x double> undef, undef + call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8float(<vscale x 8 x float> undef, <vscale x 8 x float> undef, metadata !"olt", <vscale x 8 x i1> undef, i32 undef) + call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8double(<vscale x 8 x double> undef, <vscale x 8 x double> undef, metadata !"olt", <vscale x 8 x i1> undef, i32 undef) + +
fcmp olt <vscale x 16 x float> undef, undef + fcmp olt <vscale x 16 x double> undef, undef + call <vscale x 16 x i1> @llvm.vp.fcmp.nxv16float(<vscale x 16 x float> undef, <vscale x 16 x float> undef, metadata !"olt", <vscale x 16 x i1> undef, i32 undef) + call <vscale x 16 x i1> @llvm.vp.fcmp.nxv16double(<vscale x 16 x double> undef, <vscale x 16 x double> undef, metadata !"olt", <vscale x 16 x i1> undef, i32 undef) + +
fcmp olt <vscale x 32 x float> undef, undef + fcmp olt <vscale x 32 x double> undef, undef + call <vscale x 32 x i1> @llvm.vp.fcmp.nxv32float(<vscale x 32 x float> undef, <vscale x 32 x float> undef, metadata !"olt", <vscale x 32 x i1> undef, i32 undef) + call <vscale x 32 x i1> @llvm.vp.fcmp.nxv32double(<vscale x 32 x double> undef, <vscale x 32 x double> undef, metadata !"olt", <vscale x 32 x i1> undef, i32 undef) + +
fcmp olt <vscale x 64 x float> undef, undef + fcmp olt <vscale x 64 x double> undef, undef + call <vscale x 64 x i1> @llvm.vp.fcmp.nxv64float(<vscale x 64 x float> undef, <vscale x 64 x float> undef, metadata !"olt", <vscale x 64 x i1> undef, i32 undef) + call <vscale x 64 x i1> @llvm.vp.fcmp.nxv64double(<vscale x 64 x double> undef, <vscale x 64 x double> undef, metadata !"olt", <vscale x 64 x i1> undef, i32 undef) + +
fcmp olt <vscale x 128 x float> undef, undef + fcmp olt <vscale x 128 x double> undef, undef + call <vscale x 128 x i1> @llvm.vp.fcmp.nxv128float(<vscale x 128 x float> undef, <vscale x 128 x float> undef, metadata !"olt", <vscale x 128 x i1> undef, i32 undef) + call <vscale x 128 x i1> @llvm.vp.fcmp.nxv128double(<vscale x 128 x double> undef, <vscale x 128 x double> undef, metadata !"olt", <vscale x 128 x i1> undef, i32 undef) + +
ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-5.ll index 29dce5f21173af..57ae02abc11996 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-5.ll @@ -82,26 +82,26 @@ define void @test() { ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4 ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4 ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 15 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For
instruction: %v4 = load float, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 35 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 75 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 150 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load float, ptr %in4, align 4 +; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4 +; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4 +; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4 +; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4 +; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4 +; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4 +; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4 +; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4 +; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v1 = load float, ptr %in1, align 4 +; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v2 = load float, ptr %in2, align 4 +; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v3 = load float, ptr %in3, align 4 +; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v4 = load float, ptr %in4, align 4 +; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v1 = load float, ptr %in1, align 4 +; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v2 = load float, ptr %in2, align 4 +; AVX2: LV: Found an estimated cost of 30 for VF 16 
For instruction: %v3 = load float, ptr %in3, align 4 +; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v4 = load float, ptr %in4, align 4 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 @@ -139,11 +139,11 @@ define void @test() { ; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load float, ptr %in2, align 4 ; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load float, ptr %in3, align 4 ; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX512: LV: Found an estimated cost of 400 for VF 64 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 64 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 64 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 64 For instruction: %v4 = load float, ptr %in4, align 4 +; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v1 = load float, ptr %in1, align 4 +; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v2 = load float, ptr %in2, align 4 +; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v3 = load float, ptr %in3, align 4 +; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v4 = load float, ptr %in4, align 4 ; entry: br label %for.body diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-7.ll index 0e7b1c58e587c1..2cc5150f3c887f 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-7.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-7.ll @@ -108,34 +108,34 @@ define void @test() { ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4 ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4 ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4 -; AVX2: LV: Found an estimated cost of 21 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load float, ptr %in5, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load float, ptr %in6, align 4 -; AVX2: LV: Found an estimated cost of 49 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: 
%v3 = load float, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load float, ptr %in5, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load float, ptr %in6, align 4 -; AVX2: LV: Found an estimated cost of 105 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load float, ptr %in5, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load float, ptr %in6, align 4 -; AVX2: LV: Found an estimated cost of 210 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load float, ptr %in5, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load float, ptr %in6, align 4 +; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4 +; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4 +; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4 +; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4 +; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load float, ptr %in5, align 4 +; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v6 = load float, ptr %in6, align 4 +; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4 +; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4 +; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4 +; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4 +; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v5 = load float, ptr %in5, align 4 +; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v6 = load float, ptr %in6, align 4 +; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v1 = load float, ptr %in1, align 4 +; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v2 
= load float, ptr %in2, align 4 +; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v3 = load float, ptr %in3, align 4 +; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v4 = load float, ptr %in4, align 4 +; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v5 = load float, ptr %in5, align 4 +; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v6 = load float, ptr %in6, align 4 +; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v1 = load float, ptr %in1, align 4 +; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v2 = load float, ptr %in2, align 4 +; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v3 = load float, ptr %in3, align 4 +; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v4 = load float, ptr %in4, align 4 +; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v5 = load float, ptr %in5, align 4 +; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v6 = load float, ptr %in6, align 4 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 @@ -180,13 +180,13 @@ define void @test() { ; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load float, ptr %in4, align 4 ; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load float, ptr %in5, align 4 ; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load float, ptr %in6, align 4 -; AVX512: LV: Found an estimated cost of 280 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load float, ptr %in5, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v6 = load float, ptr %in6, align 4 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load float, ptr %in1, align 4 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load float, ptr %in2, align 4 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v3 = load float, ptr %in3, align 4 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v4 = load float, ptr %in4, align 4 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v5 = load float, ptr %in5, align 4 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v6 = load float, ptr %in6, align 4 ; entry: br label %for.body diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-8.ll index 8830aff579c322..1899741b8a3b40 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-8.ll +++ 
b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-8.ll @@ -121,22 +121,22 @@ define void @test() { ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4 ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4 ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load float, ptr %in7, align 4 -; AVX2: LV: Found an estimated cost of 24 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load float, ptr %in5, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load float, ptr %in6, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v7 = load float, ptr %in7, align 4 -; AVX2: LV: Found an estimated cost of 56 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load float, ptr %in5, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load float, ptr %in6, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v7 = load float, ptr %in7, align 4 +; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4 +; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4 +; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4 +; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4 +; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load float, ptr %in5, align 4 +; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v6 = load float, ptr %in6, align 4 +; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v7 = load float, ptr %in7, align 4 +; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4 +; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4 +; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4 +; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4 +; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v5 = load float, ptr %in5, align 4 +; AVX2: 
LV: Found an estimated cost of 7 for VF 4 For instruction: %v6 = load float, ptr %in6, align 4 +; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v7 = load float, ptr %in7, align 4 ; AVX2: LV: Found an estimated cost of 48 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 ; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load float, ptr %in1, align 4 ; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load float, ptr %in2, align 4 @@ -145,14 +145,14 @@ define void @test() { ; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load float, ptr %in5, align 4 ; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load float, ptr %in6, align 4 ; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v7 = load float, ptr %in7, align 4 -; AVX2: LV: Found an estimated cost of 240 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load float, ptr %in5, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load float, ptr %in6, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v7 = load float, ptr %in7, align 4 +; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v1 = load float, ptr %in1, align 4 +; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v2 = load float, ptr %in2, align 4 +; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v3 = load float, ptr %in3, align 4 +; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v4 = load float, ptr %in4, align 4 +; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v5 = load float, ptr %in5, align 4 +; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v6 = load float, ptr %in6, align 4 +; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v7 = load float, ptr %in7, align 4 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 @@ -203,14 +203,14 @@ define void @test() { ; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load float, ptr %in5, align 4 ; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load float, ptr %in6, align 4 ; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v7 = load float, ptr %in7, align 4 -; AVX512: LV: Found an estimated cost of 320 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = 
load float, ptr %in4, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load float, ptr %in5, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v6 = load float, ptr %in6, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v7 = load float, ptr %in7, align 4 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load float, ptr %in1, align 4 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load float, ptr %in2, align 4 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v3 = load float, ptr %in3, align 4 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v4 = load float, ptr %in4, align 4 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v5 = load float, ptr %in5, align 4 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v6 = load float, ptr %in6, align 4 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v7 = load float, ptr %in7, align 4 ; entry: br label %for.body diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-2.ll index cfd3d7841caa2d..2d4b300a8100a4 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-2.ll @@ -71,10 +71,10 @@ define void @test() { ; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8 ; AVX512: LV: Found an estimated cost of 22 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8 ; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 80 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 160 for VF 64 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load double, ptr %in1, align 8 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8 +; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v1 = load double, ptr %in1, align 8 ; entry: br label %for.body diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-3.ll index 5ec5b517313856..5dfb25e25d6e61 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-3.ll @@ -88,12 +88,12 @@ define void @test() { ; AVX512: LV: Found an estimated cost of 51 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8 ; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8 ; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX512: LV: 
Found an estimated cost of 120 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 240 for VF 64 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 64 For instruction: %v2 = load double, ptr %in2, align 8 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8 +; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v1 = load double, ptr %in1, align 8 +; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v2 = load double, ptr %in2, align 8 ; entry: br label %for.body diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-4.ll index 450743df723251..bd88ca810728b1 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-4.ll @@ -107,14 +107,14 @@ define void @test() { ; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8 ; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8 ; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 80 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 160 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load double, ptr %in3, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load double, ptr %in3, align 8 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8 +; AVX512: LV: 
Found an estimated cost of 40 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v3 = load double, ptr %in3, align 8 ; entry: br label %for.body diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-5.ll index 5e5c718dba97d2..9c0798631fdba5 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-5.ll @@ -72,21 +72,21 @@ define void @test() { ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8 ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 15 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 35 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 70 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8 +; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8 +; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8 +; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8 +; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8 +; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8 +; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8 +; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8 +; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8 +; 
AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8 +; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8 +; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8 +; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 @@ -114,16 +114,16 @@ define void @test() { ; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8 ; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8 ; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 100 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 200 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load double, ptr %in4, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load double, ptr %in3, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v4 = load double, ptr %in4, align 8 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v3 = load double, ptr %in3, align 8 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v4 = load double, ptr %in4, align 8 ; entry: br label %for.body diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-6.ll index 62541fa2368c6a..99a735d3f552c4 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-6.ll +++ 
b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-6.ll @@ -133,18 +133,18 @@ define void @test() { ; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8 ; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8 ; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX512: LV: Found an estimated cost of 120 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX512: LV: Found an estimated cost of 240 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load double, ptr %in5, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load double, ptr %in3, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v4 = load double, ptr %in4, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v5 = load double, ptr %in5, align 8 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v3 = load double, ptr %in3, align 8 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v4 = load double, ptr %in4, align 8 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v5 = load double, ptr %in5, align 8 ; entry: br label %for.body diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-7.ll index cfed8554b978b8..168e9166ea1dd4 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-7.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-7.ll @@ -94,27 +94,27 @@ define void @test() { ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8 ; AVX2: 
LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8 ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8 -; AVX2: LV: Found an estimated cost of 21 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load double, ptr %in6, align 8 -; AVX2: LV: Found an estimated cost of 49 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load double, ptr %in6, align 8 -; AVX2: LV: Found an estimated cost of 98 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load double, ptr %in6, align 8 +; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8 +; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8 +; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8 +; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8 +; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load double, ptr %in5, align 8 +; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v6 = load double, ptr %in6, align 8 +; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8 +; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8 +; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load double, ptr %in3, 
align 8
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v5 = load double, ptr %in5, align 8
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v6 = load double, ptr %in6, align 8
+; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
+; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
+; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
+; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8
+; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8
+; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v5 = load double, ptr %in5, align 8
+; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v6 = load double, ptr %in6, align 8
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
@@ -152,20 +152,20 @@ define void @test() {
 ; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8
 ; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load double, ptr %in5, align 8
 ; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX512: LV: Found an estimated cost of 140 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX512: LV: Found an estimated cost of 280 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v6 = load double, ptr %in6, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load double, ptr %in3, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v4 = load double, ptr %in4, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v5 = load double, ptr %in5, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v6 = load double, ptr %in6, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v3 = load double, ptr %in3, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v4 = load double, ptr %in4, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v5 = load double, ptr %in5, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v6 = load double, ptr %in6, align 8
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-8.ll
index 07939b914d0224..919a17e8729e0f 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-8.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-8.ll
@@ -105,30 +105,30 @@ define void @test() {
 ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
 ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8
 ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load double, ptr %in7, align 8
-; AVX2: LV: Found an estimated cost of 24 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v7 = load double, ptr %in7, align 8
-; AVX2: LV: Found an estimated cost of 56 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v7 = load double, ptr %in7, align 8
-; AVX2: LV: Found an estimated cost of 112 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v7 = load double, ptr %in7, align 8
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load double, ptr %in5, align 8
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v6 = load double, ptr %in6, align 8
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v7 = load double, ptr %in7, align 8
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v5 = load double, ptr %in5, align 8
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v6 = load double, ptr %in6, align 8
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v7 = load double, ptr %in7, align 8
+; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
+; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
+; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
+; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8
+; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8
+; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v5 = load double, ptr %in5, align 8
+; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v6 = load double, ptr %in6, align 8
+; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v7 = load double, ptr %in7, align 8
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
@@ -163,22 +163,22 @@ define void @test() {
 ; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load double, ptr %in5, align 8
 ; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load double, ptr %in6, align 8
 ; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v7 = load double, ptr %in7, align 8
-; AVX512: LV: Found an estimated cost of 80 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v7 = load double, ptr %in7, align 8
-; AVX512: LV: Found an estimated cost of 160 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v7 = load double, ptr %in7, align 8
+; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
+; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
+; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
+; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8
+; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8
+; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v5 = load double, ptr %in5, align 8
+; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v6 = load double, ptr %in6, align 8
+; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v7 = load double, ptr %in7, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load double, ptr %in3, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v4 = load double, ptr %in4, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v5 = load double, ptr %in5, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v6 = load double, ptr %in6, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v7 = load double, ptr %in7, align 8
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-5.ll
index 964a9b660942e1..6737c722b46ff9 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-5.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-5.ll
@@ -93,31 +93,31 @@ define void @test() {
 ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
 ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
 ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2: LV: Found an estimated cost of 20 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2: LV: Found an estimated cost of 40 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2: LV: Found an estimated cost of 80 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2: LV: Found an estimated cost of 165 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2: LV: Found an estimated cost of 330 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i16, ptr %in4, align 2
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2
+; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
+; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2
+; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2
+; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2
+; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2
+; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
+; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v1 = load i16, ptr %in1, align 2
+; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v2 = load i16, ptr %in2, align 2
+; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v3 = load i16, ptr %in3, align 2
+; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v4 = load i16, ptr %in4, align 2
 ;
 ; AVX512DQ-LABEL: 'test'
 ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-7.ll
index 6653198397dd25..46d56a75f1c4de 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-7.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-7.ll
@@ -123,41 +123,41 @@ define void @test() {
 ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
 ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2
 ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX2: LV: Found an estimated cost of 28 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX2: LV: Found an estimated cost of 56 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX2: LV: Found an estimated cost of 112 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX2: LV: Found an estimated cost of 231 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX2: LV: Found an estimated cost of 462 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v6 = load i16, ptr %in6, align 2
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i16, ptr %in5, align 2
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i16, ptr %in6, align 2
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i16, ptr %in5, align 2
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i16, ptr %in6, align 2
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v5 = load i16, ptr %in5, align 2
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v6 = load i16, ptr %in6, align 2
+; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
+; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2
+; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2
+; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2
+; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2
+; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v5 = load i16, ptr %in5, align 2
+; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v6 = load i16, ptr %in6, align 2
+; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
+; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v1 = load i16, ptr %in1, align 2
+; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v2 = load i16, ptr %in2, align 2
+; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v3 = load i16, ptr %in3, align 2
+; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v4 = load i16, ptr %in4, align 2
+; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v5 = load i16, ptr %in5, align 2
+; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v6 = load i16, ptr %in6, align 2
 ;
 ; AVX512DQ-LABEL: 'test'
 ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-8.ll
index b3a5cbeccc09c4..4d65abdaf688c9 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-8.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-8.ll
@@ -138,46 +138,46 @@ define void @test() {
 ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2
 ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2
 ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i16, ptr %in7, align 2
-; AVX2: LV: Found an estimated cost of 32 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v7 = load i16, ptr %in7, align 2
-; AVX2: LV: Found an estimated cost of 64 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v7 = load i16, ptr %in7, align 2
-; AVX2: LV: Found an estimated cost of 128 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v7 = load i16, ptr %in7, align 2
-; AVX2: LV: Found an estimated cost of 264 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v7 = load i16, ptr %in7, align 2
-; AVX2: LV: Found an estimated cost of 528 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v7 = load i16, ptr %in7, align 2
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i16, ptr %in5, align 2
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i16, ptr %in6, align 2
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v7 = load i16, ptr %in7, align 2
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i16, ptr %in5, align 2
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i16, ptr %in6, align 2
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v7 = load i16, ptr %in7, align 2
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v5 = load i16, ptr %in5, align 2
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v6 = load i16, ptr %in6, align 2
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v7 = load i16, ptr %in7, align 2
+; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
+; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2
+; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2
+; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2
+; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2
+; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v5 = load i16, ptr %in5, align 2
+; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v6 = load i16, ptr %in6, align 2
+; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v7 = load i16, ptr %in7, align 2
+; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
+; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v1 = load i16, ptr %in1, align 2
+; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v2 = load i16, ptr %in2, align 2
+; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v3 = load i16, ptr %in3, align 2
+; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v4 = load i16, ptr %in4, align 2
+; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v5 = load i16, ptr %in5, align 2
+; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v6 = load i16, ptr %in6, align 2
+; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v7 = load i16, ptr %in7, align 2
 ;
 ; AVX512DQ-LABEL: 'test'
 ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-01uu.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-01uu.ll
index c0ea210385dfda..28a6443efcfb9d 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-01uu.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-01uu.ll
@@ -73,8 +73,8 @@ define void @test() {
 ; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4
 ; AVX512: LV: Found an estimated cost of 50 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
 ; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX512: LV: Found an estimated cost of 160 for VF 64 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load i32, ptr %in1, align 4
+; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v1 = load i32, ptr %in1, align 4
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-5.ll
index 2a261ca4de4fa4..5cad7bf662c5b3 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-5.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-5.ll
@@ -82,26 +82,26 @@ define void @test() {
 ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
 ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
 ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX2: LV: Found an estimated cost of 20 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX2: LV: Found an estimated cost of 40 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX2: LV: Found an estimated cost of 85 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX2: LV: Found an estimated cost of 170 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i32, ptr %in4, align 4
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i32, ptr %in2, align 4
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i32, ptr %in3, align 4
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i32, ptr %in4, align 4
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i32, ptr %in2, align 4
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i32, ptr %in3, align 4
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i32, ptr %in4, align 4
+; AVX2: LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 17 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4
+; AVX2: LV: Found an estimated cost of 17 for VF 8 For instruction: %v2 = load i32, ptr %in2, align 4
+; AVX2: LV: Found an estimated cost of 17 for VF 8 For instruction: %v3 = load i32, ptr %in3, align 4
+; AVX2: LV: Found an estimated cost of 17 for VF 8 For instruction: %v4 = load i32, ptr %in4, align 4
+; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4
+; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v2 = load i32, ptr %in2, align 4
+; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v3 = load i32, ptr %in3, align 4
+; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v4 = load i32, ptr %in4, align 4
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
@@ -139,11 +139,11 @@ define void @test() {
 ; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i32, ptr %in2, align 4
 ; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i32, ptr %in3, align 4
 ; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX512: LV: Found an estimated cost of 400 for VF 64 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 64 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 64 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 64 For instruction: %v4 = load i32, ptr %in4, align 4
+; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v1 = load i32, ptr %in1, align 4
+; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v2 = load i32, ptr %in2, align 4
+; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v3 = load i32, ptr %in3, align 4
+; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v4 = load i32, ptr %in4, align 4
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-7.ll
index 8bf3071d29fbe1..cfb83d4a023653 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-7.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-7.ll
@@ -108,34 +108,34 @@ define void @test() {
 ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
 ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4
 ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX2: LV: Found an estimated cost of 28 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX2: LV: Found an estimated cost of 56 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX2: LV: Found an estimated cost of 119 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX2: LV: Found an estimated cost of 238 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i32, ptr %in6, align 4
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i32, ptr %in2, align 4
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i32, ptr %in3, align 4
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i32, ptr %in4, align 4
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i32, ptr %in5, align 4
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i32, ptr %in6, align 4
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i32, ptr %in2, align 4
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i32, ptr %in3, align 4
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i32, ptr %in4, align 4
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i32, ptr %in5, align 4
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i32, ptr %in6, align 4
+; AVX2: LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 17 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4
+; AVX2: LV: Found an estimated cost of 17 for VF 8 For instruction: %v2 = load i32, ptr %in2, align 4
+; AVX2: LV: Found an estimated cost of 17 for VF 8 For instruction: %v3 = load i32, ptr %in3, align 4
+; AVX2: LV: Found an estimated cost of 17 for VF 8 For instruction: %v4 = load i32, ptr %in4, align 4
+; AVX2: LV: Found an estimated cost of 17 for VF 8 For instruction: %v5 = load i32, ptr %in5, align 4
+; AVX2: LV: Found an estimated cost of 17 for VF 8 For instruction: %v6 = load i32, ptr %in6, align 4
+; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4
+; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v2 = load i32, ptr %in2, align 4
+; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v3 = load i32, ptr %in3, align 4
+; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v4 = load i32, ptr %in4, align 4
+; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v5 = load i32, ptr %in5, align 4
+; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v6 = load i32, ptr %in6, align 4
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
@@ -180,13 +180,13 @@ define void @test() {
 ; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i32, ptr %in4, align 4
 ; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i32, ptr %in5, align 4
 ; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX512: LV: Found an estimated cost of 280 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v6 = load i32, ptr %in6, align 4
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load i32, ptr %in1, align 4
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load i32, ptr %in2, align 4
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v3 = load i32, ptr %in3, align 4
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v4 = load i32, ptr %in4, align 4
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v5 = load i32, ptr %in5, align 4
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v6 = load i32, ptr %in6, align 4
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-8.ll
index 3182de2df058a8..775785462de474 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-8.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-8.ll
@@ -121,22 +121,22 @@ define void @test() {
 ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4
 ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4
 ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i32, ptr %in7, align 4
-; AVX2: LV: Found an estimated cost of 32 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v7 = load i32, ptr %in7, align 4
-; AVX2: LV: Found an estimated cost of 64 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v7 = load i32, ptr %in7, align 4
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i32, ptr %in2, align 4
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i32, ptr %in3, align 4
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i32, ptr %in4, align 4
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i32, ptr %in5, align 4
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i32, ptr %in6, align 4
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v7 = load i32, ptr %in7, align 4
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i32, ptr %in2, align 4
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i32, ptr %in3, align 4
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i32, ptr %in4, align 4
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i32, ptr %in5, align 4
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i32, ptr %in6, align 4
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v7 = load i32, ptr %in7, align 4
 ; AVX2: LV: Found an estimated cost of 48 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
 ; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4
 ; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i32, ptr %in2, align 4
@@ -145,14 +145,14 @@ define void @test() {
 ; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i32, ptr %in5, align 4
 ; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i32, ptr %in6, align 4
 ; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v7 = load i32, ptr %in7, align 4
-; AVX2: LV: Found an estimated cost of 272 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v7 = load i32, ptr %in7, align 4
+; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4
+; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v2 = load i32, ptr %in2, align 4
+; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v3 = load i32, ptr %in3, align 4
+; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v4 = load i32, ptr %in4, align 4
+; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v5 = load i32, ptr %in5, align 4
+; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v6 = load i32, ptr %in6, align 4
+; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v7 = load i32, ptr %in7, align 4
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
@@ -203,14 +203,14 @@ define void @test() {
 ; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i32, ptr %in5, align 4
 ; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i32, ptr %in6, align 4
 ; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v7 = load i32, ptr %in7, align 4
-; AVX512: LV: Found an estimated cost of 320 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v7 = load i32, ptr %in7, align 4
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load i32, ptr %in1, align 4
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load i32, ptr %in2, align 4
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v3 = load i32, ptr %in3, align 4
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v4 = load i32, ptr %in4, align 4
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v5 = load i32, ptr %in5, align 4
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v6 = load i32, ptr %in6, align 4
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v7 = load i32, ptr %in7, align 4
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-2.ll
index 27e2ee0392615b..cf350cc9f8307e 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-2.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-2.ll
@@ -71,10 +71,10 @@ define void @test() {
 ; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
 ; AVX512: LV: Found an estimated cost of 22 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
 ; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512: LV: Found an estimated cost of 80 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512: LV: Found an estimated cost of 160 for VF 64 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load i64, ptr %in1, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8
+; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v0 = load i64, ptr %in0, align 8
+; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v1 = load i64, ptr %in1, align 8
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-3.ll
index c37723257c1f75..9ca0d8c9d7e33b 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-3.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-3.ll
@@ -88,12 +88,12 @@ define void @test() {
 ; AVX512: LV: Found an estimated cost of 51 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
 ; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8
 ; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512: LV: Found an estimated cost of 120 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512: LV: Found an estimated cost of 240 for VF 64 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 64 For instruction: %v2 = load i64, ptr %in2, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load i64, ptr %in2, align 8
+; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v0 = load i64, ptr %in0, align 8
+; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v1 = load i64, ptr %in1, align 8
+; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v2 = load i64, ptr %in2, align 8
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-4.ll
index 2eb7c5e93078f5..86ee6c8b30bda0 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-4.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-4.ll
@@ -107,14 +107,14 @@ define void @test() {
 ; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
 ; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8
 ; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX512: LV: Found an estimated cost of 80 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX512: LV: Found an estimated cost of 160 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i64, ptr %in3, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load i64, ptr %in3, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 =
load i64, ptr %in1, align 8 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load i64, ptr %in2, align 8 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v3 = load i64, ptr %in3, align 8 ; entry: br label %for.body diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-5.ll index c11da4309737d2..f6143d4ae9f3b1 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-5.ll @@ -72,21 +72,21 @@ define void @test() { ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8 ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 20 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 45 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 90 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8 +; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8 +; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8 +; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8 +; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8 +; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8 +; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8 +; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8 +; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8 +; AVX2: LV: Found an estimated cost of 18 for VF 8 
For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8 +; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8 +; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8 +; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 @@ -114,16 +114,16 @@ define void @test() { ; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8 ; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8 ; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 100 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 200 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i64, ptr %in4, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load i64, ptr %in3, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v4 = load i64, ptr %in4, align 8 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load i64, ptr %in2, align 8 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v3 = load i64, ptr %in3, align 8 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v4 = load i64, ptr %in4, align 8 ; entry: br label %for.body diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-6.ll index de57af6ebe3984..43dc53d683de39 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-6.ll @@ -133,18 +133,18 @@ define void @test() { ; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i64, ptr 
%in3, align 8 ; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8 ; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX512: LV: Found an estimated cost of 120 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX512: LV: Found an estimated cost of 240 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i64, ptr %in5, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load i64, ptr %in3, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v4 = load i64, ptr %in4, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v5 = load i64, ptr %in5, align 8 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load i64, ptr %in2, align 8 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v3 = load i64, ptr %in3, align 8 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v4 = load i64, ptr %in4, align 8 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v5 = load i64, ptr %in5, align 8 ; entry: br label %for.body diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-7.ll index 949c1af1fdad3b..70ed74dcc26da3 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-7.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-7.ll @@ -94,27 +94,27 @@ define void @test() { ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8 ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8 ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8 -; AVX2: LV: Found an estimated cost of 28 for VF 2 For instruction: %v0 = load i64, ptr 
%in0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i64, ptr %in6, align 8 -; AVX2: LV: Found an estimated cost of 63 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i64, ptr %in6, align 8 -; AVX2: LV: Found an estimated cost of 126 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i64, ptr %in6, align 8 +; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8 +; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8 +; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8 +; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8 +; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i64, ptr %in5, align 8 +; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i64, ptr %in6, align 8 +; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8 +; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8 +; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8 +; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8 +; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v5 = load i64, ptr %in5, align 8 +; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v6 = load i64, ptr %in6, align 8 +; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: 
%v0 = load i64, ptr %in0, align 8 +; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8 +; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8 +; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8 +; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8 +; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v5 = load i64, ptr %in5, align 8 +; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v6 = load i64, ptr %in6, align 8 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 @@ -152,20 +152,20 @@ define void @test() { ; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8 ; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i64, ptr %in5, align 8 ; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i64, ptr %in6, align 8 -; AVX512: LV: Found an estimated cost of 140 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i64, ptr %in6, align 8 -; AVX512: LV: Found an estimated cost of 280 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v6 = load i64, ptr %in6, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load i64, ptr %in3, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v4 = load i64, ptr %in4, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v5 = load i64, ptr %in5, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v6 = load i64, ptr %in6, align 8 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load i64, ptr %in1, 
align 8 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load i64, ptr %in2, align 8 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v3 = load i64, ptr %in3, align 8 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v4 = load i64, ptr %in4, align 8 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v5 = load i64, ptr %in5, align 8 +; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v6 = load i64, ptr %in6, align 8 ; entry: br label %for.body diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-8.ll index 4388ccfbdcfc4f..401e4de111d73f 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-8.ll @@ -105,30 +105,30 @@ define void @test() { ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8 ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8 ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i64, ptr %in7, align 8 -; AVX2: LV: Found an estimated cost of 32 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i64, ptr %in6, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v7 = load i64, ptr %in7, align 8 -; AVX2: LV: Found an estimated cost of 72 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i64, ptr %in6, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v7 = load i64, ptr %in7, align 8 -; AVX2: LV: Found an estimated cost of 144 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For 
instruction: %v6 = load i64, ptr %in6, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v7 = load i64, ptr %in7, align 8 +; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8 +; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8 +; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8 +; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8 +; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i64, ptr %in5, align 8 +; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i64, ptr %in6, align 8 +; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v7 = load i64, ptr %in7, align 8 +; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8 +; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8 +; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8 +; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8 +; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v5 = load i64, ptr %in5, align 8 +; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v6 = load i64, ptr %in6, align 8 +; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v7 = load i64, ptr %in7, align 8 +; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8 +; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8 +; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8 +; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8 +; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v5 = load i64, ptr %in5, align 8 +; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v6 = load i64, ptr %in6, align 8 +; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v7 = load i64, ptr %in7, align 8 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 @@ -163,22 +163,22 @@ define void @test() { ; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i64, ptr %in5, align 8 ; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i64, ptr %in6, align 8 ; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v7 = load i64, ptr %in7, align 8 -; AVX512: LV: Found an estimated cost of 80 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 
8 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i64, ptr %in6, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v7 = load i64, ptr %in7, align 8 -; AVX512: LV: Found an estimated cost of 160 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i64, ptr %in6, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v7 = load i64, ptr %in7, align 8 +; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8 +; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8 +; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8 +; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8 +; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v5 = load i64, ptr %in5, align 8 +; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v6 = load i64, ptr %in6, align 8 +; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v7 = load i64, ptr %in7, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load i64, ptr %in3, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v4 = load i64, ptr %in4, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v5 = load i64, ptr %in5, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v6 = load i64, ptr %in6, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v7 = load i64, ptr %in7, align 8 ; entry: br label %for.body diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-5.ll index 6078fb440f9d13..ef3c80c27550a3 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-5.ll @@ -93,31 +93,31 @@ define void @test() { ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1 ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1 ; AVX2: LV: Found an estimated cost of 1 for VF 1 For 
instruction: %v4 = load i8, ptr %in4, align 1 -; AVX2: LV: Found an estimated cost of 20 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX2: LV: Found an estimated cost of 40 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX2: LV: Found an estimated cost of 80 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX2: LV: Found an estimated cost of 160 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX2: LV: Found an estimated cost of 325 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1 +; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1 +; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1 +; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1 +; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1 +; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1 +; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1 +; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1 +; AVX2: LV: Found an estimated cost of 8 for VF 4 For 
instruction: %v4 = load i8, ptr %in4, align 1 +; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1 +; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1 +; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1 +; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1 +; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1 +; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1 +; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1 +; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1 +; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1 +; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1 +; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1 +; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1 ; ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-7.ll index 778a4e7dfd7d9d..8e7c316b69b3ac 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-7.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-7.ll @@ -123,41 +123,41 @@ define void @test() { ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1 ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1 ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX2: LV: Found an estimated cost of 28 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX2: LV: Found an estimated cost of 56 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 
4 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX2: LV: Found an estimated cost of 112 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX2: LV: Found an estimated cost of 224 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX2: LV: Found an estimated cost of 455 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v6 = load i8, ptr %in6, align 1 +; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1 +; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1 +; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1 +; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1 +; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1 +; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i8, ptr %in6, align 1 +; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1 +; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1 +; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1 +; AVX2: LV: Found an estimated cost of 8 for VF 
4 For instruction: %v4 = load i8, ptr %in4, align 1 +; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1 +; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i8, ptr %in6, align 1 +; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1 +; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1 +; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1 +; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1 +; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1 +; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v6 = load i8, ptr %in6, align 1 +; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1 +; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1 +; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1 +; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1 +; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1 +; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v6 = load i8, ptr %in6, align 1 +; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1 +; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1 +; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1 +; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1 +; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v5 = load i8, ptr %in5, align 1 +; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v6 = load i8, ptr %in6, align 1 ; ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-8.ll index a230b5a0b1f2b7..752cc229922bea 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-8.ll @@ -138,46 +138,46 @@ define void @test() { ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1 ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1 ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i8, ptr %in7, align 1 -; AVX2: LV: Found an estimated cost of 32 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX2: LV: Found an estimated 
cost of 0 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v7 = load i8, ptr %in7, align 1 -; AVX2: LV: Found an estimated cost of 64 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v7 = load i8, ptr %in7, align 1 -; AVX2: LV: Found an estimated cost of 128 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v7 = load i8, ptr %in7, align 1 -; AVX2: LV: Found an estimated cost of 256 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v7 = load i8, ptr %in7, align 1 -; AVX2: LV: Found an estimated cost of 520 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX2: LV: Found an 
estimated cost of 0 for VF 32 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v7 = load i8, ptr %in7, align 1 +; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1 +; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1 +; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1 +; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1 +; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1 +; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i8, ptr %in6, align 1 +; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v7 = load i8, ptr %in7, align 1 +; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1 +; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1 +; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1 +; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1 +; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1 +; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i8, ptr %in6, align 1 +; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v7 = load i8, ptr %in7, align 1 +; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1 +; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1 +; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1 +; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1 +; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1 +; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v6 = load i8, ptr %in6, align 1 +; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v7 = load i8, ptr %in7, align 1 +; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1 +; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1 +; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1 +; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1 +; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1 +; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v6 = load i8, ptr %in6, align 1 +; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v7 = load i8, ptr %in7, align 1 +; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX2: LV: 
Found an estimated cost of 65 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1 +; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1 +; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1 +; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1 +; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v5 = load i8, ptr %in5, align 1 +; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v6 = load i8, ptr %in6, align 1 +; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v7 = load i8, ptr %in7, align 1 ; ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-8.ll index c1a66c1a41d74f..ed2bb3f750b01d 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-8.ll @@ -163,22 +163,22 @@ define void @test() { ; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v5, ptr %out5, align 8 ; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v6, ptr %out6, align 8 ; AVX512: LV: Found an estimated cost of 46 for VF 4 For instruction: store double %v7, ptr %out7, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v0, ptr %out0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v1, ptr %out1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v2, ptr %out2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v3, ptr %out3, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v4, ptr %out4, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v5, ptr %out5, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v6, ptr %out6, align 8 -; AVX512: LV: Found an estimated cost of 80 for VF 8 For instruction: store double %v7, ptr %out7, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: store double %v0, ptr %out0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: store double %v1, ptr %out1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: store double %v2, ptr %out2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: store double %v3, ptr %out3, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: store double %v4, ptr %out4, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: store double %v5, ptr %out5, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: store double %v6, ptr %out6, align 8 -; AVX512: LV: Found an estimated cost of 160 for VF 16 For instruction: store double %v7, ptr %out7, align 8 +; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store double %v0, ptr %out0, align 8 +; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store double %v1, ptr %out1, align 8 +; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: 
store double %v2, ptr %out2, align 8 +; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store double %v3, ptr %out3, align 8 +; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store double %v4, ptr %out4, align 8 +; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store double %v5, ptr %out5, align 8 +; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store double %v6, ptr %out6, align 8 +; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store double %v7, ptr %out7, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: store double %v0, ptr %out0, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: store double %v1, ptr %out1, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: store double %v2, ptr %out2, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: store double %v3, ptr %out3, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: store double %v4, ptr %out4, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: store double %v5, ptr %out5, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: store double %v6, ptr %out6, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: store double %v7, ptr %out7, align 8 ; entry: br label %for.body diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-8.ll index 7be9577960efe3..a5398771041291 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-8.ll @@ -163,22 +163,22 @@ define void @test() { ; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v5, ptr %out5, align 8 ; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v6, ptr %out6, align 8 ; AVX512: LV: Found an estimated cost of 46 for VF 4 For instruction: store i64 %v7, ptr %out7, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v0, ptr %out0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v2, ptr %out2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v3, ptr %out3, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v5, ptr %out5, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v6, ptr %out6, align 8 -; AVX512: LV: Found an estimated cost of 80 for VF 8 For instruction: store i64 %v7, ptr %out7, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: store i64 %v0, ptr %out0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: store i64 %v2, ptr %out2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: store i64 %v3, ptr %out3, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX512: LV: Found an 
estimated cost of 0 for VF 16 For instruction: store i64 %v5, ptr %out5, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: store i64 %v6, ptr %out6, align 8 -; AVX512: LV: Found an estimated cost of 160 for VF 16 For instruction: store i64 %v7, ptr %out7, align 8 +; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %v0, ptr %out0, align 8 +; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %v1, ptr %out1, align 8 +; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %v2, ptr %out2, align 8 +; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %v3, ptr %out3, align 8 +; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %v4, ptr %out4, align 8 +; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %v5, ptr %out5, align 8 +; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %v6, ptr %out6, align 8 +; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %v7, ptr %out7, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: store i64 %v0, ptr %out0, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: store i64 %v1, ptr %out1, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: store i64 %v2, ptr %out2, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: store i64 %v3, ptr %out3, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: store i64 %v4, ptr %out4, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: store i64 %v5, ptr %out5, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: store i64 %v6, ptr %out6, align 8 +; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: store i64 %v7, ptr %out7, align 8 ; entry: br label %for.body diff --git a/llvm/test/Analysis/CostModel/X86/masked-interleaved-store-i16.ll b/llvm/test/Analysis/CostModel/X86/masked-interleaved-store-i16.ll index 13a844230f89d9..41dd266d0a87ac 100644 --- a/llvm/test/Analysis/CostModel/X86/masked-interleaved-store-i16.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-interleaved-store-i16.ll @@ -38,8 +38,8 @@ define void @test1(ptr noalias nocapture %points, ptr noalias nocapture readonly ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, ptr %arrayidx7, align 2 ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx2, align 2 ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, ptr %arrayidx7, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 2 For instruction: store i16 %0, ptr %arrayidx2, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 12 for VF 2 For instruction: store i16 %2, ptr %arrayidx7, align 2 +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 6 for VF 2 For instruction: store i16 %0, ptr %arrayidx2, align 2 +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 6 for VF 2 For instruction: store i16 %2, ptr %arrayidx7, align 2 ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 4 For instruction: store i16 %0, ptr %arrayidx2, align 2 ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 14 for VF 4 For instruction: store i16 %2, ptr %arrayidx7, align 2 ; 
ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 8 For instruction: store i16 %0, ptr %arrayidx2, align 2 @@ -99,8 +99,8 @@ define void @test2(ptr noalias nocapture %points, i32 %numPoints, ptr noalias no ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, ptr %arrayidx7, align 2 ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx2, align 2 ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, ptr %arrayidx7, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 2 For instruction: store i16 %0, ptr %arrayidx2, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 10 for VF 2 For instruction: store i16 %2, ptr %arrayidx7, align 2 +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 5 for VF 2 For instruction: store i16 %0, ptr %arrayidx2, align 2 +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 5 for VF 2 For instruction: store i16 %2, ptr %arrayidx7, align 2 ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 4 For instruction: store i16 %0, ptr %arrayidx2, align 2 ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 14 for VF 4 For instruction: store i16 %2, ptr %arrayidx7, align 2 ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 8 For instruction: store i16 %0, ptr %arrayidx2, align 2 diff --git a/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll b/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll index 0a3b9a070c2b32..d9d80f1cb50ee1 100644 --- a/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll +++ b/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll @@ -648,3 +648,178 @@ loop: exit: ret void } + +define void @uitofp_v8i16_to_v8f64(ptr nocapture noundef readonly %x, ptr nocapture noundef writeonly %y, i32 noundef %n) { +; CHECK-LABEL: uitofp_v8i16_to_v8f64: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: Lloh22: +; CHECK-NEXT: adrp x8, lCPI10_0@PAGE +; CHECK-NEXT: Lloh23: +; CHECK-NEXT: adrp x9, lCPI10_1@PAGE +; CHECK-NEXT: Lloh24: +; CHECK-NEXT: adrp x10, lCPI10_2@PAGE +; CHECK-NEXT: Lloh25: +; CHECK-NEXT: ldr q0, [x8, lCPI10_0@PAGEOFF] +; CHECK-NEXT: Lloh26: +; CHECK-NEXT: adrp x8, lCPI10_3@PAGE +; CHECK-NEXT: Lloh27: +; CHECK-NEXT: ldr q1, [x9, lCPI10_1@PAGEOFF] +; CHECK-NEXT: Lloh28: +; CHECK-NEXT: ldr q2, [x10, lCPI10_2@PAGEOFF] +; CHECK-NEXT: Lloh29: +; CHECK-NEXT: ldr q3, [x8, lCPI10_3@PAGEOFF] +; CHECK-NEXT: mov x8, xzr +; CHECK-NEXT: LBB10_1: ; %vector.body +; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldr q4, [x0, x8] +; CHECK-NEXT: add x9, x1, x8 +; CHECK-NEXT: add x8, x8, #64 +; CHECK-NEXT: cmp x8, #2, lsl #12 ; =8192 +; CHECK-NEXT: tbl.16b v5, { v4 }, v0 +; CHECK-NEXT: tbl.16b v6, { v4 }, v1 +; CHECK-NEXT: tbl.16b v7, { v4 }, v2 +; CHECK-NEXT: tbl.16b v4, { v4 }, v3 +; CHECK-NEXT: ucvtf.2d v5, v5 +; CHECK-NEXT: ucvtf.2d v6, v6 +; CHECK-NEXT: ucvtf.2d v7, v7 +; CHECK-NEXT: ucvtf.2d v4, v4 +; CHECK-NEXT: stp q6, q5, [x9, #32] +; CHECK-NEXT: stp q4, q7, [x9] +; CHECK-NEXT: b.ne LBB10_1 +; CHECK-NEXT: ; %bb.2: ; %for.cond.cleanup +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpLdr Lloh26, Lloh29 +; CHECK-NEXT: .loh AdrpLdr Lloh24, Lloh28 +; CHECK-NEXT: .loh AdrpLdr Lloh23, Lloh27 +; CHECK-NEXT: .loh AdrpAdrp Lloh22, Lloh26 +; CHECK-NEXT: .loh AdrpLdr Lloh22, Lloh25 +entry: + br label %vector.body + +vector.body: + %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] + %.idx = shl nsw i64 %index, 3 + %g = 
getelementptr inbounds i8, ptr %x, i64 %.idx + %wide.vec = load <8 x i16>, ptr %g, align 2 + %u = uitofp <8 x i16> %wide.vec to <8 x double> + %h = getelementptr inbounds double, ptr %y, i64 %index + store <8 x double> %u, ptr %h, align 8 + %index.next = add nuw i64 %index, 8 + %c = icmp eq i64 %index.next, 1024 + br i1 %c, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: + ret void +} + +define void @uitofp_ld4_v32i16_to_v8f64(ptr nocapture noundef readonly %x, ptr nocapture noundef writeonly %y, i32 noundef %n) { +; CHECK-LABEL: uitofp_ld4_v32i16_to_v8f64: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: Lloh30: +; CHECK-NEXT: adrp x8, lCPI11_0@PAGE +; CHECK-NEXT: Lloh31: +; CHECK-NEXT: adrp x9, lCPI11_1@PAGE +; CHECK-NEXT: Lloh32: +; CHECK-NEXT: adrp x10, lCPI11_2@PAGE +; CHECK-NEXT: Lloh33: +; CHECK-NEXT: ldr q0, [x8, lCPI11_0@PAGEOFF] +; CHECK-NEXT: Lloh34: +; CHECK-NEXT: adrp x8, lCPI11_3@PAGE +; CHECK-NEXT: Lloh35: +; CHECK-NEXT: ldr q1, [x9, lCPI11_1@PAGEOFF] +; CHECK-NEXT: Lloh36: +; CHECK-NEXT: ldr q2, [x10, lCPI11_2@PAGEOFF] +; CHECK-NEXT: Lloh37: +; CHECK-NEXT: ldr q3, [x8, lCPI11_3@PAGEOFF] +; CHECK-NEXT: mov x8, xzr +; CHECK-NEXT: LBB11_1: ; %vector.body +; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: add x9, x0, x8 +; CHECK-NEXT: ldp q5, q4, [x9, #32] +; CHECK-NEXT: ldp q7, q6, [x9] +; CHECK-NEXT: add x9, x1, x8 +; CHECK-NEXT: add x8, x8, #64 +; CHECK-NEXT: tbl.16b v16, { v4 }, v0 +; CHECK-NEXT: tbl.16b v17, { v5 }, v0 +; CHECK-NEXT: tbl.16b v21, { v4 }, v1 +; CHECK-NEXT: tbl.16b v18, { v6 }, v0 +; CHECK-NEXT: tbl.16b v19, { v7 }, v0 +; CHECK-NEXT: tbl.16b v20, { v7 }, v1 +; CHECK-NEXT: tbl.16b v22, { v5 }, v1 +; CHECK-NEXT: tbl.16b v23, { v5 }, v2 +; CHECK-NEXT: tbl.16b v24, { v4 }, v2 +; CHECK-NEXT: tbl.16b v25, { v7 }, v2 +; CHECK-NEXT: tbl.16b v5, { v5 }, v3 +; CHECK-NEXT: tbl.16b v4, { v4 }, v3 +; CHECK-NEXT: tbl.16b v7, { v7 }, v3 +; CHECK-NEXT: tbl.16b v26, { v6 }, v1 +; CHECK-NEXT: tbl.16b v27, { v6 }, v2 +; CHECK-NEXT: tbl.16b v6, { v6 }, v3 +; CHECK-NEXT: ucvtf.2d v17, v17 +; CHECK-NEXT: ucvtf.2d v16, v16 +; CHECK-NEXT: ucvtf.2d v19, v19 +; CHECK-NEXT: ucvtf.2d v18, v18 +; CHECK-NEXT: ucvtf.2d v22, v22 +; CHECK-NEXT: ucvtf.2d v23, v23 +; CHECK-NEXT: ucvtf.2d v5, v5 +; CHECK-NEXT: ucvtf.2d v21, v21 +; CHECK-NEXT: ucvtf.2d v24, v24 +; CHECK-NEXT: ucvtf.2d v4, v4 +; CHECK-NEXT: cmp x8, #2, lsl #12 ; =8192 +; CHECK-NEXT: ucvtf.2d v20, v20 +; CHECK-NEXT: ucvtf.2d v25, v25 +; CHECK-NEXT: ucvtf.2d v7, v7 +; CHECK-NEXT: ucvtf.2d v26, v26 +; CHECK-NEXT: ucvtf.2d v27, v27 +; CHECK-NEXT: ucvtf.2d v6, v6 +; CHECK-NEXT: fadd.2d v17, v22, v17 +; CHECK-NEXT: fadd.2d v5, v23, v5 +; CHECK-NEXT: fadd.2d v16, v21, v16 +; CHECK-NEXT: fadd.2d v4, v24, v4 +; CHECK-NEXT: fadd.2d v19, v20, v19 +; CHECK-NEXT: fadd.2d v7, v25, v7 +; CHECK-NEXT: fadd.2d v18, v26, v18 +; CHECK-NEXT: fadd.2d v6, v27, v6 +; CHECK-NEXT: fadd.2d v5, v17, v5 +; CHECK-NEXT: fadd.2d v4, v16, v4 +; CHECK-NEXT: fadd.2d v7, v19, v7 +; CHECK-NEXT: fadd.2d v6, v18, v6 +; CHECK-NEXT: stp q5, q4, [x9, #32] +; CHECK-NEXT: stp q7, q6, [x9] +; CHECK-NEXT: b.ne LBB11_1 +; CHECK-NEXT: ; %bb.2: ; %for.cond.cleanup +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpLdr Lloh34, Lloh37 +; CHECK-NEXT: .loh AdrpLdr Lloh32, Lloh36 +; CHECK-NEXT: .loh AdrpLdr Lloh31, Lloh35 +; CHECK-NEXT: .loh AdrpAdrp Lloh30, Lloh34 +; CHECK-NEXT: .loh AdrpLdr Lloh30, Lloh33 +entry: + br label %vector.body + +vector.body: + %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] + %.idx = shl nsw i64 %index, 3 + %0 = 
getelementptr inbounds i8, ptr %x, i64 %.idx + %wide.vec = load <32 x i16>, ptr %0, align 2 + %strided.vec = shufflevector <32 x i16> %wide.vec, <32 x i16> poison, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28> + %strided.vec36 = shufflevector <32 x i16> %wide.vec, <32 x i16> poison, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29> + %strided.vec37 = shufflevector <32 x i16> %wide.vec, <32 x i16> poison, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30> + %strided.vec38 = shufflevector <32 x i16> %wide.vec, <32 x i16> poison, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31> + %1 = uitofp <8 x i16> %strided.vec to <8 x double> + %2 = uitofp <8 x i16> %strided.vec36 to <8 x double> + %3 = fadd fast <8 x double> %2, %1 + %4 = uitofp <8 x i16> %strided.vec37 to <8 x double> + %5 = fadd fast <8 x double> %3, %4 + %6 = uitofp <8 x i16> %strided.vec38 to <8 x double> + %7 = fadd fast <8 x double> %5, %6 + %8 = getelementptr inbounds double, ptr %y, i64 %index + store <8 x double> %7, ptr %8, align 8 + %index.next = add nuw i64 %index, 8 + %9 = icmp eq i64 %index.next, 1024 + br i1 %9, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: + ret void +} + diff --git a/llvm/test/CodeGen/DirectX/BufferLoad.ll b/llvm/test/CodeGen/DirectX/BufferLoad.ll index 4b9fb52f0b5299..e3a4441ad6e833 100644 --- a/llvm/test/CodeGen/DirectX/BufferLoad.ll +++ b/llvm/test/CodeGen/DirectX/BufferLoad.ll @@ -4,6 +4,7 @@ target triple = "dxil-pc-shadermodel6.6-compute" declare void @scalar_user(float) declare void @vector_user(<4 x float>) +declare void @check_user(i1) define void @loadv4f32() { ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 217, %dx.types.Handle [[BIND]] @@ -128,6 +129,27 @@ define void @loadv2f32() { ret void } +define void @loadv4f32_checkbit() { + ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding + ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 217, %dx.types.Handle [[BIND]] + %buffer = call target("dx.TypedBuffer", <4 x float>, 0, 0, 0) + @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_0_0_0( + i32 0, i32 0, i32 1, i32 0, i1 false) + + ; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef) + %data0 = call {<4 x float>, i1} @llvm.dx.typedBufferLoad.checkbit.f32( + target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i32 0) + + ; CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.f32 [[DATA0]], 4 + ; CHECK: [[MAPPED:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]] + %check = extractvalue {<4 x float>, i1} %data0, 1 + + ; CHECK: call void @check_user(i1 [[MAPPED]]) + call void @check_user(i1 %check) + + ret void +} + define void @loadv4i32() { ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 217, %dx.types.Handle [[BIND]] diff --git a/llvm/test/CodeGen/DirectX/CreateHandleFromBinding.ll b/llvm/test/CodeGen/DirectX/CreateHandleFromBinding.ll index d0c80c018b8d7e..dbdd2e61df7a3b 100644 --- a/llvm/test/CodeGen/DirectX/CreateHandleFromBinding.ll +++ b/llvm/test/CodeGen/DirectX/CreateHandleFromBinding.ll @@ -19,14 +19,14 @@ define void @test_bindings() { %typed0 = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0) @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_1_0_0( i32 3, i32 5, i32 1, i32 4, i1 false) - ; CHECK: [[BUF0:%[0-9]*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 218, %dx.types.ResBind { i32 5, i32 5, i32 3, i8 1 }, i32 4, i1 false) + ; CHECK: [[BUF0:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 218, %dx.types.ResBind { i32 5, i32 5, i32 3, i8 1 
}, i32 4, i1 false) ; CHECK: call %dx.types.Handle @dx.op.annotateHandle(i32 217, %dx.types.Handle [[BUF0]], %dx.types.ResourceProperties { i32 4106, i32 1033 }) ; RWBuffer Buf : register(u7, space2) %typed1 = call target("dx.TypedBuffer", i32, 1, 0, 1) @llvm.dx.handle.fromBinding.tdx.TypedBuffer_i32_1_0_0t( i32 2, i32 7, i32 1, i32 6, i1 false) - ; CHECK: [[BUF1:%[0-9]*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 218, %dx.types.ResBind { i32 7, i32 7, i32 2, i8 1 }, i32 6, i1 false) + ; CHECK: [[BUF1:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 218, %dx.types.ResBind { i32 7, i32 7, i32 2, i8 1 }, i32 6, i1 false) ; CHECK: call %dx.types.Handle @dx.op.annotateHandle(i32 217, %dx.types.Handle [[BUF1]], %dx.types.ResourceProperties { i32 4106, i32 260 }) ; Buffer Buf[24] : register(t3, space5) @@ -35,7 +35,7 @@ define void @test_bindings() { %typed2 = call target("dx.TypedBuffer", <4 x i32>, 0, 0, 0) @llvm.dx.handle.fromBinding.tdx.TypedBuffer_i32_0_0_0t( i32 5, i32 3, i32 24, i32 7, i1 false) - ; CHECK: [[BUF2:%[0-9]*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 218, %dx.types.ResBind { i32 3, i32 26, i32 5, i8 0 }, i32 7, i1 false) + ; CHECK: [[BUF2:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 218, %dx.types.ResBind { i32 3, i32 26, i32 5, i8 0 }, i32 7, i1 false) ; CHECK: call %dx.types.Handle @dx.op.annotateHandle(i32 217, %dx.types.Handle [[BUF2]], %dx.types.ResourceProperties { i32 10, i32 1029 }) ; struct S { float4 a; uint4 b; }; @@ -43,14 +43,14 @@ define void @test_bindings() { %struct0 = call target("dx.RawBuffer", {<4 x float>, <4 x i32>}, 0, 0) @llvm.dx.handle.fromBinding.tdx.RawBuffer_sl_v4f32v4i32s_0_0t( i32 4, i32 2, i32 1, i32 10, i1 true) - ; CHECK: [[BUF3:%[0-9]*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 218, %dx.types.ResBind { i32 2, i32 2, i32 4, i8 0 }, i32 10, i1 true) + ; CHECK: [[BUF3:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 218, %dx.types.ResBind { i32 2, i32 2, i32 4, i8 0 }, i32 10, i1 true) ; CHECK: = call %dx.types.Handle @dx.op.annotateHandle(i32 217, %dx.types.Handle [[BUF3]], %dx.types.ResourceProperties { i32 1036, i32 32 }) ; ByteAddressBuffer Buf : register(t8, space1) %byteaddr0 = call target("dx.RawBuffer", i8, 0, 0) @llvm.dx.handle.fromBinding.tdx.RawBuffer_i8_0_0t( i32 1, i32 8, i32 1, i32 12, i1 false) - ; CHECK: [[BUF4:%[0-9]*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 218, %dx.types.ResBind { i32 8, i32 8, i32 1, i8 0 }, i32 12, i1 false) + ; CHECK: [[BUF4:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 218, %dx.types.ResBind { i32 8, i32 8, i32 1, i8 0 }, i32 12, i1 false) ; CHECK: call %dx.types.Handle @dx.op.annotateHandle(i32 217, %dx.types.Handle [[BUF4]], %dx.types.ResourceProperties { i32 11, i32 0 }) ; Buffer Buf[] : register(t0) @@ -59,7 +59,7 @@ define void @test_bindings() { %typed3 = call target("dx.TypedBuffer", <4 x float>, 0, 0, 0) @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_0_0_0t( i32 0, i32 0, i32 -1, i32 %typed3_ix, i1 false) - ; CHECK: [[BUF5:%[0-9]*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 218, %dx.types.ResBind { i32 0, i32 -1, i32 0, i8 0 }, i32 %typed3_ix, i1 false) + ; CHECK: [[BUF5:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 218, %dx.types.ResBind { i32 0, i32 -1, i32 0, i8 0 }, i32 %typed3_ix, i1 false) ; CHECK: call %dx.types.Handle @dx.op.annotateHandle(i32 217, %dx.types.Handle [[BUF5]], %dx.types.ResourceProperties { i32 
10, i32 1033 }) ret void diff --git a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll index f6db9c429dba57..176dfee11cfb09 100644 --- a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll +++ b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll @@ -1,18 +1,30 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: opt < %s -S -nvptx-lower-args --mtriple nvptx64-nvidia-cuda | FileCheck %s --check-prefixes OPT -; RUN: llc < %s -mcpu=sm_70 --mtriple nvptx64-nvidia-cuda | FileCheck %s --check-prefixes PTX +; RUN: opt < %s -S -nvptx-lower-args --mtriple nvptx64-nvidia-cuda -mcpu=sm_70 -mattr=+ptx77 | FileCheck %s --check-prefixes OPT +; RUN: llc < %s --mtriple nvptx64-nvidia-cuda -mcpu=sm_70 -mattr=+ptx77 | FileCheck %s --check-prefixes PTX define void @grid_const_int(ptr byval(i32) align 4 %input1, i32 %input2, ptr %out, i32 %n) { ; PTX-LABEL: grid_const_int( -; PTX-NOT: ld.u32 -; PTX: ld.param.{{.*}} [[R2:%.*]], [grid_const_int_param_0]; -; +; PTX: { +; PTX-NEXT: .reg .b32 %r<4>; +; PTX-NEXT: .reg .b64 %rd<3>; +; PTX-EMPTY: +; PTX-NEXT: // %bb.0: +; PTX-NEXT: ld.param.u64 %rd1, [grid_const_int_param_2]; +; PTX-NEXT: cvta.to.global.u64 %rd2, %rd1; +; PTX-NEXT: ld.param.u32 %r1, [grid_const_int_param_1]; +; PTX-NEXT: ld.param.u32 %r2, [grid_const_int_param_0]; +; PTX-NEXT: add.s32 %r3, %r2, %r1; +; PTX-NEXT: st.global.u32 [%rd2], %r3; +; PTX-NEXT: ret; ; OPT-LABEL: define void @grid_const_int( -; OPT-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], i32 [[INPUT2:%.*]], ptr [[OUT:%.*]], i32 [[N:%.*]]) { -; OPT-NOT: alloca -; OPT: [[INPUT11:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101) -; OPT: [[TMP:%.*]] = load i32, ptr addrspace(101) [[INPUT11]], align 4 -; +; OPT-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], i32 [[INPUT2:%.*]], ptr [[OUT:%.*]], i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] { +; OPT-NEXT: [[OUT2:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1) +; OPT-NEXT: [[OUT3:%.*]] = addrspacecast ptr addrspace(1) [[OUT2]] to ptr +; OPT-NEXT: [[INPUT11:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101) +; OPT-NEXT: [[TMP:%.*]] = load i32, ptr addrspace(101) [[INPUT11]], align 4 +; OPT-NEXT: [[ADD:%.*]] = add i32 [[TMP]], [[INPUT2]] +; OPT-NEXT: store i32 [[ADD]], ptr [[OUT3]], align 4 +; OPT-NEXT: ret void %tmp = load i32, ptr %input1, align 4 %add = add i32 %tmp, %input2 store i32 %add, ptr %out @@ -24,19 +36,29 @@ define void @grid_const_int(ptr byval(i32) align 4 %input1, i32 %input2, ptr %ou define void @grid_const_struct(ptr byval(%struct.s) align 4 %input, ptr %out){ ; PTX-LABEL: grid_const_struct( ; PTX: { -; PTX-NOT: ld.u32 -; PTX: ld.param.{{.*}} [[R1:%.*]], [grid_const_struct_param_0]; -; PTX: ld.param.{{.*}} [[R2:%.*]], [grid_const_struct_param_0+4]; -; +; PTX-NEXT: .reg .b32 %r<4>; +; PTX-NEXT: .reg .b64 %rd<3>; +; PTX-EMPTY: +; PTX-NEXT: // %bb.0: +; PTX-NEXT: ld.param.u64 %rd1, [grid_const_struct_param_1]; +; PTX-NEXT: cvta.to.global.u64 %rd2, %rd1; +; PTX-NEXT: ld.param.u32 %r1, [grid_const_struct_param_0]; +; PTX-NEXT: ld.param.u32 %r2, [grid_const_struct_param_0+4]; +; PTX-NEXT: add.s32 %r3, %r1, %r2; +; PTX-NEXT: st.global.u32 [%rd2], %r3; +; PTX-NEXT: ret; ; OPT-LABEL: define void @grid_const_struct( -; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[OUT:%.*]]) { -; OPT-NOT: alloca -; OPT: [[INPUT1:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101) -; OPT: [[GEP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) 
[[INPUT1]], i32 0, i32 0 -; OPT: [[GEP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT1]], i32 0, i32 1 -; OPT: [[TMP1:%.*]] = load i32, ptr addrspace(101) [[GEP13]], align 4 -; OPT: [[TMP2:%.*]] = load i32, ptr addrspace(101) [[GEP22]], align 4 -; +; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[OUT:%.*]]) #[[ATTR0]] { +; OPT-NEXT: [[OUT4:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1) +; OPT-NEXT: [[OUT5:%.*]] = addrspacecast ptr addrspace(1) [[OUT4]] to ptr +; OPT-NEXT: [[INPUT1:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101) +; OPT-NEXT: [[GEP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT1]], i32 0, i32 0 +; OPT-NEXT: [[GEP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT1]], i32 0, i32 1 +; OPT-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(101) [[GEP13]], align 4 +; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(101) [[GEP22]], align 4 +; OPT-NEXT: [[ADD:%.*]] = add i32 [[TMP1]], [[TMP2]] +; OPT-NEXT: store i32 [[ADD]], ptr [[OUT5]], align 4 +; OPT-NEXT: ret void %gep1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0 %gep2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1 %int1 = load i32, ptr %gep1 @@ -49,41 +71,85 @@ define void @grid_const_struct(ptr byval(%struct.s) align 4 %input, ptr %out){ define void @grid_const_escape(ptr byval(%struct.s) align 4 %input) { ; PTX-LABEL: grid_const_escape( ; PTX: { -; PTX-NOT: .local -; PTX: cvta.param.{{.*}} +; PTX-NEXT: .reg .b32 %r<3>; +; PTX-NEXT: .reg .b64 %rd<4>; +; PTX-EMPTY: +; PTX-NEXT: // %bb.0: +; PTX-NEXT: mov.b64 %rd1, grid_const_escape_param_0; +; PTX-NEXT: mov.u64 %rd2, %rd1; +; PTX-NEXT: cvta.param.u64 %rd3, %rd2; +; PTX-NEXT: { // callseq 0, 0 +; PTX-NEXT: .param .b64 param0; +; PTX-NEXT: st.param.b64 [param0+0], %rd3; +; PTX-NEXT: .param .b32 retval0; +; PTX-NEXT: call.uni (retval0), +; PTX-NEXT: escape, +; PTX-NEXT: ( +; PTX-NEXT: param0 +; PTX-NEXT: ); +; PTX-NEXT: ld.param.b32 %r1, [retval0+0]; +; PTX-NEXT: } // callseq 0 +; PTX-NEXT: ret; ; OPT-LABEL: define void @grid_const_escape( -; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]]) { -; OPT-NOT: alloca [[STRUCT_S]] -; OPT: [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101) -; OPT: [[INPUT_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]]) -; OPT: [[CALL:%.*]] = call i32 @escape(ptr [[INPUT_PARAM_GEN]]) -; +; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]]) #[[ATTR0]] { +; OPT-NEXT: [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101) +; OPT-NEXT: [[INPUT_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]]) +; OPT-NEXT: [[CALL:%.*]] = call i32 @escape(ptr [[INPUT_PARAM_GEN]]) +; OPT-NEXT: ret void %call = call i32 @escape(ptr %input) ret void } define void @multiple_grid_const_escape(ptr byval(%struct.s) align 4 %input, i32 %a, ptr byval(i32) align 4 %b) { ; PTX-LABEL: multiple_grid_const_escape( -; PTX: mov.{{.*}} [[RD1:%.*]], multiple_grid_const_escape_param_0; -; PTX: mov.{{.*}} [[RD2:%.*]], multiple_grid_const_escape_param_2; -; PTX: mov.{{.*}} [[RD3:%.*]], [[RD2]]; -; PTX: mov.{{.*}} [[RD4:%.*]], [[RD1]]; -; PTX: cvta.param.{{.*}} [[RD5:%.*]], [[RD4]]; -; PTX: cvta.param.{{.*}} [[RD6:%.*]], [[RD3]]; -; PTX: { -; PTX: st.param.b64 [param0+0], [[RD5]]; -; PTX: st.param.b64 [param2+0], [[RD6]]; -; +; PTX: { +; PTX-NEXT: .local .align 4 .b8 __local_depot3[4]; +; PTX-NEXT: .reg 
.b64 %SP; +; PTX-NEXT: .reg .b64 %SPL; +; PTX-NEXT: .reg .b32 %r<4>; +; PTX-NEXT: .reg .b64 %rd<9>; +; PTX-EMPTY: +; PTX-NEXT: // %bb.0: +; PTX-NEXT: mov.u64 %SPL, __local_depot3; +; PTX-NEXT: cvta.local.u64 %SP, %SPL; +; PTX-NEXT: mov.b64 %rd1, multiple_grid_const_escape_param_0; +; PTX-NEXT: mov.b64 %rd2, multiple_grid_const_escape_param_2; +; PTX-NEXT: mov.u64 %rd3, %rd2; +; PTX-NEXT: ld.param.u32 %r1, [multiple_grid_const_escape_param_1]; +; PTX-NEXT: cvta.param.u64 %rd4, %rd3; +; PTX-NEXT: mov.u64 %rd5, %rd1; +; PTX-NEXT: cvta.param.u64 %rd6, %rd5; +; PTX-NEXT: add.u64 %rd7, %SP, 0; +; PTX-NEXT: add.u64 %rd8, %SPL, 0; +; PTX-NEXT: st.local.u32 [%rd8], %r1; +; PTX-NEXT: { // callseq 1, 0 +; PTX-NEXT: .param .b64 param0; +; PTX-NEXT: st.param.b64 [param0+0], %rd6; +; PTX-NEXT: .param .b64 param1; +; PTX-NEXT: st.param.b64 [param1+0], %rd7; +; PTX-NEXT: .param .b64 param2; +; PTX-NEXT: st.param.b64 [param2+0], %rd4; +; PTX-NEXT: .param .b32 retval0; +; PTX-NEXT: call.uni (retval0), +; PTX-NEXT: escape3, +; PTX-NEXT: ( +; PTX-NEXT: param0, +; PTX-NEXT: param1, +; PTX-NEXT: param2 +; PTX-NEXT: ); +; PTX-NEXT: ld.param.b32 %r2, [retval0+0]; +; PTX-NEXT: } // callseq 1 +; PTX-NEXT: ret; ; OPT-LABEL: define void @multiple_grid_const_escape( -; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], i32 [[A:%.*]], ptr byval(i32) align 4 [[B:%.*]]) { -; OPT: [[B_PARAM:%.*]] = addrspacecast ptr [[B]] to ptr addrspace(101) -; OPT: [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101) -; OPT-NOT: alloca %struct.s -; OPT: [[A_ADDR:%.*]] = alloca i32, align 4 -; OPT: [[INPUT_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]]) -; OPT: [[B_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[B_PARAM]]) +; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], i32 [[A:%.*]], ptr byval(i32) align 4 [[B:%.*]]) #[[ATTR0]] { +; OPT-NEXT: [[B_PARAM:%.*]] = addrspacecast ptr [[B]] to ptr addrspace(101) +; OPT-NEXT: [[B_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[B_PARAM]]) +; OPT-NEXT: [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101) +; OPT-NEXT: [[INPUT_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]]) +; OPT-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; OPT-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 ; OPT-NEXT: [[CALL:%.*]] = call i32 @escape3(ptr [[INPUT_PARAM_GEN]], ptr [[A_ADDR]], ptr [[B_PARAM_GEN]]) -; +; OPT-NEXT: ret void %a.addr = alloca i32, align 4 store i32 %a, ptr %a.addr, align 4 %call = call i32 @escape3(ptr %input, ptr %a.addr, ptr %b) @@ -92,40 +158,58 @@ define void @multiple_grid_const_escape(ptr byval(%struct.s) align 4 %input, i32 define void @grid_const_memory_escape(ptr byval(%struct.s) align 4 %input, ptr %addr) { ; PTX-LABEL: grid_const_memory_escape( -; PTX-NOT: .local -; PTX: mov.b64 [[RD1:%.*]], grid_const_memory_escape_param_0; -; PTX: cvta.param.u64 [[RD3:%.*]], [[RD2:%.*]]; -; PTX: st.global.u64 [[[RD4:%.*]]], [[RD3]]; -; +; PTX: { +; PTX-NEXT: .reg .b64 %rd<6>; +; PTX-EMPTY: +; PTX-NEXT: // %bb.0: +; PTX-NEXT: mov.b64 %rd1, grid_const_memory_escape_param_0; +; PTX-NEXT: ld.param.u64 %rd2, [grid_const_memory_escape_param_1]; +; PTX-NEXT: cvta.to.global.u64 %rd3, %rd2; +; PTX-NEXT: mov.u64 %rd4, %rd1; +; PTX-NEXT: cvta.param.u64 %rd5, %rd4; +; PTX-NEXT: st.global.u64 [%rd3], %rd5; +; PTX-NEXT: ret; ; OPT-LABEL: define void @grid_const_memory_escape( -; 
OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[ADDR:%.*]]) { -; OPT-NOT: alloca [[STRUCT_S]] -; OPT: [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101) -; OPT: [[INPUT_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]]) -; OPT: store ptr [[INPUT_PARAM_GEN]], ptr {{.*}}, align 8 -; +; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[ADDR:%.*]]) #[[ATTR0]] { +; OPT-NEXT: [[ADDR4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1) +; OPT-NEXT: [[ADDR5:%.*]] = addrspacecast ptr addrspace(1) [[ADDR4]] to ptr +; OPT-NEXT: [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101) +; OPT-NEXT: [[INPUT1:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]]) +; OPT-NEXT: store ptr [[INPUT1]], ptr [[ADDR5]], align 8 +; OPT-NEXT: ret void store ptr %input, ptr %addr, align 8 ret void } define void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4 %input, ptr %result) { ; PTX-LABEL: grid_const_inlineasm_escape( -; PTX-NOT .local -; PTX: add.{{.*}} [[RD2:%.*]], [[RD1:%.*]], 4; -; PTX: cvta.param.u64 [[RD4:%.*]], [[RD2]] -; PTX: cvta.param.u64 [[RD3:%.*]], [[RD1]] -; PTX: add.s64 [[RD5:%.*]], [[RD3]], [[RD4]]; -; +; PTX: { +; PTX-NEXT: .reg .b64 %rd<8>; +; PTX-EMPTY: +; PTX-NEXT: // %bb.0: +; PTX-NEXT: mov.b64 %rd4, grid_const_inlineasm_escape_param_0; +; PTX-NEXT: ld.param.u64 %rd5, [grid_const_inlineasm_escape_param_1]; +; PTX-NEXT: cvta.to.global.u64 %rd6, %rd5; +; PTX-NEXT: mov.u64 %rd7, %rd4; +; PTX-NEXT: cvta.param.u64 %rd2, %rd7; +; PTX-NEXT: add.s64 %rd3, %rd2, 4; +; PTX-NEXT: // begin inline asm +; PTX-NEXT: add.s64 %rd1, %rd2, %rd3; +; PTX-NEXT: // end inline asm +; PTX-NEXT: st.global.u64 [%rd6], %rd1; +; PTX-NEXT: ret; +; PTX-NOT .local ; OPT-LABEL: define void @grid_const_inlineasm_escape( -; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[RESULT:%.*]]) { -; OPT-NOT: alloca [[STRUCT_S]] -; OPT: [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101) -; OPT: [[TMPPTR13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT_PARAM]], i32 0, i32 0 -; OPT: [[TMPPTR22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT_PARAM]], i32 0, i32 1 -; OPT: [[TMPPTR22_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[TMPPTR22]]) -; OPT: [[TMPPTR13_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[TMPPTR13]]) -; OPT: [[TMP2:%.*]] = call i64 asm "add.s64 $0, $1, $2 -; +; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[RESULT:%.*]]) #[[ATTR0]] { +; OPT-NEXT: [[RESULT4:%.*]] = addrspacecast ptr [[RESULT]] to ptr addrspace(1) +; OPT-NEXT: [[RESULT5:%.*]] = addrspacecast ptr addrspace(1) [[RESULT4]] to ptr +; OPT-NEXT: [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101) +; OPT-NEXT: [[INPUT1:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]]) +; OPT-NEXT: [[TMPPTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 0 +; OPT-NEXT: [[TMPPTR2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 1 +; OPT-NEXT: [[TMP2:%.*]] = call i64 asm "add.s64 $0, $1, $2 +; OPT-NEXT: store i64 [[TMP2]], ptr [[RESULT5]], align 8 +; OPT-NEXT: ret void %tmpptr1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0 %tmpptr2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1 %1 = call i64 asm "add.s64 $0, $1, $2;", 
"=l,l,l"(ptr %tmpptr1, ptr %tmpptr2) #1 @@ -135,24 +219,42 @@ define void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4 %input, pt define void @grid_const_partial_escape(ptr byval(i32) %input, ptr %output) { ; PTX-LABEL: grid_const_partial_escape( -; PTX-NOT: .local -; PTX: ld.param.{{.*}} [[R1:%.*]], [grid_const_partial_escape_param_0]; -; PTX: add.{{.*}} -; PTX: cvta.param.u64 [[RD3:%.*]], {{%.*}} -; PTX: st.param.{{.*}} [param0+0], [[RD3]] -; PTX: call -; +; PTX: { +; PTX-NEXT: .reg .b32 %r<5>; +; PTX-NEXT: .reg .b64 %rd<6>; +; PTX-EMPTY: +; PTX-NEXT: // %bb.0: +; PTX-NEXT: mov.b64 %rd1, grid_const_partial_escape_param_0; +; PTX-NEXT: ld.param.u64 %rd2, [grid_const_partial_escape_param_1]; +; PTX-NEXT: cvta.to.global.u64 %rd3, %rd2; +; PTX-NEXT: mov.u64 %rd4, %rd1; +; PTX-NEXT: cvta.param.u64 %rd5, %rd4; +; PTX-NEXT: ld.u32 %r1, [%rd5]; +; PTX-NEXT: add.s32 %r2, %r1, %r1; +; PTX-NEXT: st.global.u32 [%rd3], %r2; +; PTX-NEXT: { // callseq 2, 0 +; PTX-NEXT: .param .b64 param0; +; PTX-NEXT: st.param.b64 [param0+0], %rd5; +; PTX-NEXT: .param .b32 retval0; +; PTX-NEXT: call.uni (retval0), +; PTX-NEXT: escape, +; PTX-NEXT: ( +; PTX-NEXT: param0 +; PTX-NEXT: ); +; PTX-NEXT: ld.param.b32 %r3, [retval0+0]; +; PTX-NEXT: } // callseq 2 +; PTX-NEXT: ret; ; OPT-LABEL: define void @grid_const_partial_escape( -; OPT-SAME: ptr byval(i32) align 4 [[INPUT:%.*]], ptr {{%.*}}) { -; OPT-NOT: alloca -; OPT: [[INPUT1:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101) -; OPT: [[VAL:%.*]] = load i32, ptr addrspace(101) [[INPUT1]], align 4 -; OPT: [[TWICE:%.*]] = add i32 [[VAL]], [[VAL]] -; OPT: store i32 [[TWICE]] -; OPT: [[INPUT1_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT1]]) -; OPT: [[CALL:%.*]] = call i32 @escape(ptr [[INPUT1_GEN]]) -; OPT: ret void -; +; OPT-SAME: ptr byval(i32) [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) #[[ATTR0]] { +; OPT-NEXT: [[OUTPUT4:%.*]] = addrspacecast ptr [[OUTPUT]] to ptr addrspace(1) +; OPT-NEXT: [[OUTPUT5:%.*]] = addrspacecast ptr addrspace(1) [[OUTPUT4]] to ptr +; OPT-NEXT: [[INPUT1:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101) +; OPT-NEXT: [[INPUT1_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT1]]) +; OPT-NEXT: [[VAL1:%.*]] = load i32, ptr [[INPUT1_GEN]], align 4 +; OPT-NEXT: [[TWICE:%.*]] = add i32 [[VAL1]], [[VAL1]] +; OPT-NEXT: store i32 [[TWICE]], ptr [[OUTPUT5]], align 4 +; OPT-NEXT: [[CALL:%.*]] = call i32 @escape(ptr [[INPUT1_GEN]]) +; OPT-NEXT: ret void %val = load i32, ptr %input %twice = add i32 %val, %val store i32 %twice, ptr %output @@ -163,27 +265,46 @@ define void @grid_const_partial_escape(ptr byval(i32) %input, ptr %output) { define i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input, ptr %output) { ; PTX-LABEL: grid_const_partial_escapemem( ; PTX: { -; PTX: ld.param.{{.*}} [[R1:%.*]], [grid_const_partial_escapemem_param_0]; -; PTX: ld.param.{{.*}} [[R2:%.*]], [grid_const_partial_escapemem_param_0+4]; -; PTX: cvta.param.{{.*}} [[RD5:%.*]], {{%.*}}; -; PTX: st.global.{{.*}} [{{.*}}], [[RD5]]; -; PTX: add.s32 [[R3:%.*]], [[R1]], [[R2]] -; PTX: st.param.{{.*}} [param0+0], [[RD5]] -; PTX: escape +; PTX-NEXT: .reg .b32 %r<6>; +; PTX-NEXT: .reg .b64 %rd<6>; +; PTX-EMPTY: +; PTX-NEXT: // %bb.0: +; PTX-NEXT: mov.b64 %rd1, grid_const_partial_escapemem_param_0; +; PTX-NEXT: ld.param.u64 %rd2, [grid_const_partial_escapemem_param_1]; +; PTX-NEXT: cvta.to.global.u64 %rd3, %rd2; +; PTX-NEXT: mov.u64 %rd4, %rd1; +; PTX-NEXT: cvta.param.u64 %rd5, %rd4; +; PTX-NEXT: 
ld.u32 %r1, [%rd5]; +; PTX-NEXT: ld.u32 %r2, [%rd5+4]; +; PTX-NEXT: st.global.u64 [%rd3], %rd5; +; PTX-NEXT: add.s32 %r3, %r1, %r2; +; PTX-NEXT: { // callseq 3, 0 +; PTX-NEXT: .param .b64 param0; +; PTX-NEXT: st.param.b64 [param0+0], %rd5; +; PTX-NEXT: .param .b32 retval0; +; PTX-NEXT: call.uni (retval0), +; PTX-NEXT: escape, +; PTX-NEXT: ( +; PTX-NEXT: param0 +; PTX-NEXT: ); +; PTX-NEXT: ld.param.b32 %r4, [retval0+0]; +; PTX-NEXT: } // callseq 3 +; PTX-NEXT: st.param.b32 [func_retval0+0], %r3; +; PTX-NEXT: ret; ; OPT-LABEL: define i32 @grid_const_partial_escapemem( -; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr {{%.*}}) { -; OPT-NOT: alloca -; OPT: [[INPUT2:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101) -; OPT: [[PTR13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT2]], i32 0, i32 0 -; OPT: [[VAL1:%.*]] = load i32, ptr addrspace(101) [[PTR13]], align 4 -; OPT: [[PTR22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT2]], i32 0, i32 1 -; OPT: [[VAL2:%.*]] = load i32, ptr addrspace(101) [[PTR22]], align 4 -; OPT: [[INPUT1:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT2]]) -; OPT: store ptr [[INPUT1]] -; OPT: [[ADD:%.*]] = add i32 [[VAL1]], [[VAL2]] -; OPT: [[PTR1:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[PTR13]]) -; OPT: [[CALL2:%.*]] = call i32 @escape(ptr [[PTR1]]) -; +; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) #[[ATTR0]] { +; OPT-NEXT: [[OUTPUT4:%.*]] = addrspacecast ptr [[OUTPUT]] to ptr addrspace(1) +; OPT-NEXT: [[OUTPUT5:%.*]] = addrspacecast ptr addrspace(1) [[OUTPUT4]] to ptr +; OPT-NEXT: [[INPUT2:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101) +; OPT-NEXT: [[INPUT1:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT2]]) +; OPT-NEXT: [[PTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 0 +; OPT-NEXT: [[VAL1:%.*]] = load i32, ptr [[PTR1]], align 4 +; OPT-NEXT: [[PTR2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 1 +; OPT-NEXT: [[VAL2:%.*]] = load i32, ptr [[PTR2]], align 4 +; OPT-NEXT: store ptr [[INPUT1]], ptr [[OUTPUT5]], align 8 +; OPT-NEXT: [[ADD:%.*]] = add i32 [[VAL1]], [[VAL2]] +; OPT-NEXT: [[CALL2:%.*]] = call i32 @escape(ptr [[PTR1]]) +; OPT-NEXT: ret i32 [[ADD]] %ptr1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0 %val1 = load i32, ptr %ptr1 %ptr2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1 @@ -194,29 +315,48 @@ define i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input, ptr %outpu ret i32 %add } -define void @grid_const_phi_escape(ptr byval(%struct.s) align 4 %input1, ptr %inout) { -; PTX-LABEL: grid_const_phi_escape( -; PTX: cvta.param.{{.*}} [[RD1:%.*]], {{.*}} -; PTX: @[[P1:%.*]] bra $L__BB[[TARGET_LABEL:[_0-9]+]]; -; PTX: $L__BB[[TARGET_LABEL]]: -; PTX: ld.{{.*}} [[R1:%.*]], [[[RD1]]]; -; -; OPT-LABEL: define void @grid_const_phi_escape( -; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr {{%.*}}) { -; OPT: [[INPUT1_PARAM:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101) -; OPT: [[INPUT1_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT1_PARAM]]) -; OPT: br i1 {{.*}}, label %[[FIRST:.*]], label %[[SECOND:.*]] +define void @grid_const_phi(ptr byval(%struct.s) align 4 %input1, ptr %inout) { +; PTX-LABEL: grid_const_phi( +; PTX: { +; PTX-NEXT: .reg .pred %p<2>; +; PTX-NEXT: .reg .b32 %r<3>; +; PTX-NEXT: 
.reg .b64 %rd<9>; +; PTX-EMPTY: +; PTX-NEXT: // %bb.0: +; PTX-NEXT: mov.b64 %rd5, grid_const_phi_param_0; +; PTX-NEXT: ld.param.u64 %rd6, [grid_const_phi_param_1]; +; PTX-NEXT: cvta.to.global.u64 %rd1, %rd6; +; PTX-NEXT: mov.u64 %rd7, %rd5; +; PTX-NEXT: cvta.param.u64 %rd8, %rd7; +; PTX-NEXT: ld.global.u32 %r1, [%rd1]; +; PTX-NEXT: setp.lt.s32 %p1, %r1, 0; +; PTX-NEXT: @%p1 bra $L__BB8_2; +; PTX-NEXT: // %bb.1: // %second +; PTX-NEXT: add.s64 %rd8, %rd8, 4; +; PTX-NEXT: $L__BB8_2: // %merge +; PTX-NEXT: ld.u32 %r2, [%rd8]; +; PTX-NEXT: st.global.u32 [%rd1], %r2; +; PTX-NEXT: ret; +; OPT-LABEL: define void @grid_const_phi( +; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] { +; OPT-NEXT: [[INOUT1:%.*]] = addrspacecast ptr [[INOUT]] to ptr addrspace(1) +; OPT-NEXT: [[INOUT2:%.*]] = addrspacecast ptr addrspace(1) [[INOUT1]] to ptr +; OPT-NEXT: [[INPUT1_PARAM:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101) +; OPT-NEXT: [[INPUT1_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT1_PARAM]]) +; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT2]], align 4 +; OPT-NEXT: [[LESS:%.*]] = icmp slt i32 [[VAL]], 0 +; OPT-NEXT: br i1 [[LESS]], label %[[FIRST:.*]], label %[[SECOND:.*]] ; OPT: [[FIRST]]: -; OPT: [[PTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1_PARAM_GEN]], i32 0, i32 0 -; OPT: br label %[[MERGE:.*]] +; OPT-NEXT: [[PTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1_PARAM_GEN]], i32 0, i32 0 +; OPT-NEXT: br label %[[MERGE:.*]] ; OPT: [[SECOND]]: -; OPT: [[PTR2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1_PARAM_GEN]], i32 0, i32 1 -; OPT: br label %[[MERGE]] +; OPT-NEXT: [[PTR2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1_PARAM_GEN]], i32 0, i32 1 +; OPT-NEXT: br label %[[MERGE]] ; OPT: [[MERGE]]: -; OPT: [[PTRNEW:%.*]] = phi ptr [ [[PTR1]], %[[FIRST]] ], [ [[PTR2]], %[[SECOND]] ] -; OPT-NOT: load i32, ptr addrspace(101) -; OPT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4 -; +; OPT-NEXT: [[PTRNEW:%.*]] = phi ptr [ [[PTR1]], %[[FIRST]] ], [ [[PTR2]], %[[SECOND]] ] +; OPT-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4 +; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT2]], align 4 +; OPT-NEXT: ret void %val = load i32, ptr %inout %less = icmp slt i32 %val, 0 @@ -235,32 +375,53 @@ merge: } ; NOTE: %input2 is *not* grid_constant -define void @grid_const_phi_escape2(ptr byval(%struct.s) align 4 %input1, ptr byval(%struct.s) %input2, ptr %inout) { -; PTX-LABEL: grid_const_phi_escape2( -; PTX: ld.param.{{.*}} [[R1:%.*]], [grid_const_phi_escape2_param_1+4]; -; PTX: @[[P1:%.*]] bra $L__BB[[LABEL:[_0-9]+]]; -; PTX: cvta.param.u64 [[RD1:%.*]], [[RD2:%.*]]; -; PTX: ld.u32 [[R1]], [[[RD1]]]; -; PTX: $L__BB[[LABEL]]: -; PTX: st.global.u32 [[[RD3:%.*]]], [[R1]] -; OPT-LABEL: define void @grid_const_phi_escape2( -; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr byval([[STRUCT_S]]) [[INPUT2:%.*]], ptr {{%.*}}) { -; OPT: [[INPUT24:%.*]] = alloca [[STRUCT_S]], align 8 -; OPT: [[INPUT25:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101) -; OPT: [[INPUT26:%.*]] = load [[STRUCT_S]], ptr addrspace(101) [[INPUT25]], align 8 -; OPT: store [[STRUCT_S]] [[INPUT26]], ptr [[INPUT24]], align 4 -; OPT: [[INPUT12:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101) -; OPT: [[INPUT11:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT12]]) -; OPT: br i1 [[LESS:%.*]], label %[[FIRST:.*]], label 
%[[SECOND:.*]] +define void @grid_const_phi_ngc(ptr byval(%struct.s) align 4 %input1, ptr byval(%struct.s) %input2, ptr %inout) { +; PTX-LABEL: grid_const_phi_ngc( +; PTX: { +; PTX-NEXT: .reg .pred %p<2>; +; PTX-NEXT: .reg .b32 %r<3>; +; PTX-NEXT: .reg .b64 %rd<12>; +; PTX-EMPTY: +; PTX-NEXT: // %bb.0: +; PTX-NEXT: mov.b64 %rd6, grid_const_phi_ngc_param_0; +; PTX-NEXT: ld.param.u64 %rd7, [grid_const_phi_ngc_param_2]; +; PTX-NEXT: cvta.to.global.u64 %rd1, %rd7; +; PTX-NEXT: mov.u64 %rd10, %rd6; +; PTX-NEXT: cvta.param.u64 %rd11, %rd10; +; PTX-NEXT: ld.global.u32 %r1, [%rd1]; +; PTX-NEXT: setp.lt.s32 %p1, %r1, 0; +; PTX-NEXT: @%p1 bra $L__BB9_2; +; PTX-NEXT: // %bb.1: // %second +; PTX-NEXT: mov.b64 %rd8, grid_const_phi_ngc_param_1; +; PTX-NEXT: mov.u64 %rd9, %rd8; +; PTX-NEXT: cvta.param.u64 %rd2, %rd9; +; PTX-NEXT: add.s64 %rd11, %rd2, 4; +; PTX-NEXT: $L__BB9_2: // %merge +; PTX-NEXT: ld.u32 %r2, [%rd11]; +; PTX-NEXT: st.global.u32 [%rd1], %r2; +; PTX-NEXT: ret; +; OPT-LABEL: define void @grid_const_phi_ngc( +; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr byval([[STRUCT_S]]) [[INPUT2:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] { +; OPT-NEXT: [[INOUT1:%.*]] = addrspacecast ptr [[INOUT]] to ptr addrspace(1) +; OPT-NEXT: [[INOUT2:%.*]] = addrspacecast ptr addrspace(1) [[INOUT1]] to ptr +; OPT-NEXT: [[INPUT2_PARAM:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101) +; OPT-NEXT: [[INPUT2_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT2_PARAM]]) +; OPT-NEXT: [[INPUT1_PARAM:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101) +; OPT-NEXT: [[INPUT1_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT1_PARAM]]) +; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT2]], align 4 +; OPT-NEXT: [[LESS:%.*]] = icmp slt i32 [[VAL]], 0 +; OPT-NEXT: br i1 [[LESS]], label %[[FIRST:.*]], label %[[SECOND:.*]] ; OPT: [[FIRST]]: -; OPT: [[PTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT11]], i32 0, i32 0 -; OPT: br label %[[MERGE:.*]] +; OPT-NEXT: [[PTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1_PARAM_GEN]], i32 0, i32 0 +; OPT-NEXT: br label %[[MERGE:.*]] ; OPT: [[SECOND]]: -; OPT: [[PTR2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT24]], i32 0, i32 1 -; OPT: br label %[[MERGE]] +; OPT-NEXT: [[PTR2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT2_PARAM_GEN]], i32 0, i32 1 +; OPT-NEXT: br label %[[MERGE]] ; OPT: [[MERGE]]: -; OPT: [[PTRNEW:%.*]] = phi ptr [ [[PTR1]], %[[FIRST]] ], [ [[PTR2]], %[[SECOND]] ] -; +; OPT-NEXT: [[PTRNEW:%.*]] = phi ptr [ [[PTR1]], %[[FIRST]] ], [ [[PTR2]], %[[SECOND]] ] +; OPT-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4 +; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT2]], align 4 +; OPT-NEXT: ret void %val = load i32, ptr %inout %less = icmp slt i32 %val, 0 br i1 %less, label %first, label %second @@ -278,22 +439,42 @@ merge: } ; NOTE: %input2 is *not* grid_constant -define void @grid_const_select_escape(ptr byval(i32) align 4 %input1, ptr byval(i32) %input2, ptr %inout) { -; PTX-LABEL: grid_const_select_escape( -; PTX: cvta.param.{{.*}} [[RD2:%.*]], [[RD1:%.*]] -; PTX: setp.lt.{{.*}} [[P1:%.*]], {{%.*}}, 0 -; PTX: add.{{.*}} [[RD3:%.*]], %SP, 0; -; PTX: selp.{{.*}} [[RD4:%.*]], [[RD2]], [[RD3]], [[P1]]; -; PTX: ld.u32 {{%.*}}, [[[RD4]]]; -; OPT-LABEL: define void @grid_const_select_escape( -; OPT-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], ptr byval(i32) [[INPUT2:%.*]], ptr [[INOUT:%.*]]) { -; OPT: [[INPUT24:%.*]] = 
alloca i32, align 4 -; OPT: [[INPUT12:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101) -; OPT: [[INPUT11:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT12]]) -; OPT: load i32, ptr [[INOUT]] -; OPT: [[PTRNEW:%.*]] = select i1 [[LESS:%.*]], ptr [[INPUT11]], ptr [[INPUT24]] -; OPT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4 -; +define void @grid_const_select(ptr byval(i32) align 4 %input1, ptr byval(i32) %input2, ptr %inout) { +; PTX-LABEL: grid_const_select( +; PTX: { +; PTX-NEXT: .reg .pred %p<2>; +; PTX-NEXT: .reg .b32 %r<3>; +; PTX-NEXT: .reg .b64 %rd<10>; +; PTX-EMPTY: +; PTX-NEXT: // %bb.0: +; PTX-NEXT: mov.b64 %rd1, grid_const_select_param_0; +; PTX-NEXT: ld.param.u64 %rd2, [grid_const_select_param_2]; +; PTX-NEXT: cvta.to.global.u64 %rd3, %rd2; +; PTX-NEXT: mov.b64 %rd4, grid_const_select_param_1; +; PTX-NEXT: mov.u64 %rd5, %rd4; +; PTX-NEXT: cvta.param.u64 %rd6, %rd5; +; PTX-NEXT: mov.u64 %rd7, %rd1; +; PTX-NEXT: cvta.param.u64 %rd8, %rd7; +; PTX-NEXT: ld.global.u32 %r1, [%rd3]; +; PTX-NEXT: setp.lt.s32 %p1, %r1, 0; +; PTX-NEXT: selp.b64 %rd9, %rd8, %rd6, %p1; +; PTX-NEXT: ld.u32 %r2, [%rd9]; +; PTX-NEXT: st.global.u32 [%rd3], %r2; +; PTX-NEXT: ret; +; OPT-LABEL: define void @grid_const_select( +; OPT-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], ptr byval(i32) [[INPUT2:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] { +; OPT-NEXT: [[INOUT1:%.*]] = addrspacecast ptr [[INOUT]] to ptr addrspace(1) +; OPT-NEXT: [[INOUT2:%.*]] = addrspacecast ptr addrspace(1) [[INOUT1]] to ptr +; OPT-NEXT: [[INPUT2_PARAM:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101) +; OPT-NEXT: [[INPUT2_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT2_PARAM]]) +; OPT-NEXT: [[INPUT1_PARAM:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101) +; OPT-NEXT: [[INPUT1_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT1_PARAM]]) +; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT2]], align 4 +; OPT-NEXT: [[LESS:%.*]] = icmp slt i32 [[VAL]], 0 +; OPT-NEXT: [[PTRNEW:%.*]] = select i1 [[LESS]], ptr [[INPUT1_PARAM_GEN]], ptr [[INPUT2_PARAM_GEN]] +; OPT-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4 +; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT2]], align 4 +; OPT-NEXT: ret void %val = load i32, ptr %inout %less = icmp slt i32 %val, 0 %ptrnew = select i1 %less, ptr %input1, ptr %input2 @@ -304,16 +485,27 @@ define void @grid_const_select_escape(ptr byval(i32) align 4 %input1, ptr byval( define i32 @grid_const_ptrtoint(ptr byval(i32) %input) { ; PTX-LABEL: grid_const_ptrtoint( -; PTX-NOT: .local -; PTX: ld.param.{{.*}} {{%.*}}, [grid_const_ptrtoint_param_0]; -; PTX: cvta.param.u64 [[RD1:%.*]], {{%.*}} -; PTX: cvt.u32.u64 {{%.*}}, [[RD1]] +; PTX: { +; PTX-NEXT: .reg .b32 %r<4>; +; PTX-NEXT: .reg .b64 %rd<4>; +; PTX-EMPTY: +; PTX-NEXT: // %bb.0: +; PTX-NEXT: mov.b64 %rd1, grid_const_ptrtoint_param_0; +; PTX-NEXT: mov.u64 %rd2, %rd1; +; PTX-NEXT: ld.param.u32 %r1, [grid_const_ptrtoint_param_0]; +; PTX-NEXT: cvta.param.u64 %rd3, %rd2; +; PTX-NEXT: cvt.u32.u64 %r2, %rd3; +; PTX-NEXT: add.s32 %r3, %r1, %r2; +; PTX-NEXT: st.param.b32 [func_retval0+0], %r3; +; PTX-NEXT: ret; ; OPT-LABEL: define i32 @grid_const_ptrtoint( -; OPT-SAME: ptr byval(i32) align 4 [[INPUT:%.*]]) { -; OPT: [[INPUT2:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101) -; OPT: [[VAL:%.*]] = load i32, ptr addrspace(101) [[INPUT2]] -; OPT: [[INPUT1:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr 
addrspace(101) [[INPUT2]]) -; OPT: [[PTRVAL:%.*]] = ptrtoint ptr [[INPUT1]] to i32 +; OPT-SAME: ptr byval(i32) align 4 [[INPUT:%.*]]) #[[ATTR0]] { +; OPT-NEXT: [[INPUT2:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101) +; OPT-NEXT: [[INPUT3:%.*]] = load i32, ptr addrspace(101) [[INPUT2]], align 4 +; OPT-NEXT: [[INPUT1:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT2]]) +; OPT-NEXT: [[PTRVAL:%.*]] = ptrtoint ptr [[INPUT1]] to i32 +; OPT-NEXT: [[KEEPALIVE:%.*]] = add i32 [[INPUT3]], [[PTRVAL]] +; OPT-NEXT: ret i32 [[KEEPALIVE]] %val = load i32, ptr %input %ptrval = ptrtoint ptr %input to i32 %keepalive = add i32 %val, %ptrval @@ -352,13 +544,13 @@ declare dso_local ptr @escape3(ptr, ptr, ptr) local_unnamed_addr !14 = !{ptr @grid_const_partial_escapemem, !"kernel", i32 1, !"grid_constant", !15} !15 = !{i32 1} -!16 = !{ptr @grid_const_phi_escape, !"kernel", i32 1, !"grid_constant", !17} +!16 = !{ptr @grid_const_phi, !"kernel", i32 1, !"grid_constant", !17} !17 = !{i32 1} -!18 = !{ptr @grid_const_phi_escape2, !"kernel", i32 1, !"grid_constant", !19} +!18 = !{ptr @grid_const_phi_ngc, !"kernel", i32 1, !"grid_constant", !19} !19 = !{i32 1} -!20 = !{ptr @grid_const_select_escape, !"kernel", i32 1, !"grid_constant", !21} +!20 = !{ptr @grid_const_select, !"kernel", i32 1, !"grid_constant", !21} !21 = !{i32 1} !22 = !{ptr @grid_const_ptrtoint, !"kernel", i32 1, !"grid_constant", !23} diff --git a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll index f041f202777f61..a414a6c41cd5b2 100644 --- a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll +++ b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll @@ -1,166 +1,469 @@ -; RUN: llc < %s -mtriple nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK32 -; RUN: llc < %s -mtriple nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK64 -; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} -; RUN: %if ptxas %{ llc < %s -mtriple nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} - -%struct.ham = type { [4 x i32] } - -; // Verify that load with static offset into parameter is done directly. 
-; CHECK-LABEL: .visible .entry static_offset -; CHECK-NOT: .local -; CHECK64: ld.param.u64 [[result_addr:%rd[0-9]+]], [{{.*}}_param_0] -; CHECK64: mov.b64 %[[param_addr:rd[0-9]+]], {{.*}}_param_1 -; CHECK64: mov.u64 %[[param_addr1:rd[0-9]+]], %[[param_addr]] -; CHECK64: cvta.to.global.u64 [[result_addr_g:%rd[0-9]+]], [[result_addr]] -; -; CHECK32: ld.param.u32 [[result_addr:%r[0-9]+]], [{{.*}}_param_0] -; CHECK32: mov.b32 %[[param_addr:r[0-9]+]], {{.*}}_param_1 -; CHECK32: mov.u32 %[[param_addr1:r[0-9]+]], %[[param_addr]] -; CHECK32: cvta.to.global.u32 [[result_addr_g:%r[0-9]+]], [[result_addr]] -; -; CHECK: ld.param.u32 [[value:%r[0-9]+]], [%[[param_addr1]]+12]; -; CHECK: st.global.u32 [[[result_addr_g]]], [[value]]; -; Function Attrs: nofree norecurse nounwind willreturn mustprogress -define dso_local void @static_offset(ptr nocapture %arg, ptr nocapture readonly byval(%struct.ham) align 4 %arg1, i32 %arg2) local_unnamed_addr #0 { -bb: - %tmp = icmp eq i32 %arg2, 3 - br i1 %tmp, label %bb3, label %bb6 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 5 +; RUN: opt < %s -mtriple nvptx64 -mcpu=sm_60 -mattr=ptx77 -nvptx-lower-args -S | FileCheck %s --check-prefixes=COMMON,SM_60 +; RUN: opt < %s -mtriple nvptx64 -mcpu=sm_70 -mattr=ptx77 -nvptx-lower-args -S | FileCheck %s --check-prefixes=COMMON,SM_70 +source_filename = "" +target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" +target triple = "nvptx64-nvidia-cuda" + +%struct.S = type { i32, i32 } + +; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) +declare dso_local void @_Z6escapePv(ptr noundef) local_unnamed_addr #0 + +; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) +declare dso_local void @_Z6escapei(i32 noundef) local_unnamed_addr #0 -bb3: ; preds = %bb - %tmp4 = getelementptr inbounds %struct.ham, ptr %arg1, i64 0, i32 0, i64 3 - %tmp5 = load i32, ptr %tmp4, align 4 - store i32 %tmp5, ptr %arg, align 4 - br label %bb6 +; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite) +declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #1 -bb6: ; preds = %bb3, %bb +; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite) +declare void @llvm.memmove.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1 immarg) #1 + +; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write) +declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #2 + +; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) +define dso_local void @read_only(ptr nocapture noundef writeonly %out, ptr nocapture noundef readonly byval(%struct.S) align 4 %s) local_unnamed_addr #0 { +; COMMON-LABEL: define dso_local void @read_only( +; COMMON-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr nocapture noundef readonly byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; COMMON-NEXT: [[ENTRY:.*:]] +; COMMON-NEXT: [[S3:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101) +; COMMON-NEXT: [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1) +; COMMON-NEXT: [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr +; COMMON-NEXT: [[I:%.*]] = load i32, ptr addrspace(101) [[S3]], align 4 +; COMMON-NEXT: store i32 [[I]], ptr [[OUT2]], align 4 +; 
COMMON-NEXT: ret void +; +entry: + %i = load i32, ptr %s, align 4 + store i32 %i, ptr %out, align 4 ret void } -; // Verify that load with dynamic offset into parameter is also done directly. -; CHECK-LABEL: .visible .entry dynamic_offset -; CHECK-NOT: .local -; CHECK64: ld.param.u64 [[result_addr:%rd[0-9]+]], [{{.*}}_param_0] -; CHECK64: mov.b64 %[[param_addr:rd[0-9]+]], {{.*}}_param_1 -; CHECK64: mov.u64 %[[param_addr1:rd[0-9]+]], %[[param_addr]] -; CHECK64: cvta.to.global.u64 [[result_addr_g:%rd[0-9]+]], [[result_addr]] -; CHECK64: add.s64 %[[param_w_offset:rd[0-9]+]], %[[param_addr1]], +; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) +define dso_local void @read_only_gep(ptr nocapture noundef writeonly %out, ptr nocapture noundef readonly byval(%struct.S) align 4 %s) local_unnamed_addr #0 { +; COMMON-LABEL: define dso_local void @read_only_gep( +; COMMON-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr nocapture noundef readonly byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { +; COMMON-NEXT: [[ENTRY:.*:]] +; COMMON-NEXT: [[S3:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101) +; COMMON-NEXT: [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1) +; COMMON-NEXT: [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr +; COMMON-NEXT: [[B4:%.*]] = getelementptr inbounds i8, ptr addrspace(101) [[S3]], i64 4 +; COMMON-NEXT: [[I:%.*]] = load i32, ptr addrspace(101) [[B4]], align 4 +; COMMON-NEXT: store i32 [[I]], ptr [[OUT2]], align 4 +; COMMON-NEXT: ret void ; -; CHECK32: ld.param.u32 [[result_addr:%r[0-9]+]], [{{.*}}_param_0] -; CHECK32: mov.b32 %[[param_addr:r[0-9]+]], {{.*}}_param_1 -; CHECK32: mov.u32 %[[param_addr1:r[0-9]+]], %[[param_addr]] -; CHECK32: cvta.to.global.u32 [[result_addr_g:%r[0-9]+]], [[result_addr]] -; CHECK32: add.s32 %[[param_w_offset:r[0-9]+]], %[[param_addr1]], +entry: + %b = getelementptr inbounds nuw i8, ptr %s, i64 4 + %i = load i32, ptr %b, align 4 + store i32 %i, ptr %out, align 4 + ret void +} + +; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) +define dso_local void @read_only_gep_asc(ptr nocapture noundef writeonly %out, ptr nocapture noundef readonly byval(%struct.S) align 4 %s) local_unnamed_addr #0 { +; COMMON-LABEL: define dso_local void @read_only_gep_asc( +; COMMON-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr nocapture noundef readonly byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { +; COMMON-NEXT: [[ENTRY:.*:]] +; COMMON-NEXT: [[S3:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101) +; COMMON-NEXT: [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1) +; COMMON-NEXT: [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr +; COMMON-NEXT: [[B4:%.*]] = getelementptr inbounds i8, ptr addrspace(101) [[S3]], i64 4 +; COMMON-NEXT: [[I:%.*]] = load i32, ptr addrspace(101) [[B4]], align 4 +; COMMON-NEXT: store i32 [[I]], ptr [[OUT2]], align 4 +; COMMON-NEXT: ret void ; -; CHECK: ld.param.u32 [[value:%r[0-9]+]], [%[[param_w_offset]]]; -; CHECK: st.global.u32 [[[result_addr_g]]], [[value]]; +entry: + %b = getelementptr inbounds nuw i8, ptr %s, i64 4 + %asc = addrspacecast ptr %b to ptr addrspace(101) + %i = load i32, ptr addrspace(101) %asc, align 4 + store i32 %i, ptr %out, align 4 + ret void +} -; Function Attrs: nofree norecurse nounwind willreturn mustprogress -define dso_local void @dynamic_offset(ptr nocapture %arg, ptr nocapture readonly 
byval(%struct.ham) align 4 %arg1, i32 %arg2) local_unnamed_addr #0 { -bb: - %tmp = sext i32 %arg2 to i64 - %tmp3 = getelementptr inbounds %struct.ham, ptr %arg1, i64 0, i32 0, i64 %tmp - %tmp4 = load i32, ptr %tmp3, align 4 - store i32 %tmp4, ptr %arg, align 4 +; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) +define dso_local void @read_only_gep_asc0(ptr nocapture noundef writeonly %out, ptr nocapture noundef readonly byval(%struct.S) align 4 %s) local_unnamed_addr #0 { +; COMMON-LABEL: define dso_local void @read_only_gep_asc0( +; COMMON-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr nocapture noundef readonly byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { +; COMMON-NEXT: [[ENTRY:.*:]] +; COMMON-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4 +; COMMON-NEXT: [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101) +; COMMON-NEXT: [[S5:%.*]] = load [[STRUCT_S]], ptr addrspace(101) [[S4]], align 4 +; COMMON-NEXT: store [[STRUCT_S]] [[S5]], ptr [[S3]], align 4 +; COMMON-NEXT: [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1) +; COMMON-NEXT: [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr +; COMMON-NEXT: [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[S3]], i64 4 +; COMMON-NEXT: [[ASC:%.*]] = addrspacecast ptr [[B]] to ptr addrspace(101) +; COMMON-NEXT: [[ASC0:%.*]] = addrspacecast ptr addrspace(101) [[ASC]] to ptr +; COMMON-NEXT: [[I:%.*]] = load i32, ptr [[ASC0]], align 4 +; COMMON-NEXT: store i32 [[I]], ptr [[OUT2]], align 4 +; COMMON-NEXT: ret void +; +entry: + %b = getelementptr inbounds nuw i8, ptr %s, i64 4 + %asc = addrspacecast ptr %b to ptr addrspace(101) + %asc0 = addrspacecast ptr addrspace(101) %asc to ptr + %i = load i32, ptr %asc0, align 4 + store i32 %i, ptr %out, align 4 + ret void +} + +; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) +define dso_local void @escape_ptr(ptr nocapture noundef readnone %out, ptr noundef byval(%struct.S) align 4 %s) local_unnamed_addr #0 { +; COMMON-LABEL: define dso_local void @escape_ptr( +; COMMON-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { +; COMMON-NEXT: [[ENTRY:.*:]] +; COMMON-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4 +; COMMON-NEXT: [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101) +; COMMON-NEXT: [[S5:%.*]] = load [[STRUCT_S]], ptr addrspace(101) [[S4]], align 4 +; COMMON-NEXT: store [[STRUCT_S]] [[S5]], ptr [[S3]], align 4 +; COMMON-NEXT: [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1) +; COMMON-NEXT: [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr +; COMMON-NEXT: call void @_Z6escapePv(ptr noundef nonnull [[S3]]) +; COMMON-NEXT: ret void +; +entry: + call void @_Z6escapePv(ptr noundef nonnull %s) #0 ret void } -; Same as above, but with a bitcast present in the chain -; CHECK-LABEL:.visible .entry gep_bitcast -; CHECK-NOT: .local -; CHECK64-DAG: ld.param.u64 [[out:%rd[0-9]+]], [gep_bitcast_param_0] -; CHECK64-DAG: mov.b64 {{%rd[0-9]+}}, gep_bitcast_param_1 +; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) +define dso_local void @escape_ptr_gep(ptr nocapture noundef readnone %out, ptr noundef byval(%struct.S) align 4 %s) local_unnamed_addr #0 { +; COMMON-LABEL: define dso_local void @escape_ptr_gep( +; COMMON-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr noundef 
byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { +; COMMON-NEXT: [[ENTRY:.*:]] +; COMMON-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4 +; COMMON-NEXT: [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101) +; COMMON-NEXT: [[S5:%.*]] = load [[STRUCT_S]], ptr addrspace(101) [[S4]], align 4 +; COMMON-NEXT: store [[STRUCT_S]] [[S5]], ptr [[S3]], align 4 +; COMMON-NEXT: [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1) +; COMMON-NEXT: [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr +; COMMON-NEXT: [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[S3]], i64 4 +; COMMON-NEXT: call void @_Z6escapePv(ptr noundef nonnull [[B]]) +; COMMON-NEXT: ret void ; -; CHECK32-DAG: ld.param.u32 [[out:%r[0-9]+]], [gep_bitcast_param_0] -; CHECK32-DAG: mov.b32 {{%r[0-9]+}}, gep_bitcast_param_1 +entry: + %b = getelementptr inbounds nuw i8, ptr %s, i64 4 + call void @_Z6escapePv(ptr noundef nonnull %b) #0 + ret void +} + +; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) +define dso_local void @escape_ptr_store(ptr nocapture noundef writeonly %out, ptr noundef byval(%struct.S) align 4 %s) local_unnamed_addr #0 { +; COMMON-LABEL: define dso_local void @escape_ptr_store( +; COMMON-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { +; COMMON-NEXT: [[ENTRY:.*:]] +; COMMON-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4 +; COMMON-NEXT: [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101) +; COMMON-NEXT: [[S5:%.*]] = load [[STRUCT_S]], ptr addrspace(101) [[S4]], align 4 +; COMMON-NEXT: store [[STRUCT_S]] [[S5]], ptr [[S3]], align 4 +; COMMON-NEXT: [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1) +; COMMON-NEXT: [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr +; COMMON-NEXT: store ptr [[S3]], ptr [[OUT2]], align 8 +; COMMON-NEXT: ret void ; -; CHECK-DAG: ld.param.u32 {{%r[0-9]+}}, [gep_bitcast_param_2] -; CHECK64: ld.param.u8 [[value:%rs[0-9]+]], [{{%rd[0-9]+}}] -; CHECK64: st.global.u8 [{{%rd[0-9]+}}], [[value]]; -; CHECK32: ld.param.u8 [[value:%rs[0-9]+]], [{{%r[0-9]+}}] -; CHECK32: st.global.u8 [{{%r[0-9]+}}], [[value]]; +entry: + store ptr %s, ptr %out, align 8 + ret void +} + +; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) +define dso_local void @escape_ptr_gep_store(ptr nocapture noundef writeonly %out, ptr noundef byval(%struct.S) align 4 %s) local_unnamed_addr #0 { +; COMMON-LABEL: define dso_local void @escape_ptr_gep_store( +; COMMON-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { +; COMMON-NEXT: [[ENTRY:.*:]] +; COMMON-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4 +; COMMON-NEXT: [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101) +; COMMON-NEXT: [[S5:%.*]] = load [[STRUCT_S]], ptr addrspace(101) [[S4]], align 4 +; COMMON-NEXT: store [[STRUCT_S]] [[S5]], ptr [[S3]], align 4 +; COMMON-NEXT: [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1) +; COMMON-NEXT: [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr +; COMMON-NEXT: [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[S3]], i64 4 +; COMMON-NEXT: store ptr [[B]], ptr [[OUT2]], align 8 +; COMMON-NEXT: ret void +; +entry: + %b = getelementptr inbounds nuw i8, ptr %s, i64 4 + store ptr %b, ptr %out, align 8 + ret void +} + +; Function Attrs: mustprogress nofree norecurse 
nosync nounwind willreturn memory(argmem: readwrite) +define dso_local void @escape_ptrtoint(ptr nocapture noundef writeonly %out, ptr noundef byval(%struct.S) align 4 %s) local_unnamed_addr #0 { +; COMMON-LABEL: define dso_local void @escape_ptrtoint( +; COMMON-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { +; COMMON-NEXT: [[ENTRY:.*:]] +; COMMON-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4 +; COMMON-NEXT: [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101) +; COMMON-NEXT: [[S5:%.*]] = load [[STRUCT_S]], ptr addrspace(101) [[S4]], align 4 +; COMMON-NEXT: store [[STRUCT_S]] [[S5]], ptr [[S3]], align 4 +; COMMON-NEXT: [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1) +; COMMON-NEXT: [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr +; COMMON-NEXT: [[I:%.*]] = ptrtoint ptr [[S3]] to i64 +; COMMON-NEXT: store i64 [[I]], ptr [[OUT2]], align 8 +; COMMON-NEXT: ret void +; +entry: + %i = ptrtoint ptr %s to i64 + store i64 %i, ptr %out, align 8 + ret void +} + +; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) +define dso_local void @memcpy_from_param(ptr nocapture noundef writeonly %out, ptr nocapture noundef readonly byval(%struct.S) align 4 %s) local_unnamed_addr #0 { +; COMMON-LABEL: define dso_local void @memcpy_from_param( +; COMMON-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr nocapture noundef readonly byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { +; COMMON-NEXT: [[ENTRY:.*:]] +; COMMON-NEXT: [[S3:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101) +; COMMON-NEXT: [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1) +; COMMON-NEXT: [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr +; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr [[OUT2]], ptr addrspace(101) [[S3]], i64 16, i1 true) +; COMMON-NEXT: ret void +; +entry: + tail call void @llvm.memcpy.p0.p0.i64(ptr %out, ptr %s, i64 16, i1 true) + ret void +} + +; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) +define dso_local void @memcpy_to_param(ptr nocapture noundef readonly %in, ptr nocapture noundef readnone byval(%struct.S) align 4 %s) local_unnamed_addr #0 { +; COMMON-LABEL: define dso_local void @memcpy_to_param( +; COMMON-SAME: ptr nocapture noundef readonly [[IN:%.*]], ptr nocapture noundef readnone byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { +; COMMON-NEXT: [[ENTRY:.*:]] +; COMMON-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4 +; COMMON-NEXT: [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101) +; COMMON-NEXT: [[S5:%.*]] = load [[STRUCT_S]], ptr addrspace(101) [[S4]], align 4 +; COMMON-NEXT: store [[STRUCT_S]] [[S5]], ptr [[S3]], align 4 +; COMMON-NEXT: [[IN1:%.*]] = addrspacecast ptr [[IN]] to ptr addrspace(1) +; COMMON-NEXT: [[IN2:%.*]] = addrspacecast ptr addrspace(1) [[IN1]] to ptr +; COMMON-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[S3]], ptr [[IN2]], i64 16, i1 true) +; COMMON-NEXT: ret void +; +entry: + tail call void @llvm.memcpy.p0.p0.i64(ptr %s, ptr %in, i64 16, i1 true) + ret void +} + +; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) +define dso_local void @copy_on_store(ptr nocapture noundef readonly %in, ptr nocapture noundef byval(%struct.S) align 4 %s, i1 noundef zeroext %b) local_unnamed_addr #0 { +; COMMON-LABEL: define dso_local 
void @copy_on_store( +; COMMON-SAME: ptr nocapture noundef readonly [[IN:%.*]], ptr nocapture noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]], i1 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; COMMON-NEXT: [[BB:.*:]] +; COMMON-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4 +; COMMON-NEXT: [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101) +; COMMON-NEXT: [[S5:%.*]] = load [[STRUCT_S]], ptr addrspace(101) [[S4]], align 4 +; COMMON-NEXT: store [[STRUCT_S]] [[S5]], ptr [[S3]], align 4 +; COMMON-NEXT: [[IN1:%.*]] = addrspacecast ptr [[IN]] to ptr addrspace(1) +; COMMON-NEXT: [[IN2:%.*]] = addrspacecast ptr addrspace(1) [[IN1]] to ptr +; COMMON-NEXT: [[I:%.*]] = load i32, ptr [[IN2]], align 4 +; COMMON-NEXT: store i32 [[I]], ptr [[S3]], align 4 +; COMMON-NEXT: ret void ; -; Function Attrs: nofree norecurse nounwind willreturn mustprogress -define dso_local void @gep_bitcast(ptr nocapture %out, ptr nocapture readonly byval(%struct.ham) align 4 %in, i32 %n) local_unnamed_addr #0 { bb: - %n64 = sext i32 %n to i64 - %gep = getelementptr inbounds %struct.ham, ptr %in, i64 0, i32 0, i64 %n64 - %load = load i8, ptr %gep, align 4 - store i8 %load, ptr %out, align 4 + %i = load i32, ptr %in, align 4 + store i32 %i, ptr %s, align 4 ret void } -; Same as above, but with an ASC(101) present in the chain -; CHECK-LABEL:.visible .entry gep_bitcast_asc -; CHECK-NOT: .local -; CHECK64-DAG: ld.param.u64 [[out:%rd[0-9]+]], [gep_bitcast_asc_param_0] -; CHECK64-DAG: mov.b64 {{%rd[0-9]+}}, gep_bitcast_asc_param_1 +define void @test_select(ptr byval(i32) align 4 %input1, ptr byval(i32) %input2, ptr %out, i1 %cond) { +; SM_60-LABEL: define void @test_select( +; SM_60-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], ptr byval(i32) [[INPUT2:%.*]], ptr [[OUT:%.*]], i1 [[COND:%.*]]) #[[ATTR3:[0-9]+]] { +; SM_60-NEXT: [[BB:.*:]] +; SM_60-NEXT: [[OUT7:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1) +; SM_60-NEXT: [[OUT8:%.*]] = addrspacecast ptr addrspace(1) [[OUT7]] to ptr +; SM_60-NEXT: [[INPUT24:%.*]] = alloca i32, align 4 +; SM_60-NEXT: [[INPUT25:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101) +; SM_60-NEXT: [[INPUT26:%.*]] = load i32, ptr addrspace(101) [[INPUT25]], align 4 +; SM_60-NEXT: store i32 [[INPUT26]], ptr [[INPUT24]], align 4 +; SM_60-NEXT: [[INPUT11:%.*]] = alloca i32, align 4 +; SM_60-NEXT: [[INPUT12:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101) +; SM_60-NEXT: [[INPUT13:%.*]] = load i32, ptr addrspace(101) [[INPUT12]], align 4 +; SM_60-NEXT: store i32 [[INPUT13]], ptr [[INPUT11]], align 4 +; SM_60-NEXT: [[PTRNEW:%.*]] = select i1 [[COND]], ptr [[INPUT11]], ptr [[INPUT24]] +; SM_60-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4 +; SM_60-NEXT: store i32 [[VALLOADED]], ptr [[OUT8]], align 4 +; SM_60-NEXT: ret void ; -; CHECK32-DAG: ld.param.u32 [[out:%r[0-9]+]], [gep_bitcast_asc_param_0] -; CHECK32-DAG: mov.b32 {{%r[0-9]+}}, gep_bitcast_asc_param_1 +; SM_70-LABEL: define void @test_select( +; SM_70-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], ptr byval(i32) [[INPUT2:%.*]], ptr [[OUT:%.*]], i1 [[COND:%.*]]) #[[ATTR3:[0-9]+]] { +; SM_70-NEXT: [[BB:.*:]] +; SM_70-NEXT: [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1) +; SM_70-NEXT: [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr +; SM_70-NEXT: [[INPUT2_PARAM:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101) +; SM_70-NEXT: [[INPUT2_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT2_PARAM]]) +; SM_70-NEXT: [[INPUT1_PARAM:%.*]] 
= addrspacecast ptr [[INPUT1]] to ptr addrspace(101) +; SM_70-NEXT: [[INPUT1_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT1_PARAM]]) +; SM_70-NEXT: [[PTRNEW:%.*]] = select i1 [[COND]], ptr [[INPUT1_PARAM_GEN]], ptr [[INPUT2_PARAM_GEN]] +; SM_70-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4 +; SM_70-NEXT: store i32 [[VALLOADED]], ptr [[OUT2]], align 4 +; SM_70-NEXT: ret void ; -; CHECK-DAG: ld.param.u32 {{%r[0-9]+}}, [gep_bitcast_asc_param_2] -; CHECK64: ld.param.u8 [[value:%rs[0-9]+]], [{{%rd[0-9]+}}] -; CHECK64: st.global.u8 [{{%rd[0-9]+}}], [[value]]; -; CHECK32: ld.param.u8 [[value:%rs[0-9]+]], [{{%r[0-9]+}}] -; CHECK32: st.global.u8 [{{%r[0-9]+}}], [[value]]; +bb: + %ptrnew = select i1 %cond, ptr %input1, ptr %input2 + %valloaded = load i32, ptr %ptrnew, align 4 + store i32 %valloaded, ptr %out, align 4 + ret void +} + +define void @test_select_write(ptr byval(i32) align 4 %input1, ptr byval(i32) %input2, ptr %out, i1 %cond) { +; COMMON-LABEL: define void @test_select_write( +; COMMON-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], ptr byval(i32) [[INPUT2:%.*]], ptr [[OUT:%.*]], i1 [[COND:%.*]]) #[[ATTR3:[0-9]+]] { +; COMMON-NEXT: [[BB:.*:]] +; COMMON-NEXT: [[OUT7:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1) +; COMMON-NEXT: [[OUT8:%.*]] = addrspacecast ptr addrspace(1) [[OUT7]] to ptr +; COMMON-NEXT: [[INPUT24:%.*]] = alloca i32, align 4 +; COMMON-NEXT: [[INPUT25:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101) +; COMMON-NEXT: [[INPUT26:%.*]] = load i32, ptr addrspace(101) [[INPUT25]], align 4 +; COMMON-NEXT: store i32 [[INPUT26]], ptr [[INPUT24]], align 4 +; COMMON-NEXT: [[INPUT11:%.*]] = alloca i32, align 4 +; COMMON-NEXT: [[INPUT12:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101) +; COMMON-NEXT: [[INPUT13:%.*]] = load i32, ptr addrspace(101) [[INPUT12]], align 4 +; COMMON-NEXT: store i32 [[INPUT13]], ptr [[INPUT11]], align 4 +; COMMON-NEXT: [[PTRNEW:%.*]] = select i1 [[COND]], ptr [[INPUT11]], ptr [[INPUT24]] +; COMMON-NEXT: store i32 1, ptr [[PTRNEW]], align 4 +; COMMON-NEXT: ret void ; -; Function Attrs: nofree norecurse nounwind willreturn mustprogress -define dso_local void @gep_bitcast_asc(ptr nocapture %out, ptr nocapture readonly byval(%struct.ham) align 4 %in, i32 %n) local_unnamed_addr #0 { bb: - %n64 = sext i32 %n to i64 - %gep = getelementptr inbounds %struct.ham, ptr %in, i64 0, i32 0, i64 %n64 - %asc = addrspacecast ptr %gep to ptr addrspace(101) - %load = load i8, ptr addrspace(101) %asc, align 4 - store i8 %load, ptr %out, align 4 - ret void -} - - -; Verify that if the pointer escapes, then we do fall back onto using a temp copy. 
-; CHECK-LABEL: .visible .entry pointer_escapes -; CHECK: .local .align 4 .b8 __local_depot{{.*}} -; CHECK64: ld.param.u64 [[result_addr:%rd[0-9]+]], [{{.*}}_param_0] -; CHECK64: add.u64 %[[copy_addr:rd[0-9]+]], %SPL, 0; -; CHECK32: ld.param.u32 [[result_addr:%r[0-9]+]], [{{.*}}_param_0] -; CHECK32: add.u32 %[[copy_addr:r[0-9]+]], %SPL, 0; -; CHECK-DAG: ld.param.u32 %{{.*}}, [pointer_escapes_param_1+12]; -; CHECK-DAG: ld.param.u32 %{{.*}}, [pointer_escapes_param_1+8]; -; CHECK-DAG: ld.param.u32 %{{.*}}, [pointer_escapes_param_1+4]; -; CHECK-DAG: ld.param.u32 %{{.*}}, [pointer_escapes_param_1]; -; CHECK-DAG: st.local.u32 [%[[copy_addr]]+12], -; CHECK-DAG: st.local.u32 [%[[copy_addr]]+8], -; CHECK-DAG: st.local.u32 [%[[copy_addr]]+4], -; CHECK-DAG: st.local.u32 [%[[copy_addr]]], -; CHECK64: cvta.to.global.u64 [[result_addr_g:%rd[0-9]+]], [[result_addr]] -; CHECK64: add.s64 %[[copy_w_offset:rd[0-9]+]], %[[copy_addr]], -; CHECK32: cvta.to.global.u32 [[result_addr_g:%r[0-9]+]], [[result_addr]] -; CHECK32: add.s32 %[[copy_w_offset:r[0-9]+]], %[[copy_addr]], -; CHECK: ld.local.u32 [[value:%r[0-9]+]], [%[[copy_w_offset]]]; -; CHECK: st.global.u32 [[[result_addr_g]]], [[value]]; - -; Function Attrs: convergent norecurse nounwind mustprogress -define dso_local void @pointer_escapes(ptr nocapture %arg, ptr byval(%struct.ham) align 4 %arg1, i32 %arg2) local_unnamed_addr #1 { + %ptrnew = select i1 %cond, ptr %input1, ptr %input2 + store i32 1, ptr %ptrnew, align 4 + ret void +} + +define void @test_phi(ptr byval(%struct.S) align 4 %input1, ptr byval(%struct.S) %input2, ptr %inout, i1 %cond) { +; SM_60-LABEL: define void @test_phi( +; SM_60-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr byval([[STRUCT_S]]) [[INPUT2:%.*]], ptr [[INOUT:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { +; SM_60-NEXT: [[BB:.*:]] +; SM_60-NEXT: [[INOUT7:%.*]] = addrspacecast ptr [[INOUT]] to ptr addrspace(1) +; SM_60-NEXT: [[INOUT8:%.*]] = addrspacecast ptr addrspace(1) [[INOUT7]] to ptr +; SM_60-NEXT: [[INPUT24:%.*]] = alloca [[STRUCT_S]], align 8 +; SM_60-NEXT: [[INPUT25:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101) +; SM_60-NEXT: [[INPUT26:%.*]] = load [[STRUCT_S]], ptr addrspace(101) [[INPUT25]], align 8 +; SM_60-NEXT: store [[STRUCT_S]] [[INPUT26]], ptr [[INPUT24]], align 4 +; SM_60-NEXT: [[INPUT11:%.*]] = alloca [[STRUCT_S]], align 4 +; SM_60-NEXT: [[INPUT12:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101) +; SM_60-NEXT: [[INPUT13:%.*]] = load [[STRUCT_S]], ptr addrspace(101) [[INPUT12]], align 4 +; SM_60-NEXT: store [[STRUCT_S]] [[INPUT13]], ptr [[INPUT11]], align 4 +; SM_60-NEXT: br i1 [[COND]], label %[[FIRST:.*]], label %[[SECOND:.*]] +; SM_60: [[FIRST]]: +; SM_60-NEXT: [[PTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT11]], i32 0, i32 0 +; SM_60-NEXT: br label %[[MERGE:.*]] +; SM_60: [[SECOND]]: +; SM_60-NEXT: [[PTR2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT24]], i32 0, i32 1 +; SM_60-NEXT: br label %[[MERGE]] +; SM_60: [[MERGE]]: +; SM_60-NEXT: [[PTRNEW:%.*]] = phi ptr [ [[PTR1]], %[[FIRST]] ], [ [[PTR2]], %[[SECOND]] ] +; SM_60-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4 +; SM_60-NEXT: store i32 [[VALLOADED]], ptr [[INOUT8]], align 4 +; SM_60-NEXT: ret void +; +; SM_70-LABEL: define void @test_phi( +; SM_70-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr byval([[STRUCT_S]]) [[INPUT2:%.*]], ptr [[INOUT:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { +; SM_70-NEXT: [[BB:.*:]] +; SM_70-NEXT: [[INOUT1:%.*]] = addrspacecast ptr [[INOUT]] to ptr 
addrspace(1) +; SM_70-NEXT: [[INOUT2:%.*]] = addrspacecast ptr addrspace(1) [[INOUT1]] to ptr +; SM_70-NEXT: [[INPUT2_PARAM:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101) +; SM_70-NEXT: [[INPUT2_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT2_PARAM]]) +; SM_70-NEXT: [[INPUT1_PARAM:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101) +; SM_70-NEXT: [[INPUT1_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT1_PARAM]]) +; SM_70-NEXT: br i1 [[COND]], label %[[FIRST:.*]], label %[[SECOND:.*]] +; SM_70: [[FIRST]]: +; SM_70-NEXT: [[PTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1_PARAM_GEN]], i32 0, i32 0 +; SM_70-NEXT: br label %[[MERGE:.*]] +; SM_70: [[SECOND]]: +; SM_70-NEXT: [[PTR2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT2_PARAM_GEN]], i32 0, i32 1 +; SM_70-NEXT: br label %[[MERGE]] +; SM_70: [[MERGE]]: +; SM_70-NEXT: [[PTRNEW:%.*]] = phi ptr [ [[PTR1]], %[[FIRST]] ], [ [[PTR2]], %[[SECOND]] ] +; SM_70-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4 +; SM_70-NEXT: store i32 [[VALLOADED]], ptr [[INOUT2]], align 4 +; SM_70-NEXT: ret void +; bb: - %tmp = sext i32 %arg2 to i64 - %tmp3 = getelementptr inbounds %struct.ham, ptr %arg1, i64 0, i32 0, i64 %tmp - %tmp4 = load i32, ptr %tmp3, align 4 - store i32 %tmp4, ptr %arg, align 4 - %tmp5 = call ptr @escape(ptr nonnull %tmp3) #3 + br i1 %cond, label %first, label %second + +first: ; preds = %bb + %ptr1 = getelementptr inbounds %struct.S, ptr %input1, i32 0, i32 0 + br label %merge + +second: ; preds = %bb + %ptr2 = getelementptr inbounds %struct.S, ptr %input2, i32 0, i32 1 + br label %merge + +merge: ; preds = %second, %first + %ptrnew = phi ptr [ %ptr1, %first ], [ %ptr2, %second ] + %valloaded = load i32, ptr %ptrnew, align 4 + store i32 %valloaded, ptr %inout, align 4 ret void } -; Function Attrs: convergent nounwind -declare dso_local ptr @escape(ptr) local_unnamed_addr +define void @test_phi_write(ptr byval(%struct.S) align 4 %input1, ptr byval(%struct.S) %input2, i1 %cond) { +; COMMON-LABEL: define void @test_phi_write( +; COMMON-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr byval([[STRUCT_S]]) [[INPUT2:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { +; COMMON-NEXT: [[BB:.*:]] +; COMMON-NEXT: [[INPUT24:%.*]] = alloca [[STRUCT_S]], align 8 +; COMMON-NEXT: [[INPUT25:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101) +; COMMON-NEXT: [[INPUT26:%.*]] = load [[STRUCT_S]], ptr addrspace(101) [[INPUT25]], align 8 +; COMMON-NEXT: store [[STRUCT_S]] [[INPUT26]], ptr [[INPUT24]], align 4 +; COMMON-NEXT: [[INPUT11:%.*]] = alloca [[STRUCT_S]], align 4 +; COMMON-NEXT: [[INPUT12:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101) +; COMMON-NEXT: [[INPUT13:%.*]] = load [[STRUCT_S]], ptr addrspace(101) [[INPUT12]], align 4 +; COMMON-NEXT: store [[STRUCT_S]] [[INPUT13]], ptr [[INPUT11]], align 4 +; COMMON-NEXT: br i1 [[COND]], label %[[FIRST:.*]], label %[[SECOND:.*]] +; COMMON: [[FIRST]]: +; COMMON-NEXT: [[PTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT11]], i32 0, i32 0 +; COMMON-NEXT: br label %[[MERGE:.*]] +; COMMON: [[SECOND]]: +; COMMON-NEXT: [[PTR2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT24]], i32 0, i32 1 +; COMMON-NEXT: br label %[[MERGE]] +; COMMON: [[MERGE]]: +; COMMON-NEXT: [[PTRNEW:%.*]] = phi ptr [ [[PTR1]], %[[FIRST]] ], [ [[PTR2]], %[[SECOND]] ] +; COMMON-NEXT: store i32 1, ptr [[PTRNEW]], align 4 +; COMMON-NEXT: ret void +; +bb: + br i1 %cond, label 
%first, label %second + +first: ; preds = %bb + %ptr1 = getelementptr inbounds %struct.S, ptr %input1, i32 0, i32 0 + br label %merge + +second: ; preds = %bb + %ptr2 = getelementptr inbounds %struct.S, ptr %input2, i32 0, i32 1 + br label %merge + +merge: ; preds = %second, %first + %ptrnew = phi ptr [ %ptr1, %first ], [ %ptr2, %second ] + store i32 1, ptr %ptrnew, align 4 + ret void +} +attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) "no-trapping-math"="true" "target-cpu"="sm_60" "target-features"="+ptx78,+sm_60" "uniform-work-group-size"="true" } +attributes #1 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: write) } -!llvm.module.flags = !{!0, !1, !2} -!nvvm.annotations = !{!3, !4, !5, !6, !7} +!llvm.module.flags = !{!0, !1, !2, !3} +!nvvm.annotations = !{!4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19} +!llvm.ident = !{!20, !21} -!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 9, i32 1]} +!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 11, i32 8]} !1 = !{i32 1, !"wchar_size", i32 4} !2 = !{i32 4, !"nvvm-reflect-ftz", i32 0} -!3 = !{ptr @static_offset, !"kernel", i32 1} -!4 = !{ptr @dynamic_offset, !"kernel", i32 1} -!5 = !{ptr @pointer_escapes, !"kernel", i32 1} -!6 = !{ptr @gep_bitcast, !"kernel", i32 1} -!7 = !{ptr @gep_bitcast_asc, !"kernel", i32 1} +!3 = !{i32 7, !"frame-pointer", i32 2} +!4 = !{ptr @read_only, !"kernel", i32 1} +!5 = !{ptr @escape_ptr, !"kernel", i32 1} +!6 = !{ptr @escape_ptr_gep, !"kernel", i32 1} +!7 = !{ptr @escape_ptr_store, !"kernel", i32 1} +!8 = !{ptr @escape_ptr_gep_store, !"kernel", i32 1} +!9 = !{ptr @escape_ptrtoint, !"kernel", i32 1} +!10 = !{ptr @memcpy_from_param, !"kernel", i32 1} +!11 = !{ptr @memcpy_to_param, !"kernel", i32 1} +!12 = !{ptr @copy_on_store, !"kernel", i32 1} +!13 = !{ptr @read_only_gep, !"kernel", i32 1} +!14 = !{ptr @read_only_gep_asc, !"kernel", i32 1} +!15 = !{ptr @read_only_gep_asc0, !"kernel", i32 1} +!16 = !{ptr @test_select, !"kernel", i32 1} +!17 = !{ptr @test_phi, !"kernel", i32 1} +!18 = !{ptr @test_phi_write, !"kernel", i32 1} +!19 = !{ptr @test_select_write, !"kernel", i32 1} +!20 = !{!"clang version 20.0.0git"} +!21 = !{!"clang version 3.8.0 (tags/RELEASE_380/final)"} diff --git a/llvm/test/CodeGen/RISCV/machineoutliner-pcrel-lo.mir b/llvm/test/CodeGen/RISCV/machineoutliner-pcrel-lo.mir index 8a83543b0280fd..fd3630bcfad256 100644 --- a/llvm/test/CodeGen/RISCV/machineoutliner-pcrel-lo.mir +++ b/llvm/test/CodeGen/RISCV/machineoutliner-pcrel-lo.mir @@ -18,6 +18,9 @@ define i32 @foo2(i32 %a, i32 %b) comdat { ret i32 0 } define i32 @foo3(i32 %a, i32 %b) section ".abc" { ret i32 0 } + + define i32 @foo4(i32 %a, i32 %b) !section_prefix !0 { ret i32 0 } + !0 = !{!"function_section_prefix", !"myprefix"} ... 
--- name: foo @@ -27,23 +30,24 @@ body: | ; CHECK: bb.0: ; CHECK-NEXT: liveins: $x10, $x11, $x13 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11, implicit $x13 + ; CHECK-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_1, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11, implicit $x13 ; CHECK-NEXT: PseudoBR %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: liveins: $x10, $x11, $x13 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11, implicit $x13 + ; CHECK-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_1, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11, implicit $x13 ; CHECK-NEXT: PseudoBR %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: liveins: $x10, $x11, $x13 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11, implicit $x13 + ; CHECK-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_1, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11, implicit $x13 ; CHECK-NEXT: PseudoBR %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: PseudoRET + ; ; CHECK-FS-LABEL: name: foo ; CHECK-FS: bb.0: ; CHECK-FS-NEXT: liveins: $x10, $x11, $x13 @@ -109,26 +113,27 @@ body: | ; CHECK: bb.0: ; CHECK-NEXT: liveins: $x10, $x11, $x13 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_1, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11 + ; CHECK-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11 ; CHECK-NEXT: $x11 = LW killed renamable $x13, target-flags(riscv-pcrel-lo) :: (dereferenceable load (s32) from @bar) ; CHECK-NEXT: PseudoBR %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: liveins: $x10, $x11, $x13 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_1, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11 + ; CHECK-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11 ; CHECK-NEXT: $x11 = LW killed renamable $x13, target-flags(riscv-pcrel-lo) :: (dereferenceable load (s32) from @bar) ; CHECK-NEXT: PseudoBR %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: liveins: $x10, $x11, $x13 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_1, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11 + ; CHECK-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11 ; CHECK-NEXT: $x11 = LW killed renamable $x13, 
target-flags(riscv-pcrel-lo) :: (dereferenceable load (s32) from @bar) ; CHECK-NEXT: PseudoBR %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: PseudoRET + ; ; CHECK-FS-LABEL: name: foo2 ; CHECK-FS: bb.0: ; CHECK-FS-NEXT: liveins: $x10, $x11, $x13 @@ -223,6 +228,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: PseudoRET + ; ; CHECK-FS-LABEL: name: foo3 ; CHECK-FS: bb.0: ; CHECK-FS-NEXT: liveins: $x10, $x11, $x13 @@ -289,3 +295,89 @@ body: | bb.3: PseudoRET ... +--- +name: foo4 +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: foo4 + ; CHECK: bb.0: + ; CHECK-NEXT: liveins: $x10, $x11, $x13 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11 + ; CHECK-NEXT: $x11 = LW killed renamable $x13, target-flags(riscv-pcrel-lo) :: (dereferenceable load (s32) from @bar) + ; CHECK-NEXT: PseudoBR %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: liveins: $x10, $x11, $x13 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11 + ; CHECK-NEXT: $x11 = LW killed renamable $x13, target-flags(riscv-pcrel-lo) :: (dereferenceable load (s32) from @bar) + ; CHECK-NEXT: PseudoBR %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: liveins: $x10, $x11, $x13 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11 + ; CHECK-NEXT: $x11 = LW killed renamable $x13, target-flags(riscv-pcrel-lo) :: (dereferenceable load (s32) from @bar) + ; CHECK-NEXT: PseudoBR %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: PseudoRET + ; + ; CHECK-FS-LABEL: name: foo4 + ; CHECK-FS: bb.0: + ; CHECK-FS-NEXT: liveins: $x10, $x11, $x13 + ; CHECK-FS-NEXT: {{ $}} + ; CHECK-FS-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11 + ; CHECK-FS-NEXT: $x11 = LW killed renamable $x13, target-flags(riscv-pcrel-lo) :: (dereferenceable load (s32) from @bar) + ; CHECK-FS-NEXT: PseudoBR %bb.3 + ; CHECK-FS-NEXT: {{ $}} + ; CHECK-FS-NEXT: bb.1: + ; CHECK-FS-NEXT: liveins: $x10, $x11, $x13 + ; CHECK-FS-NEXT: {{ $}} + ; CHECK-FS-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11 + ; CHECK-FS-NEXT: $x11 = LW killed renamable $x13, target-flags(riscv-pcrel-lo) :: (dereferenceable load (s32) from @bar) + ; CHECK-FS-NEXT: PseudoBR %bb.3 + ; CHECK-FS-NEXT: {{ $}} + ; CHECK-FS-NEXT: bb.2: + ; CHECK-FS-NEXT: liveins: $x10, $x11, $x13 + ; CHECK-FS-NEXT: {{ $}} + ; CHECK-FS-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11 + ; CHECK-FS-NEXT: $x11 = LW killed renamable $x13, target-flags(riscv-pcrel-lo) :: (dereferenceable load (s32) from @bar) + ; CHECK-FS-NEXT: PseudoBR %bb.3 + ; CHECK-FS-NEXT: {{ $}} + ; CHECK-FS-NEXT: bb.3: + ; CHECK-FS-NEXT: PseudoRET + bb.0: + liveins: $x10, $x11, $x13 + + $x11 = ORI $x11, 1023 + $x12 = ADDI $x10, 17 + $x11 = AND 
$x12, $x11 + $x10 = SUB $x10, $x11 + $x11 = LW killed renamable $x13, target-flags(riscv-pcrel-lo) :: (dereferenceable load (s32) from @bar) + PseudoBR %bb.3 + + bb.1: + liveins: $x10, $x11, $x13 + + $x11 = ORI $x11, 1023 + $x12 = ADDI $x10, 17 + $x11 = AND $x12, $x11 + $x10 = SUB $x10, $x11 + $x11 = LW killed renamable $x13, target-flags(riscv-pcrel-lo) :: (dereferenceable load (s32) from @bar) + PseudoBR %bb.3 + + bb.2: + liveins: $x10, $x11, $x13 + + $x11 = ORI $x11, 1023 + $x12 = ADDI $x10, 17 + $x11 = AND $x12, $x11 + $x10 = SUB $x10, $x11 + $x11 = LW killed renamable $x13, target-flags(riscv-pcrel-lo) :: (dereferenceable load (s32) from @bar) + PseudoBR %bb.3 + + bb.3: + PseudoRET +... diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll index e3aabb5de29c28..b5d3e2cd776f27 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll @@ -1348,20 +1348,16 @@ define <32 x double> @buildvec_v32f64_exact_vlen(double %e0, double %e1, double ; ; RV64-LABEL: buildvec_v32f64_exact_vlen: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -96 -; RV64-NEXT: .cfi_def_cfa_offset 96 -; RV64-NEXT: fsd fs0, 88(sp) # 8-byte Folded Spill -; RV64-NEXT: fsd fs1, 80(sp) # 8-byte Folded Spill -; RV64-NEXT: fsd fs2, 72(sp) # 8-byte Folded Spill -; RV64-NEXT: fsd fs3, 64(sp) # 8-byte Folded Spill -; RV64-NEXT: fsd fs4, 56(sp) # 8-byte Folded Spill -; RV64-NEXT: fsd fs5, 48(sp) # 8-byte Folded Spill -; RV64-NEXT: fsd fs6, 40(sp) # 8-byte Folded Spill -; RV64-NEXT: fsd fs7, 32(sp) # 8-byte Folded Spill -; RV64-NEXT: fsd fs8, 24(sp) # 8-byte Folded Spill -; RV64-NEXT: fsd fs9, 16(sp) # 8-byte Folded Spill -; RV64-NEXT: fsd fs10, 8(sp) # 8-byte Folded Spill -; RV64-NEXT: fsd fs11, 0(sp) # 8-byte Folded Spill +; RV64-NEXT: addi sp, sp, -64 +; RV64-NEXT: .cfi_def_cfa_offset 64 +; RV64-NEXT: fsd fs0, 56(sp) # 8-byte Folded Spill +; RV64-NEXT: fsd fs1, 48(sp) # 8-byte Folded Spill +; RV64-NEXT: fsd fs2, 40(sp) # 8-byte Folded Spill +; RV64-NEXT: fsd fs3, 32(sp) # 8-byte Folded Spill +; RV64-NEXT: fsd fs4, 24(sp) # 8-byte Folded Spill +; RV64-NEXT: fsd fs5, 16(sp) # 8-byte Folded Spill +; RV64-NEXT: fsd fs6, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: fsd fs7, 0(sp) # 8-byte Folded Spill ; RV64-NEXT: .cfi_offset fs0, -8 ; RV64-NEXT: .cfi_offset fs1, -16 ; RV64-NEXT: .cfi_offset fs2, -24 @@ -1370,34 +1366,26 @@ define <32 x double> @buildvec_v32f64_exact_vlen(double %e0, double %e1, double ; RV64-NEXT: .cfi_offset fs5, -48 ; RV64-NEXT: .cfi_offset fs6, -56 ; RV64-NEXT: .cfi_offset fs7, -64 -; RV64-NEXT: .cfi_offset fs8, -72 -; RV64-NEXT: .cfi_offset fs9, -80 -; RV64-NEXT: .cfi_offset fs10, -88 -; RV64-NEXT: .cfi_offset fs11, -96 ; RV64-NEXT: fmv.d.x ft4, a7 -; RV64-NEXT: fmv.d.x ft5, a6 -; RV64-NEXT: fmv.d.x ft6, a5 -; RV64-NEXT: fmv.d.x ft7, a4 -; RV64-NEXT: fmv.d.x ft8, a3 -; RV64-NEXT: fmv.d.x ft9, a2 -; RV64-NEXT: fmv.d.x ft10, a1 -; RV64-NEXT: fmv.d.x ft11, a0 -; RV64-NEXT: fld ft0, 216(sp) -; RV64-NEXT: fld ft1, 208(sp) -; RV64-NEXT: fld ft2, 200(sp) -; RV64-NEXT: fld ft3, 192(sp) -; RV64-NEXT: fld fs0, 184(sp) -; RV64-NEXT: fld fs1, 176(sp) -; RV64-NEXT: fld fs2, 168(sp) -; RV64-NEXT: fld fs3, 160(sp) -; RV64-NEXT: fld fs4, 152(sp) -; RV64-NEXT: fld fs5, 144(sp) -; RV64-NEXT: fld fs6, 136(sp) -; RV64-NEXT: fld fs7, 128(sp) -; RV64-NEXT: fld fs8, 104(sp) -; RV64-NEXT: fld fs9, 96(sp) -; RV64-NEXT: fld fs10, 120(sp) -; RV64-NEXT: fld fs11, 112(sp) +; RV64-NEXT: fmv.d.x ft5, 
a5 +; RV64-NEXT: fmv.d.x ft6, a3 +; RV64-NEXT: fmv.d.x ft7, a1 +; RV64-NEXT: fld ft0, 184(sp) +; RV64-NEXT: fld ft1, 176(sp) +; RV64-NEXT: fld ft2, 168(sp) +; RV64-NEXT: fld ft3, 160(sp) +; RV64-NEXT: fld ft8, 152(sp) +; RV64-NEXT: fld ft9, 144(sp) +; RV64-NEXT: fld ft10, 136(sp) +; RV64-NEXT: fld ft11, 128(sp) +; RV64-NEXT: fld fs0, 120(sp) +; RV64-NEXT: fld fs1, 112(sp) +; RV64-NEXT: fld fs2, 104(sp) +; RV64-NEXT: fld fs3, 96(sp) +; RV64-NEXT: fld fs4, 72(sp) +; RV64-NEXT: fld fs5, 64(sp) +; RV64-NEXT: fld fs6, 88(sp) +; RV64-NEXT: fld fs7, 80(sp) ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vfmv.v.f v8, fa2 ; RV64-NEXT: vfslide1down.vf v9, v8, fa3 @@ -1407,43 +1395,39 @@ define <32 x double> @buildvec_v32f64_exact_vlen(double %e0, double %e1, double ; RV64-NEXT: vfslide1down.vf v10, v10, fa5 ; RV64-NEXT: vfmv.v.f v11, fa6 ; RV64-NEXT: vfslide1down.vf v11, v11, fa7 -; RV64-NEXT: vfmv.v.f v12, ft11 -; RV64-NEXT: vfslide1down.vf v12, v12, ft10 -; RV64-NEXT: vfmv.v.f v13, ft9 -; RV64-NEXT: vfslide1down.vf v13, v13, ft8 -; RV64-NEXT: vfmv.v.f v14, ft7 -; RV64-NEXT: vfslide1down.vf v14, v14, ft6 -; RV64-NEXT: vfmv.v.f v15, ft5 +; RV64-NEXT: vmv.v.x v12, a0 +; RV64-NEXT: vfslide1down.vf v12, v12, ft7 +; RV64-NEXT: vmv.v.x v13, a2 +; RV64-NEXT: vfslide1down.vf v13, v13, ft6 +; RV64-NEXT: vmv.v.x v14, a4 +; RV64-NEXT: vfslide1down.vf v14, v14, ft5 +; RV64-NEXT: vmv.v.x v15, a6 ; RV64-NEXT: vfslide1down.vf v15, v15, ft4 -; RV64-NEXT: vfmv.v.f v16, fs11 -; RV64-NEXT: vfslide1down.vf v17, v16, fs10 -; RV64-NEXT: vfmv.v.f v16, fs9 -; RV64-NEXT: vfslide1down.vf v16, v16, fs8 -; RV64-NEXT: vfmv.v.f v18, fs7 -; RV64-NEXT: vfslide1down.vf v18, v18, fs6 -; RV64-NEXT: vfmv.v.f v19, fs5 -; RV64-NEXT: vfslide1down.vf v19, v19, fs4 -; RV64-NEXT: vfmv.v.f v20, fs3 -; RV64-NEXT: vfslide1down.vf v20, v20, fs2 -; RV64-NEXT: vfmv.v.f v21, fs1 -; RV64-NEXT: vfslide1down.vf v21, v21, fs0 +; RV64-NEXT: vfmv.v.f v16, fs7 +; RV64-NEXT: vfslide1down.vf v17, v16, fs6 +; RV64-NEXT: vfmv.v.f v16, fs5 +; RV64-NEXT: vfslide1down.vf v16, v16, fs4 +; RV64-NEXT: vfmv.v.f v18, fs3 +; RV64-NEXT: vfslide1down.vf v18, v18, fs2 +; RV64-NEXT: vfmv.v.f v19, fs1 +; RV64-NEXT: vfslide1down.vf v19, v19, fs0 +; RV64-NEXT: vfmv.v.f v20, ft11 +; RV64-NEXT: vfslide1down.vf v20, v20, ft10 +; RV64-NEXT: vfmv.v.f v21, ft9 +; RV64-NEXT: vfslide1down.vf v21, v21, ft8 ; RV64-NEXT: vfmv.v.f v22, ft3 ; RV64-NEXT: vfslide1down.vf v22, v22, ft2 ; RV64-NEXT: vfmv.v.f v23, ft1 ; RV64-NEXT: vfslide1down.vf v23, v23, ft0 -; RV64-NEXT: fld fs0, 88(sp) # 8-byte Folded Reload -; RV64-NEXT: fld fs1, 80(sp) # 8-byte Folded Reload -; RV64-NEXT: fld fs2, 72(sp) # 8-byte Folded Reload -; RV64-NEXT: fld fs3, 64(sp) # 8-byte Folded Reload -; RV64-NEXT: fld fs4, 56(sp) # 8-byte Folded Reload -; RV64-NEXT: fld fs5, 48(sp) # 8-byte Folded Reload -; RV64-NEXT: fld fs6, 40(sp) # 8-byte Folded Reload -; RV64-NEXT: fld fs7, 32(sp) # 8-byte Folded Reload -; RV64-NEXT: fld fs8, 24(sp) # 8-byte Folded Reload -; RV64-NEXT: fld fs9, 16(sp) # 8-byte Folded Reload -; RV64-NEXT: fld fs10, 8(sp) # 8-byte Folded Reload -; RV64-NEXT: fld fs11, 0(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 96 +; RV64-NEXT: fld fs0, 56(sp) # 8-byte Folded Reload +; RV64-NEXT: fld fs1, 48(sp) # 8-byte Folded Reload +; RV64-NEXT: fld fs2, 40(sp) # 8-byte Folded Reload +; RV64-NEXT: fld fs3, 32(sp) # 8-byte Folded Reload +; RV64-NEXT: fld fs4, 24(sp) # 8-byte Folded Reload +; RV64-NEXT: fld fs5, 16(sp) # 8-byte Folded Reload +; RV64-NEXT: fld fs6, 8(sp) # 8-byte Folded Reload +; 
RV64-NEXT: fld fs7, 0(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 64 ; RV64-NEXT: ret %v0 = insertelement <32 x double> poison, double %e0, i64 0 %v1 = insertelement <32 x double> %v0, double %e1, i64 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmaccbf16.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmaccbf16.ll new file mode 100644 index 00000000000000..62a479bdedf649 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmaccbf16.ll @@ -0,0 +1,467 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfbfwma -verify-machineinstrs | FileCheck %s --check-prefix=ZVFBFWMA +; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfbfwma -verify-machineinstrs | FileCheck %s --check-prefix=ZVFBFWMA +; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfbfmin -verify-machineinstrs | FileCheck %s --check-prefixes=ZVFBFMIN,ZVFBMIN32 +; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfbfmin -verify-machineinstrs | FileCheck %s --check-prefixes=ZVFBFMIN,ZVFBMIN64 + +define <1 x float> @vfwmaccbf16_vv_v1f32(<1 x float> %a, <1 x bfloat> %b, <1 x bfloat> %c) { +; ZVFBFWMA-LABEL: vfwmaccbf16_vv_v1f32: +; ZVFBFWMA: # %bb.0: +; ZVFBFWMA-NEXT: addi sp, sp, -16 +; ZVFBFWMA-NEXT: .cfi_def_cfa_offset 16 +; ZVFBFWMA-NEXT: fcvt.s.bf16 fa5, fa0 +; ZVFBFWMA-NEXT: fsw fa5, 8(sp) +; ZVFBFWMA-NEXT: addi a0, sp, 8 +; ZVFBFWMA-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFBFWMA-NEXT: vle32.v v9, (a0) +; ZVFBFWMA-NEXT: fcvt.s.bf16 fa5, fa1 +; ZVFBFWMA-NEXT: fsw fa5, 12(sp) +; ZVFBFWMA-NEXT: addi a0, sp, 12 +; ZVFBFWMA-NEXT: vle32.v v10, (a0) +; ZVFBFWMA-NEXT: vfmacc.vv v8, v9, v10 +; ZVFBFWMA-NEXT: addi sp, sp, 16 +; ZVFBFWMA-NEXT: ret +; +; ZVFBMIN32-LABEL: vfwmaccbf16_vv_v1f32: +; ZVFBMIN32: # %bb.0: +; ZVFBMIN32-NEXT: addi sp, sp, -32 +; ZVFBMIN32-NEXT: .cfi_def_cfa_offset 32 +; ZVFBMIN32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; ZVFBMIN32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; ZVFBMIN32-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill +; ZVFBMIN32-NEXT: .cfi_offset ra, -4 +; ZVFBMIN32-NEXT: .cfi_offset s0, -8 +; ZVFBMIN32-NEXT: .cfi_offset fs0, -16 +; ZVFBMIN32-NEXT: csrr a0, vlenb +; ZVFBMIN32-NEXT: slli a0, a0, 1 +; ZVFBMIN32-NEXT: sub sp, sp, a0 +; ZVFBMIN32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 2 * vlenb +; ZVFBMIN32-NEXT: fmv.s fs0, fa0 +; ZVFBMIN32-NEXT: addi a0, sp, 16 +; ZVFBMIN32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; ZVFBMIN32-NEXT: fmv.s fa0, fa1 +; ZVFBMIN32-NEXT: call __truncsfbf2 +; ZVFBMIN32-NEXT: fmv.x.w s0, fa0 +; ZVFBMIN32-NEXT: fmv.s fa0, fs0 +; ZVFBMIN32-NEXT: call __truncsfbf2 +; ZVFBMIN32-NEXT: fmv.x.w a0, fa0 +; ZVFBMIN32-NEXT: slli a0, a0, 16 +; ZVFBMIN32-NEXT: sw a0, 8(sp) +; ZVFBMIN32-NEXT: addi a0, sp, 8 +; ZVFBMIN32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFBMIN32-NEXT: vle32.v v10, (a0) +; ZVFBMIN32-NEXT: slli s0, s0, 16 +; ZVFBMIN32-NEXT: sw s0, 12(sp) +; ZVFBMIN32-NEXT: addi a0, sp, 12 +; ZVFBMIN32-NEXT: vle32.v v9, (a0) +; ZVFBMIN32-NEXT: addi a0, sp, 16 +; ZVFBMIN32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; ZVFBMIN32-NEXT: vfmacc.vv v8, v10, v9 +; ZVFBMIN32-NEXT: csrr a0, vlenb +; ZVFBMIN32-NEXT: slli a0, a0, 1 +; ZVFBMIN32-NEXT: add sp, sp, a0 +; ZVFBMIN32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; ZVFBMIN32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; ZVFBMIN32-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload +; ZVFBMIN32-NEXT: addi sp, sp, 32 +; 
ZVFBMIN32-NEXT: ret +; +; ZVFBMIN64-LABEL: vfwmaccbf16_vv_v1f32: +; ZVFBMIN64: # %bb.0: +; ZVFBMIN64-NEXT: addi sp, sp, -64 +; ZVFBMIN64-NEXT: .cfi_def_cfa_offset 64 +; ZVFBMIN64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; ZVFBMIN64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; ZVFBMIN64-NEXT: fsd fs0, 40(sp) # 8-byte Folded Spill +; ZVFBMIN64-NEXT: .cfi_offset ra, -8 +; ZVFBMIN64-NEXT: .cfi_offset s0, -16 +; ZVFBMIN64-NEXT: .cfi_offset fs0, -24 +; ZVFBMIN64-NEXT: csrr a0, vlenb +; ZVFBMIN64-NEXT: slli a0, a0, 1 +; ZVFBMIN64-NEXT: sub sp, sp, a0 +; ZVFBMIN64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb +; ZVFBMIN64-NEXT: fmv.s fs0, fa0 +; ZVFBMIN64-NEXT: addi a0, sp, 32 +; ZVFBMIN64-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; ZVFBMIN64-NEXT: fmv.s fa0, fa1 +; ZVFBMIN64-NEXT: call __truncsfbf2 +; ZVFBMIN64-NEXT: fmv.x.w s0, fa0 +; ZVFBMIN64-NEXT: fmv.s fa0, fs0 +; ZVFBMIN64-NEXT: call __truncsfbf2 +; ZVFBMIN64-NEXT: fmv.x.w a0, fa0 +; ZVFBMIN64-NEXT: slli a0, a0, 16 +; ZVFBMIN64-NEXT: fmv.w.x fa5, a0 +; ZVFBMIN64-NEXT: fsw fa5, 16(sp) +; ZVFBMIN64-NEXT: addi a0, sp, 16 +; ZVFBMIN64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFBMIN64-NEXT: vle32.v v10, (a0) +; ZVFBMIN64-NEXT: slli s0, s0, 16 +; ZVFBMIN64-NEXT: fmv.w.x fa5, s0 +; ZVFBMIN64-NEXT: fsw fa5, 20(sp) +; ZVFBMIN64-NEXT: addi a0, sp, 20 +; ZVFBMIN64-NEXT: vle32.v v9, (a0) +; ZVFBMIN64-NEXT: addi a0, sp, 32 +; ZVFBMIN64-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; ZVFBMIN64-NEXT: vfmacc.vv v8, v10, v9 +; ZVFBMIN64-NEXT: csrr a0, vlenb +; ZVFBMIN64-NEXT: slli a0, a0, 1 +; ZVFBMIN64-NEXT: add sp, sp, a0 +; ZVFBMIN64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; ZVFBMIN64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; ZVFBMIN64-NEXT: fld fs0, 40(sp) # 8-byte Folded Reload +; ZVFBMIN64-NEXT: addi sp, sp, 64 +; ZVFBMIN64-NEXT: ret + %b.ext = fpext <1 x bfloat> %b to <1 x float> + %c.ext = fpext <1 x bfloat> %c to <1 x float> + %res = call <1 x float> @llvm.fma.v1f32(<1 x float> %b.ext, <1 x float> %c.ext, <1 x float> %a) + ret <1 x float> %res +} + +define <1 x float> @vfwmaccbf16_vf_v1f32(<1 x float> %a, bfloat %b, <1 x bfloat> %c) { +; ZVFBFWMA-LABEL: vfwmaccbf16_vf_v1f32: +; ZVFBFWMA: # %bb.0: +; ZVFBFWMA-NEXT: addi sp, sp, -16 +; ZVFBFWMA-NEXT: .cfi_def_cfa_offset 16 +; ZVFBFWMA-NEXT: fcvt.s.bf16 fa5, fa0 +; ZVFBFWMA-NEXT: fsw fa5, 8(sp) +; ZVFBFWMA-NEXT: addi a0, sp, 8 +; ZVFBFWMA-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFBFWMA-NEXT: vle32.v v9, (a0) +; ZVFBFWMA-NEXT: fcvt.s.bf16 fa5, fa1 +; ZVFBFWMA-NEXT: fsw fa5, 12(sp) +; ZVFBFWMA-NEXT: addi a0, sp, 12 +; ZVFBFWMA-NEXT: vle32.v v10, (a0) +; ZVFBFWMA-NEXT: vfmacc.vv v8, v9, v10 +; ZVFBFWMA-NEXT: addi sp, sp, 16 +; ZVFBFWMA-NEXT: ret +; +; ZVFBMIN32-LABEL: vfwmaccbf16_vf_v1f32: +; ZVFBMIN32: # %bb.0: +; ZVFBMIN32-NEXT: addi sp, sp, -48 +; ZVFBMIN32-NEXT: .cfi_def_cfa_offset 48 +; ZVFBMIN32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; ZVFBMIN32-NEXT: fsd fs0, 32(sp) # 8-byte Folded Spill +; ZVFBMIN32-NEXT: .cfi_offset ra, -4 +; ZVFBMIN32-NEXT: .cfi_offset fs0, -16 +; ZVFBMIN32-NEXT: csrr a0, vlenb +; ZVFBMIN32-NEXT: slli a0, a0, 1 +; ZVFBMIN32-NEXT: sub sp, sp, a0 +; ZVFBMIN32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb +; ZVFBMIN32-NEXT: fmv.s fs0, fa0 +; ZVFBMIN32-NEXT: addi a0, sp, 32 +; ZVFBMIN32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; ZVFBMIN32-NEXT: fmv.s fa0, fa1 +; 
ZVFBMIN32-NEXT: call __truncsfbf2 +; ZVFBMIN32-NEXT: fmv.x.w a0, fa0 +; ZVFBMIN32-NEXT: fmv.x.w a1, fs0 +; ZVFBMIN32-NEXT: slli a1, a1, 16 +; ZVFBMIN32-NEXT: sw a1, 8(sp) +; ZVFBMIN32-NEXT: addi a1, sp, 8 +; ZVFBMIN32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFBMIN32-NEXT: vle32.v v10, (a1) +; ZVFBMIN32-NEXT: slli a0, a0, 16 +; ZVFBMIN32-NEXT: sw a0, 12(sp) +; ZVFBMIN32-NEXT: addi a0, sp, 12 +; ZVFBMIN32-NEXT: vle32.v v9, (a0) +; ZVFBMIN32-NEXT: addi a0, sp, 32 +; ZVFBMIN32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; ZVFBMIN32-NEXT: vfmacc.vv v8, v10, v9 +; ZVFBMIN32-NEXT: csrr a0, vlenb +; ZVFBMIN32-NEXT: slli a0, a0, 1 +; ZVFBMIN32-NEXT: add sp, sp, a0 +; ZVFBMIN32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; ZVFBMIN32-NEXT: fld fs0, 32(sp) # 8-byte Folded Reload +; ZVFBMIN32-NEXT: addi sp, sp, 48 +; ZVFBMIN32-NEXT: ret +; +; ZVFBMIN64-LABEL: vfwmaccbf16_vf_v1f32: +; ZVFBMIN64: # %bb.0: +; ZVFBMIN64-NEXT: addi sp, sp, -48 +; ZVFBMIN64-NEXT: .cfi_def_cfa_offset 48 +; ZVFBMIN64-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; ZVFBMIN64-NEXT: fsd fs0, 32(sp) # 8-byte Folded Spill +; ZVFBMIN64-NEXT: .cfi_offset ra, -8 +; ZVFBMIN64-NEXT: .cfi_offset fs0, -16 +; ZVFBMIN64-NEXT: csrr a0, vlenb +; ZVFBMIN64-NEXT: slli a0, a0, 1 +; ZVFBMIN64-NEXT: sub sp, sp, a0 +; ZVFBMIN64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb +; ZVFBMIN64-NEXT: fmv.s fs0, fa0 +; ZVFBMIN64-NEXT: addi a0, sp, 32 +; ZVFBMIN64-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; ZVFBMIN64-NEXT: fmv.s fa0, fa1 +; ZVFBMIN64-NEXT: call __truncsfbf2 +; ZVFBMIN64-NEXT: fmv.x.w a0, fa0 +; ZVFBMIN64-NEXT: fmv.x.w a1, fs0 +; ZVFBMIN64-NEXT: slli a1, a1, 16 +; ZVFBMIN64-NEXT: fmv.w.x fa5, a1 +; ZVFBMIN64-NEXT: fsw fa5, 24(sp) +; ZVFBMIN64-NEXT: addi a1, sp, 24 +; ZVFBMIN64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFBMIN64-NEXT: vle32.v v10, (a1) +; ZVFBMIN64-NEXT: slli a0, a0, 16 +; ZVFBMIN64-NEXT: fmv.w.x fa5, a0 +; ZVFBMIN64-NEXT: fsw fa5, 28(sp) +; ZVFBMIN64-NEXT: addi a0, sp, 28 +; ZVFBMIN64-NEXT: vle32.v v9, (a0) +; ZVFBMIN64-NEXT: addi a0, sp, 32 +; ZVFBMIN64-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; ZVFBMIN64-NEXT: vfmacc.vv v8, v10, v9 +; ZVFBMIN64-NEXT: csrr a0, vlenb +; ZVFBMIN64-NEXT: slli a0, a0, 1 +; ZVFBMIN64-NEXT: add sp, sp, a0 +; ZVFBMIN64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; ZVFBMIN64-NEXT: fld fs0, 32(sp) # 8-byte Folded Reload +; ZVFBMIN64-NEXT: addi sp, sp, 48 +; ZVFBMIN64-NEXT: ret + %b.head = insertelement <1 x bfloat> poison, bfloat %b, i32 0 + %b.splat = shufflevector <1 x bfloat> %b.head, <1 x bfloat> poison, <1 x i32> zeroinitializer + %b.ext = fpext <1 x bfloat> %b.splat to <1 x float> + %c.ext = fpext <1 x bfloat> %c to <1 x float> + %res = call <1 x float> @llvm.fma.v1f32(<1 x float> %b.ext, <1 x float> %c.ext, <1 x float> %a) + ret <1 x float> %res +} + +define <2 x float> @vfwmaccbf16_vv_v2f32(<2 x float> %a, <2 x bfloat> %b, <2 x bfloat> %c) { +; ZVFBFWMA-LABEL: vfwmaccbf16_vv_v2f32: +; ZVFBFWMA: # %bb.0: +; ZVFBFWMA-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFBFWMA-NEXT: vfwmaccbf16.vv v8, v9, v10 +; ZVFBFWMA-NEXT: ret +; +; ZVFBFMIN-LABEL: vfwmaccbf16_vv_v2f32: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v11, v9 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v9, v10 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFBFMIN-NEXT: vfmacc.vv v8, v11, v9 +; ZVFBFMIN-NEXT: ret + %b.ext = fpext <2 x bfloat> %b to <2 x 
float> + %c.ext = fpext <2 x bfloat> %c to <2 x float> + %res = call <2 x float> @llvm.fma.v2f32(<2 x float> %b.ext, <2 x float> %c.ext, <2 x float> %a) + ret <2 x float> %res +} + +define <2 x float> @vfwmaccbf16_vf_v2f32(<2 x float> %a, bfloat %b, <2 x bfloat> %c) { +; ZVFBFWMA-LABEL: vfwmaccbf16_vf_v2f32: +; ZVFBFWMA: # %bb.0: +; ZVFBFWMA-NEXT: fmv.x.h a0, fa0 +; ZVFBFWMA-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFBFWMA-NEXT: vmv.v.x v10, a0 +; ZVFBFWMA-NEXT: vfwmaccbf16.vv v8, v10, v9 +; ZVFBFWMA-NEXT: ret +; +; ZVFBFMIN-LABEL: vfwmaccbf16_vf_v2f32: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: fmv.x.w a0, fa0 +; ZVFBFMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFBFMIN-NEXT: vmv.v.x v10, a0 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v11, v10 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v9 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFBFMIN-NEXT: vfmacc.vv v8, v11, v10 +; ZVFBFMIN-NEXT: ret + %b.head = insertelement <2 x bfloat> poison, bfloat %b, i32 0 + %b.splat = shufflevector <2 x bfloat> %b.head, <2 x bfloat> poison, <2 x i32> zeroinitializer + %b.ext = fpext <2 x bfloat> %b.splat to <2 x float> + %c.ext = fpext <2 x bfloat> %c to <2 x float> + %res = call <2 x float> @llvm.fma.v2f32(<2 x float> %b.ext, <2 x float> %c.ext, <2 x float> %a) + ret <2 x float> %res +} + +define <4 x float> @vfwmaccbf16_vv_v4f32(<4 x float> %a, <4 x bfloat> %b, <4 x bfloat> %c) { +; ZVFBFWMA-LABEL: vfwmaccbf16_vv_v4f32: +; ZVFBFWMA: # %bb.0: +; ZVFBFWMA-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFBFWMA-NEXT: vfwmaccbf16.vv v8, v9, v10 +; ZVFBFWMA-NEXT: ret +; +; ZVFBFMIN-LABEL: vfwmaccbf16_vv_v4f32: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v11, v9 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v9, v10 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFBFMIN-NEXT: vfmacc.vv v8, v11, v9 +; ZVFBFMIN-NEXT: ret + %b.ext = fpext <4 x bfloat> %b to <4 x float> + %c.ext = fpext <4 x bfloat> %c to <4 x float> + %res = call <4 x float> @llvm.fma.v4f32(<4 x float> %b.ext, <4 x float> %c.ext, <4 x float> %a) + ret <4 x float> %res +} + +define <4 x float> @vfwmaccbf16_vf_v4f32(<4 x float> %a, bfloat %b, <4 x bfloat> %c) { +; ZVFBFWMA-LABEL: vfwmaccbf16_vf_v4f32: +; ZVFBFWMA: # %bb.0: +; ZVFBFWMA-NEXT: fmv.x.h a0, fa0 +; ZVFBFWMA-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFBFWMA-NEXT: vmv.v.x v10, a0 +; ZVFBFWMA-NEXT: vfwmaccbf16.vv v8, v10, v9 +; ZVFBFWMA-NEXT: ret +; +; ZVFBFMIN-LABEL: vfwmaccbf16_vf_v4f32: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: fmv.x.w a0, fa0 +; ZVFBFMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFBFMIN-NEXT: vmv.v.x v10, a0 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v11, v10 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v9 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFBFMIN-NEXT: vfmacc.vv v8, v11, v10 +; ZVFBFMIN-NEXT: ret + %b.head = insertelement <4 x bfloat> poison, bfloat %b, i32 0 + %b.splat = shufflevector <4 x bfloat> %b.head, <4 x bfloat> poison, <4 x i32> zeroinitializer + %b.ext = fpext <4 x bfloat> %b.splat to <4 x float> + %c.ext = fpext <4 x bfloat> %c to <4 x float> + %res = call <4 x float> @llvm.fma.v4f32(<4 x float> %b.ext, <4 x float> %c.ext, <4 x float> %a) + ret <4 x float> %res +} + +define <8 x float> @vfwmaccbf16_vv_v8f32(<8 x float> %a, <8 x bfloat> %b, <8 x bfloat> %c) { +; ZVFBFWMA-LABEL: vfwmaccbf16_vv_v8f32: +; ZVFBFWMA: # %bb.0: +; ZVFBFWMA-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFBFWMA-NEXT: vfwmaccbf16.vv v8, v10, v11 +; ZVFBFWMA-NEXT: ret +; +; ZVFBFMIN-LABEL: 
vfwmaccbf16_vv_v8f32: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v12, v10 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v14, v11 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFBFMIN-NEXT: vfmacc.vv v8, v12, v14 +; ZVFBFMIN-NEXT: ret + %b.ext = fpext <8 x bfloat> %b to <8 x float> + %c.ext = fpext <8 x bfloat> %c to <8 x float> + %res = call <8 x float> @llvm.fma.v8f32(<8 x float> %b.ext, <8 x float> %c.ext, <8 x float> %a) + ret <8 x float> %res +} + +define <8 x float> @vfwmaccbf16_vf_v8f32(<8 x float> %a, bfloat %b, <8 x bfloat> %c) { +; ZVFBFWMA-LABEL: vfwmaccbf16_vf_v8f32: +; ZVFBFWMA: # %bb.0: +; ZVFBFWMA-NEXT: fmv.x.h a0, fa0 +; ZVFBFWMA-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFBFWMA-NEXT: vmv.v.x v11, a0 +; ZVFBFWMA-NEXT: vfwmaccbf16.vv v8, v11, v10 +; ZVFBFWMA-NEXT: ret +; +; ZVFBFMIN-LABEL: vfwmaccbf16_vf_v8f32: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: fmv.x.w a0, fa0 +; ZVFBFMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vmv.v.x v11, a0 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v12, v11 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v14, v10 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFBFMIN-NEXT: vfmacc.vv v8, v12, v14 +; ZVFBFMIN-NEXT: ret + %b.head = insertelement <8 x bfloat> poison, bfloat %b, i32 0 + %b.splat = shufflevector <8 x bfloat> %b.head, <8 x bfloat> poison, <8 x i32> zeroinitializer + %b.ext = fpext <8 x bfloat> %b.splat to <8 x float> + %c.ext = fpext <8 x bfloat> %c to <8 x float> + %res = call <8 x float> @llvm.fma.v8f32(<8 x float> %b.ext, <8 x float> %c.ext, <8 x float> %a) + ret <8 x float> %res +} + +define <16 x float> @vfwmaccbf16_vv_v16f32(<16 x float> %a, <16 x bfloat> %b, <16 x bfloat> %c) { +; ZVFBFWMA-LABEL: vfwmaccbf16_vv_v16f32: +; ZVFBFWMA: # %bb.0: +; ZVFBFWMA-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFBFWMA-NEXT: vfwmaccbf16.vv v8, v12, v14 +; ZVFBFWMA-NEXT: ret +; +; ZVFBFMIN-LABEL: vfwmaccbf16_vv_v16f32: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v16, v12 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v20, v14 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFBFMIN-NEXT: vfmacc.vv v8, v16, v20 +; ZVFBFMIN-NEXT: ret + %b.ext = fpext <16 x bfloat> %b to <16 x float> + %c.ext = fpext <16 x bfloat> %c to <16 x float> + %res = call <16 x float> @llvm.fma.v16f32(<16 x float> %b.ext, <16 x float> %c.ext, <16 x float> %a) + ret <16 x float> %res +} + +define <16 x float> @vfwmaccbf16_vf_v16f32(<16 x float> %a, bfloat %b, <16 x bfloat> %c) { +; ZVFBFWMA-LABEL: vfwmaccbf16_vf_v16f32: +; ZVFBFWMA: # %bb.0: +; ZVFBFWMA-NEXT: fmv.x.h a0, fa0 +; ZVFBFWMA-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFBFWMA-NEXT: vmv.v.x v14, a0 +; ZVFBFWMA-NEXT: vfwmaccbf16.vv v8, v14, v12 +; ZVFBFWMA-NEXT: ret +; +; ZVFBFMIN-LABEL: vfwmaccbf16_vf_v16f32: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: fmv.x.w a0, fa0 +; ZVFBFMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vmv.v.x v14, a0 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v16, v14 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v20, v12 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFBFMIN-NEXT: vfmacc.vv v8, v16, v20 +; ZVFBFMIN-NEXT: ret + %b.head = insertelement <16 x bfloat> poison, bfloat %b, i32 0 + %b.splat = shufflevector <16 x bfloat> %b.head, <16 x bfloat> poison, <16 x i32> zeroinitializer + %b.ext = fpext <16 x bfloat> %b.splat to <16 x float> + %c.ext = fpext <16 x bfloat> %c to <16 x float> + %res = call <16 x float> @llvm.fma.v16f32(<16 x float> %b.ext, 
<16 x float> %c.ext, <16 x float> %a) + ret <16 x float> %res +} + +define <32 x float> @vfwmaccbf32_vv_v32f32(<32 x float> %a, <32 x bfloat> %b, <32 x bfloat> %c) { +; ZVFBFWMA-LABEL: vfwmaccbf32_vv_v32f32: +; ZVFBFWMA: # %bb.0: +; ZVFBFWMA-NEXT: li a0, 32 +; ZVFBFWMA-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFBFWMA-NEXT: vfwmaccbf16.vv v8, v16, v20 +; ZVFBFWMA-NEXT: ret +; +; ZVFBFMIN-LABEL: vfwmaccbf32_vv_v32f32: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: li a0, 32 +; ZVFBFMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v24, v16 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v0, v20 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFBFMIN-NEXT: vfmacc.vv v8, v24, v0 +; ZVFBFMIN-NEXT: ret + %b.ext = fpext <32 x bfloat> %b to <32 x float> + %c.ext = fpext <32 x bfloat> %c to <32 x float> + %res = call <32 x float> @llvm.fma.v32f32(<32 x float> %b.ext, <32 x float> %c.ext, <32 x float> %a) + ret <32 x float> %res +} + +define <32 x float> @vfwmaccbf32_vf_v32f32(<32 x float> %a, bfloat %b, <32 x bfloat> %c) { +; ZVFBFWMA-LABEL: vfwmaccbf32_vf_v32f32: +; ZVFBFWMA: # %bb.0: +; ZVFBFWMA-NEXT: fmv.x.h a0, fa0 +; ZVFBFWMA-NEXT: li a1, 32 +; ZVFBFWMA-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; ZVFBFWMA-NEXT: vmv.v.x v20, a0 +; ZVFBFWMA-NEXT: vfwmaccbf16.vv v8, v20, v16 +; ZVFBFWMA-NEXT: ret +; +; ZVFBFMIN-LABEL: vfwmaccbf32_vf_v32f32: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: fmv.x.w a0, fa0 +; ZVFBFMIN-NEXT: li a1, 32 +; ZVFBFMIN-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; ZVFBFMIN-NEXT: vmv.v.x v20, a0 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v24, v20 +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v0, v16 +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFBFMIN-NEXT: vfmacc.vv v8, v24, v0 +; ZVFBFMIN-NEXT: ret + %b.head = insertelement <32 x bfloat> poison, bfloat %b, i32 0 + %b.splat = shufflevector <32 x bfloat> %b.head, <32 x bfloat> poison, <32 x i32> zeroinitializer + %b.ext = fpext <32 x bfloat> %b.splat to <32 x float> + %c.ext = fpext <32 x bfloat> %c to <32 x float> + %res = call <32 x float> @llvm.fma.v32f32(<32 x float> %b.ext, <32 x float> %c.ext, <32 x float> %a) + ret <32 x float> %res +} diff --git a/llvm/test/CodeGen/RISCV/rvv/load-bf16.ll b/llvm/test/CodeGen/RISCV/rvv/load-bf16.ll new file mode 100644 index 00000000000000..1108bb16b6712b --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/load-bf16.ll @@ -0,0 +1,71 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfbfmin -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfbfmin -verify-machineinstrs | FileCheck %s + +define <vscale x 1 x bfloat> @load_nxv1bf16(ptr %p) { +; CHECK-LABEL: load_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: ret + %x = load <vscale x 1 x bfloat>, ptr %p + ret <vscale x 1 x bfloat> %x +} + +define <vscale x 2 x bfloat> @load_nxv2bf16(ptr %p) { +; CHECK-LABEL: load_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: ret + %x = load <vscale x 2 x bfloat>, ptr %p + ret <vscale x 2 x bfloat> %x +} + +define <vscale x 4 x bfloat> @load_nxv4bf16(ptr %p) { +; CHECK-LABEL: load_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vl1re16.v v8, (a0) +; CHECK-NEXT: ret + %x = load <vscale x 4 x bfloat>, ptr %p + ret <vscale x 4 x bfloat> %x +} + +define <vscale x 8 x bfloat> @load_nxv8bf16(ptr %p) { +; CHECK-LABEL: load_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vl2re16.v v8, (a0) +; CHECK-NEXT: ret + %x = load <vscale x 8 x bfloat>, ptr %p + ret <vscale x 8 x bfloat> %x +} + +define <vscale x 16 x bfloat> @load_nxv16bf16(ptr %p) { +; CHECK-LABEL: load_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: 
vl4re16.v v8, (a0) +; CHECK-NEXT: ret + %x = load <vscale x 16 x bfloat>, ptr %p + ret <vscale x 16 x bfloat> %x +} + +define <vscale x 32 x bfloat> @load_nxv32bf16(ptr %p) { +; CHECK-LABEL: load_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re16.v v8, (a0) +; CHECK-NEXT: ret + %x = load <vscale x 32 x bfloat>, ptr %p + ret <vscale x 32 x bfloat> %x +} + +define <vscale x 4 x float> @extload(ptr %p) { +; CHECK-LABEL: extload: +; CHECK: # %bb.0: +; CHECK-NEXT: vl1re16.v v10, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10 +; CHECK-NEXT: ret + %x = load <vscale x 4 x bfloat>, ptr %p + %y = fpext <vscale x 4 x bfloat> %x to <vscale x 4 x float> + ret <vscale x 4 x float> %y +} diff --git a/llvm/test/CodeGen/RISCV/rvv/store-bf16.ll b/llvm/test/CodeGen/RISCV/rvv/store-bf16.ll new file mode 100644 index 00000000000000..30cbf9a3903190 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/store-bf16.ll @@ -0,0 +1,71 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfbfmin -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfbfmin -verify-machineinstrs | FileCheck %s + +define void @store_nxv1bf16(<vscale x 1 x bfloat> %v, ptr %p) { +; CHECK-LABEL: store_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret + store <vscale x 1 x bfloat> %v, ptr %p + ret void +} + +define void @store_nxv2bf16(<vscale x 2 x bfloat> %v, ptr %p) { +; CHECK-LABEL: store_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret + store <vscale x 2 x bfloat> %v, ptr %p + ret void +} + +define void @store_nxv4bf16(<vscale x 4 x bfloat> %v, ptr %p) { +; CHECK-LABEL: store_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vs1r.v v8, (a0) +; CHECK-NEXT: ret + store <vscale x 4 x bfloat> %v, ptr %p + ret void +} + +define void @store_nxv8bf16(<vscale x 8 x bfloat> %v, ptr %p) { +; CHECK-LABEL: store_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vs2r.v v8, (a0) +; CHECK-NEXT: ret + store <vscale x 8 x bfloat> %v, ptr %p + ret void +} + +define void @store_nxv16bf16(<vscale x 16 x bfloat> %v, ptr %p) { +; CHECK-LABEL: store_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vs4r.v v8, (a0) +; CHECK-NEXT: ret + store <vscale x 16 x bfloat> %v, ptr %p + ret void +} + +define void @store_nxv32bf32(<vscale x 32 x bfloat> %v, ptr %p) { +; CHECK-LABEL: store_nxv32bf32: +; CHECK: # %bb.0: +; CHECK-NEXT: vs8r.v v8, (a0) +; CHECK-NEXT: ret + store <vscale x 32 x bfloat> %v, ptr %p + ret void +} + +define void @truncstore(<vscale x 4 x float> %v, ptr %p) { +; CHECK-LABEL: truncstore: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8 +; CHECK-NEXT: vs1r.v v10, (a0) +; CHECK-NEXT: ret + %w = fptrunc <vscale x 4 x float> %v to <vscale x 4 x bfloat> + store <vscale x 4 x bfloat> %w, ptr %p + ret void +} diff --git a/llvm/test/CodeGen/WebAssembly/half-precision.ll b/llvm/test/CodeGen/WebAssembly/half-precision.ll index c0b14d2064d5eb..5f0ba4aa9c3c4f 100644 --- a/llvm/test/CodeGen/WebAssembly/half-precision.ll +++ b/llvm/test/CodeGen/WebAssembly/half-precision.ll @@ -1,5 +1,4 @@ ; RUN: llc < %s --mtriple=wasm32-unknown-unknown -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+fp16,+simd128 | FileCheck %s -; RUN: llc < %s --mtriple=wasm64-unknown-unknown -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+fp16,+simd128 | FileCheck %s declare float @llvm.wasm.loadf32.f16(ptr) declare void @llvm.wasm.storef16.f32(float, ptr) @@ -28,6 +27,13 @@ define <8 x half> @splat_v8f16(float %x) { ret <8 x half> %v } +; CHECK-LABEL: const_splat_v8f16: +; CHECK: v128.const $push0=, 20800, 0, 0, 0, 0, 0, 0, 20800 +; CHECK-NEXT: return $pop0 +define <8 x half> @const_splat_v8f16() { + ret <8 x half> <half 0xH5140, half 0xH0000, half 0xH0000, half 0xH0000, half 0xH0000, half 0xH0000, half 0xH0000, half 0xH5140> +} + ; CHECK-LABEL: 
extract_lane_v8f16: ; CHECK: f16x8.extract_lane $push0=, $0, 1 ; CHECK-NEXT: return $pop0 @@ -308,3 +314,24 @@ define <8 x i16> @trunc_sat_u_v8i16_sat(<8 x half> %x) { %a = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> %x) ret <8 x i16> %a } + +; ============================================================================== +; Load and Store +; ============================================================================== +define <8 x half> @load_v8f16(ptr %p) { +; CHECK-LABEL: load_v8f16: +; CHECK: .functype load_v8f16 (i32) -> (v128) +; CHECK-NEXT: v128.load $push0=, 0($0) +; CHECK-NEXT: return $pop0 + %v = load <8 x half>, ptr %p + ret <8 x half> %v +} + +define void @store_v8f16(<8 x half> %v, ptr %p) { +; CHECK-LABEL: store_v8f16: +; CHECK: .functype store_v8f16 (v128, i32) -> () +; CHECK-NEXT: v128.store 0($1), $0 +; CHECK-NEXT: return + store <8 x half> %v , ptr %p + ret void +} diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll index 62b95eedc9d4f1..43c6e4b0db16f2 100644 --- a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll +++ b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll @@ -1226,6 +1226,67 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind { ret <8 x i16> %shift } +define <16 x i8> @constant_shift_v16i8_pairs(<16 x i8> %a) nounwind { +; SSE-LABEL: constant_shift_v16i8_pairs: +; SSE: # %bb.0: +; SSE-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [512,16384,4096,1024,32768,16384,8192,4096] +; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: constant_shift_v16i8_pairs: +; AVX: # %bb.0: +; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [512,16384,4096,1024,32768,16384,8192,4096] +; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: retq +; +; XOP-LABEL: constant_shift_v16i8_pairs: +; XOP: # %bb.0: +; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; XOP-NEXT: retq +; +; AVX512DQ-LABEL: constant_shift_v16i8_pairs: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; AVX512DQ-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 +; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512DQ-NEXT: vzeroupper +; AVX512DQ-NEXT: retq +; +; AVX512BW-LABEL: constant_shift_v16i8_pairs: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpmovsxbw {{.*#+}} ymm1 = [7,7,2,2,4,4,6,6,1,1,2,2,3,3,4,4] +; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512DQVL-LABEL: constant_shift_v16i8_pairs: +; AVX512DQVL: # %bb.0: +; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; AVX512DQVL-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 +; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512DQVL-NEXT: vzeroupper +; AVX512DQVL-NEXT: retq +; +; AVX512BWVL-LABEL: constant_shift_v16i8_pairs: +; AVX512BWVL: # %bb.0: +; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; AVX512BWVL-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0 +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq +; +; X86-SSE-LABEL: constant_shift_v16i8_pairs: +; X86-SSE: # %bb.0: +; X86-SSE-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [512,16384,4096,1024,32768,16384,8192,4096] +; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE-NEXT: retl + %shift = lshr <16 x i8> %a, <i8 7, i8 7, i8 2, i8 2, i8 4, i8 4, i8 6, i8 6, i8 1, i8 1, i8 2, i8 2, i8 3, i8 3, i8 4, i8 4> + ret <16 x i8> %shift +} + define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind { ; SSE2-LABEL: constant_shift_v16i8: ; SSE2: # %bb.0: diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll index 0ef5d650535d23..932f210e239932 100644 --- a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll +++ b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll @@ -1345,6 +1345,72 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind { ret <16 x i16> %shift } +define <32 x i8> @constant_shift_v32i8_pairs(<32 x i8> %a) nounwind { +; AVX1-LABEL: constant_shift_v32i8_pairs: +; AVX1: # %bb.0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [512,16384,4096,1024,32768,16384,8192,4096] +; AVX1-NEXT: vpmulhuw %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [257,16191,3855,771,32639,16191,7967,3855] +; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1 +; AVX1-NEXT: vpmulhuw %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: constant_shift_v32i8_pairs: +; AVX2: # %bb.0: +; AVX2-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [512,16384,4096,1024,32768,16384,8192,4096,512,16384,4096,1024,32768,16384,8192,4096] +; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX2-NEXT: retq +; +; XOPAVX1-LABEL: constant_shift_v32i8_pairs: +; XOPAVX1: # %bb.0: +; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [249,249,254,254,252,252,250,250,255,255,254,254,253,253,252,252] +; XOPAVX1-NEXT: vpshlb %xmm2, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpshlb %xmm2, %xmm0, %xmm0 +; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; XOPAVX1-NEXT: retq +; +; XOPAVX2-LABEL: constant_shift_v32i8_pairs: +; XOPAVX2: # %bb.0: +; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [249,249,254,254,252,252,250,250,255,255,254,254,253,253,252,252] +; XOPAVX2-NEXT: vpshlb %xmm2, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpshlb %xmm2, %xmm0, %xmm0 +; XOPAVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; XOPAVX2-NEXT: retq +; +; AVX512DQ-LABEL: constant_shift_v32i8_pairs: +; 
AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [512,16384,4096,1024,32768,16384,8192,4096,512,16384,4096,1024,32768,16384,8192,4096] +; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512DQ-NEXT: retq +; +; AVX512BW-LABEL: constant_shift_v32i8_pairs: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero +; AVX512BW-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 +; AVX512BW-NEXT: retq +; +; AVX512DQVL-LABEL: constant_shift_v32i8_pairs: +; AVX512DQVL: # %bb.0: +; AVX512DQVL-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [512,16384,4096,1024,32768,16384,8192,4096,512,16384,4096,1024,32768,16384,8192,4096] +; AVX512DQVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512DQVL-NEXT: retq +; +; AVX512BWVL-LABEL: constant_shift_v32i8_pairs: +; AVX512BWVL: # %bb.0: +; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero +; AVX512BWVL-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 +; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0 +; AVX512BWVL-NEXT: retq + %shift = lshr <32 x i8> %a, <i8 7, i8 7, i8 2, i8 2, i8 4, i8 4, i8 6, i8 6, i8 1, i8 1, i8 2, i8 2, i8 3, i8 3, i8 4, i8 4, i8 7, i8 7, i8 2, i8 2, i8 4, i8 4, i8 6, i8 6, i8 1, i8 1, i8 2, i8 2, i8 3, i8 3, i8 4, i8 4> + ret <32 x i8> %shift +} + define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind { ; AVX1-LABEL: constant_shift_v32i8: ; AVX1: # %bb.0: diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll index efd73b4ca132bb..8b61540081a7c7 100644 --- a/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll +++ b/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll @@ -306,6 +306,29 @@ define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) nounwind { ret <32 x i16> %shift } +define <64 x i8> @constant_shift_v64i8_pairs(<64 x i8> %a) nounwind { +; AVX512DQ-LABEL: constant_shift_v64i8_pairs: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512DQ-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [512,16384,4096,1024,32768,16384,8192,4096,512,16384,4096,1024,32768,16384,8192,4096] +; AVX512DQ-NEXT: # ymm2 = mem[0,1,0,1] +; AVX512DQ-NEXT: vpmulhuw %ymm2, %ymm1, %ymm1 +; AVX512DQ-NEXT: vpmulhuw %ymm2, %ymm0, %ymm0 +; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512DQ-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [257,16191,3855,771,32639,16191,7967,3855,257,16191,3855,771,32639,16191,7967,3855,257,16191,3855,771,32639,16191,7967,3855,257,16191,3855,771,32639,16191,7967,3855] +; AVX512DQ-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] +; AVX512DQ-NEXT: vpandq %zmm1, %zmm0, %zmm0 +; AVX512DQ-NEXT: retq +; +; AVX512BW-LABEL: constant_shift_v64i8_pairs: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 +; 
AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 +; AVX512BW-NEXT: retq + %shift = lshr <64 x i8> %a, <i8 7, i8 7, i8 2, i8 2, i8 4, i8 4, i8 6, i8 6, i8 1, i8 1, i8 2, i8 2, i8 3, i8 3, i8 4, i8 4, i8 7, i8 7, i8 2, i8 2, i8 4, i8 4, i8 6, i8 6, i8 1, i8 1, i8 2, i8 2, i8 3, i8 3, i8 4, i8 4, i8 7, i8 7, i8 2, i8 2, i8 4, i8 4, i8 6, i8 6, i8 1, i8 1, i8 2, i8 2, i8 3, i8 3, i8 4, i8 4, i8 7, i8 7, i8 2, i8 2, i8 4, i8 4, i8 6, i8 6, i8 1, i8 1, i8 2, i8 2, i8 3, i8 3, i8 4, i8 4> + ret <64 x i8> %shift +} + define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind { ; AVX512DQ-LABEL: constant_shift_v64i8: ; AVX512DQ: # %bb.0: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-vs-scalar.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-vs-scalar.ll index 430b0db87b8846..d1d1b0ab100fb8 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-vs-scalar.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-vs-scalar.ll @@ -7,8 +7,8 @@ target triple = "aarch64--linux-gnu" %pair = type { i8, i8 } ; CHECK-LABEL: test -; CHECK: Found an estimated cost of 16 for VF 2 For instruction: {{.*}} load i8 -; CHECK: Found an estimated cost of 0 for VF 2 For instruction: {{.*}} load i8 +; CHECK: Found an estimated cost of 8 for VF 2 For instruction: {{.*}} load i8 +; CHECK: Found an estimated cost of 8 for VF 2 For instruction: {{.*}} load i8 ; CHECK-LABEL: entry: ; CHECK-LABEL: vector.body: ; CHECK: [[LOAD1:%.*]] = load i8 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll index 21af9ae801e16c..dec124b55cd4e0 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll @@ -168,8 +168,8 @@ entry: ; gaps. ; ; VF_2-LABEL: Checking a loop in 'i64_factor_8' -; VF_2: Found an estimated cost of 16 for VF 2 For instruction: %tmp2 = load i64, ptr %tmp0, align 8 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i64, ptr %tmp1, align 8 +; VF_2: Found an estimated cost of 8 for VF 2 For instruction: %tmp2 = load i64, ptr %tmp0, align 8 +; VF_2-NEXT: Found an estimated cost of 8 for VF 2 For instruction: %tmp3 = load i64, ptr %tmp1, align 8 ; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i64 %tmp2, ptr %tmp0, align 8 ; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i64 %tmp3, ptr %tmp1, align 8 for.body: diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-interleaved-cost.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-interleaved-cost.ll index c7a04e3669ed62..976c6a9a570af9 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-interleaved-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-interleaved-cost.ll @@ -15,10 +15,10 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in 'i8_factor_2' -; VF_2: Found an estimated cost of 24 for VF 2 For instruction: %tmp2 = load i8, ptr %tmp0, align 1 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i8, ptr %tmp1, align 1 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i8 %tmp2, ptr %tmp0, align 1 -; VF_2-NEXT: Found an estimated cost of 24 for VF 2 For instruction: store i8 %tmp3, ptr %tmp1, align 1 +; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp2 = load i8, ptr %tmp0, align 1 +; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp3 = load i8, ptr %tmp1, align 1 +; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i8 %tmp2, ptr %tmp0, align 1 +; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i8 %tmp3, ptr %tmp1, align 1 ; VF_4-LABEL: Checking a loop in 'i8_factor_2' ; VF_4: Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i8, ptr %tmp0, align 1 
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i8, ptr %tmp1, align 1 @@ -56,10 +56,10 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in 'i16_factor_2' -; VF_2: Found an estimated cost of 24 for VF 2 For instruction: %tmp2 = load i16, ptr %tmp0, align 2 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i16, ptr %tmp1, align 2 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i16 %tmp2, ptr %tmp0, align 2 -; VF_2-NEXT: Found an estimated cost of 24 for VF 2 For instruction: store i16 %tmp3, ptr %tmp1, align 2 +; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp2 = load i16, ptr %tmp0, align 2 +; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp3 = load i16, ptr %tmp1, align 2 +; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i16 %tmp2, ptr %tmp0, align 2 +; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i16 %tmp3, ptr %tmp1, align 2 ; VF_4-LABEL: Checking a loop in 'i16_factor_2' ; VF_4: Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i16, ptr %tmp0, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i16, ptr %tmp1, align 2 @@ -97,10 +97,10 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in 'i32_factor_2' -; VF_2: Found an estimated cost of 24 for VF 2 For instruction: %tmp2 = load i32, ptr %tmp0, align 4 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i32, ptr %tmp1, align 4 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 %tmp2, ptr %tmp0, align 4 -; VF_2-NEXT: Found an estimated cost of 24 for VF 2 For instruction: store i32 %tmp3, ptr %tmp1, align 4 +; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp2 = load i32, ptr %tmp0, align 4 +; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp3 = load i32, ptr %tmp1, align 4 +; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i32 %tmp2, ptr %tmp0, align 4 +; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i32 %tmp3, ptr %tmp1, align 4 ; VF_4-LABEL: Checking a loop in 'i32_factor_2' ; VF_4: Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i32, ptr %tmp0, align 4 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i32, ptr %tmp1, align 4 @@ -138,25 +138,25 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in 'i64_factor_2' -; VF_2: Found an estimated cost of 44 for VF 2 For instruction: %tmp2 = load i64, ptr %tmp0, align 8 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i64, ptr %tmp1, align 8 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i64 %tmp2, ptr %tmp0, align 8 -; VF_2-NEXT: Found an estimated cost of 44 for VF 2 For instruction: store i64 %tmp3, ptr %tmp1, align 8 +; VF_2: Found an estimated cost of 22 for VF 2 For instruction: %tmp2 = load i64, ptr %tmp0, align 8 +; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: %tmp3 = load i64, ptr %tmp1, align 8 +; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: store i64 %tmp2, ptr %tmp0, align 8 +; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: store i64 %tmp3, ptr %tmp1, align 8 ; VF_4-LABEL: Checking a loop in 'i64_factor_2' -; VF_4: Found an estimated cost of 88 for VF 4 For instruction: %tmp2 = load i64, ptr 
%tmp0, align 8 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i64, ptr %tmp1, align 8 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i64 %tmp2, ptr %tmp0, align 8 -; VF_4-NEXT: Found an estimated cost of 88 for VF 4 For instruction: store i64 %tmp3, ptr %tmp1, align 8 +; VF_4: Found an estimated cost of 44 for VF 4 For instruction: %tmp2 = load i64, ptr %tmp0, align 8 +; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: %tmp3 = load i64, ptr %tmp1, align 8 +; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: store i64 %tmp2, ptr %tmp0, align 8 +; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: store i64 %tmp3, ptr %tmp1, align 8 ; VF_8-LABEL: Checking a loop in 'i64_factor_2' -; VF_8: Found an estimated cost of 176 for VF 8 For instruction: %tmp2 = load i64, ptr %tmp0, align 8 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i64, ptr %tmp1, align 8 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i64 %tmp2, ptr %tmp0, align 8 -; VF_8-NEXT: Found an estimated cost of 176 for VF 8 For instruction: store i64 %tmp3, ptr %tmp1, align 8 +; VF_8: Found an estimated cost of 88 for VF 8 For instruction: %tmp2 = load i64, ptr %tmp0, align 8 +; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: %tmp3 = load i64, ptr %tmp1, align 8 +; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: store i64 %tmp2, ptr %tmp0, align 8 +; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: store i64 %tmp3, ptr %tmp1, align 8 ; VF_16-LABEL: Checking a loop in 'i64_factor_2' -; VF_16: Found an estimated cost of 352 for VF 16 For instruction: %tmp2 = load i64, ptr %tmp0, align 8 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i64, ptr %tmp1, align 8 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i64 %tmp2, ptr %tmp0, align 8 -; VF_16-NEXT: Found an estimated cost of 352 for VF 16 For instruction: store i64 %tmp3, ptr %tmp1, align 8 +; VF_16: Found an estimated cost of 176 for VF 16 For instruction: %tmp2 = load i64, ptr %tmp0, align 8 +; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: %tmp3 = load i64, ptr %tmp1, align 8 +; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: store i64 %tmp2, ptr %tmp0, align 8 +; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: store i64 %tmp3, ptr %tmp1, align 8 for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %i64.2, ptr %data, i64 %i, i32 0 @@ -179,10 +179,10 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in 'f16_factor_2' -; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp2 = load half, ptr %tmp0, align 2 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load half, ptr %tmp1, align 2 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store half %tmp2, ptr %tmp0, align 2 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store half %tmp3, ptr %tmp1, align 2 +; VF_2: Found an estimated cost of 6 for VF 2 For instruction: %tmp2 = load half, ptr %tmp0, align 2 +; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: %tmp3 = load half, ptr %tmp1, align 2 +; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store half %tmp2, ptr %tmp0, align 2 +; VF_2-NEXT: Found an estimated cost of 
6 for VF 2 For instruction: store half %tmp3, ptr %tmp1, align 2 ; VF_4-LABEL: Checking a loop in 'f16_factor_2' ; VF_4: Found an estimated cost of 18 for VF 4 For instruction: %tmp2 = load half, ptr %tmp0, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load half, ptr %tmp1, align 2 @@ -261,25 +261,25 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in 'f64_factor_2' -; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp2 = load double, ptr %tmp0, align 8 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load double, ptr %tmp1, align 8 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store double %tmp2, ptr %tmp0, align 8 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store double %tmp3, ptr %tmp1, align 8 +; VF_2: Found an estimated cost of 6 for VF 2 For instruction: %tmp2 = load double, ptr %tmp0, align 8 +; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: %tmp3 = load double, ptr %tmp1, align 8 +; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store double %tmp2, ptr %tmp0, align 8 +; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store double %tmp3, ptr %tmp1, align 8 ; VF_4-LABEL: Checking a loop in 'f64_factor_2' -; VF_4: Found an estimated cost of 24 for VF 4 For instruction: %tmp2 = load double, ptr %tmp0, align 8 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load double, ptr %tmp1, align 8 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store double %tmp2, ptr %tmp0, align 8 -; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store double %tmp3, ptr %tmp1, align 8 +; VF_4: Found an estimated cost of 12 for VF 4 For instruction: %tmp2 = load double, ptr %tmp0, align 8 +; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: %tmp3 = load double, ptr %tmp1, align 8 +; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store double %tmp2, ptr %tmp0, align 8 +; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store double %tmp3, ptr %tmp1, align 8 ; VF_8-LABEL: Checking a loop in 'f64_factor_2' -; VF_8: Found an estimated cost of 48 for VF 8 For instruction: %tmp2 = load double, ptr %tmp0, align 8 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load double, ptr %tmp1, align 8 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store double %tmp2, ptr %tmp0, align 8 -; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store double %tmp3, ptr %tmp1, align 8 +; VF_8: Found an estimated cost of 24 for VF 8 For instruction: %tmp2 = load double, ptr %tmp0, align 8 +; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: %tmp3 = load double, ptr %tmp1, align 8 +; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store double %tmp2, ptr %tmp0, align 8 +; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store double %tmp3, ptr %tmp1, align 8 ; VF_16-LABEL: Checking a loop in 'f64_factor_2' -; VF_16: Found an estimated cost of 96 for VF 16 For instruction: %tmp2 = load double, ptr %tmp0, align 8 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load double, ptr %tmp1, align 8 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store double %tmp2, ptr %tmp0, align 8 -; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: 
store double %tmp3, ptr %tmp1, align 8 +; VF_16: Found an estimated cost of 48 for VF 16 For instruction: %tmp2 = load double, ptr %tmp0, align 8 +; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: %tmp3 = load double, ptr %tmp1, align 8 +; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store double %tmp2, ptr %tmp0, align 8 +; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store double %tmp3, ptr %tmp1, align 8 for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %f64.2, ptr %data, i64 %i, i32 0 @@ -306,33 +306,33 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in 'i8_factor_3' -; VF_2: Found an estimated cost of 36 for VF 2 For instruction: %tmp3 = load i8, ptr %tmp0, align 1 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load i8, ptr %tmp1, align 1 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i8, ptr %tmp2, align 1 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i8 %tmp3, ptr %tmp0, align 1 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i8 %tmp4, ptr %tmp1, align 1 -; VF_2-NEXT: Found an estimated cost of 36 for VF 2 For instruction: store i8 %tmp5, ptr %tmp2, align 1 +; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp3 = load i8, ptr %tmp0, align 1 +; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp4 = load i8, ptr %tmp1, align 1 +; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp5 = load i8, ptr %tmp2, align 1 +; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i8 %tmp3, ptr %tmp0, align 1 +; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i8 %tmp4, ptr %tmp1, align 1 +; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i8 %tmp5, ptr %tmp2, align 1 ; VF_4-LABEL: Checking a loop in 'i8_factor_3' -; VF_4: Found an estimated cost of 72 for VF 4 For instruction: %tmp3 = load i8, ptr %tmp0, align 1 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load i8, ptr %tmp1, align 1 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i8, ptr %tmp2, align 1 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i8 %tmp3, ptr %tmp0, align 1 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i8 %tmp4, ptr %tmp1, align 1 -; VF_4-NEXT: Found an estimated cost of 72 for VF 4 For instruction: store i8 %tmp5, ptr %tmp2, align 1 +; VF_4: Found an estimated cost of 24 for VF 4 For instruction: %tmp3 = load i8, ptr %tmp0, align 1 +; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp4 = load i8, ptr %tmp1, align 1 +; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp5 = load i8, ptr %tmp2, align 1 +; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i8 %tmp3, ptr %tmp0, align 1 +; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i8 %tmp4, ptr %tmp1, align 1 +; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i8 %tmp5, ptr %tmp2, align 1 ; VF_8-LABEL: Checking a loop in 'i8_factor_3' -; VF_8: Found an estimated cost of 144 for VF 8 For instruction: %tmp3 = load i8, ptr %tmp0, align 1 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load i8, ptr %tmp1, align 1 -; VF_8-NEXT: Found an estimated cost of 0 for VF 
8 For instruction: %tmp5 = load i8, ptr %tmp2, align 1 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i8 %tmp3, ptr %tmp0, align 1 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i8 %tmp4, ptr %tmp1, align 1 -; VF_8-NEXT: Found an estimated cost of 144 for VF 8 For instruction: store i8 %tmp5, ptr %tmp2, align 1 +; VF_8: Found an estimated cost of 48 for VF 8 For instruction: %tmp3 = load i8, ptr %tmp0, align 1 +; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp4 = load i8, ptr %tmp1, align 1 +; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp5 = load i8, ptr %tmp2, align 1 +; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i8 %tmp3, ptr %tmp0, align 1 +; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i8 %tmp4, ptr %tmp1, align 1 +; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i8 %tmp5, ptr %tmp2, align 1 ; VF_16-LABEL: Checking a loop in 'i8_factor_3' -; VF_16: Found an estimated cost of 288 for VF 16 For instruction: %tmp3 = load i8, ptr %tmp0, align 1 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load i8, ptr %tmp1, align 1 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i8, ptr %tmp2, align 1 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i8 %tmp3, ptr %tmp0, align 1 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i8 %tmp4, ptr %tmp1, align 1 -; VF_16-NEXT: Found an estimated cost of 288 for VF 16 For instruction: store i8 %tmp5, ptr %tmp2, align 1 +; VF_16: Found an estimated cost of 96 for VF 16 For instruction: %tmp3 = load i8, ptr %tmp0, align 1 +; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp4 = load i8, ptr %tmp1, align 1 +; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp5 = load i8, ptr %tmp2, align 1 +; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i8 %tmp3, ptr %tmp0, align 1 +; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i8 %tmp4, ptr %tmp1, align 1 +; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i8 %tmp5, ptr %tmp2, align 1 for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %i8.3, ptr %data, i64 %i, i32 0 @@ -358,33 +358,33 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in 'i16_factor_3' -; VF_2: Found an estimated cost of 36 for VF 2 For instruction: %tmp3 = load i16, ptr %tmp0, align 2 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load i16, ptr %tmp1, align 2 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i16, ptr %tmp2, align 2 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i16 %tmp3, ptr %tmp0, align 2 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i16 %tmp4, ptr %tmp1, align 2 -; VF_2-NEXT: Found an estimated cost of 36 for VF 2 For instruction: store i16 %tmp5, ptr %tmp2, align 2 +; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp3 = load i16, ptr %tmp0, align 2 +; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp4 = load i16, ptr %tmp1, align 2 +; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp5 = load i16, ptr %tmp2, align 2 +; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For 
instruction: store i16 %tmp3, ptr %tmp0, align 2 +; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i16 %tmp4, ptr %tmp1, align 2 +; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i16 %tmp5, ptr %tmp2, align 2 ; VF_4-LABEL: Checking a loop in 'i16_factor_3' -; VF_4: Found an estimated cost of 72 for VF 4 For instruction: %tmp3 = load i16, ptr %tmp0, align 2 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load i16, ptr %tmp1, align 2 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i16, ptr %tmp2, align 2 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i16 %tmp3, ptr %tmp0, align 2 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i16 %tmp4, ptr %tmp1, align 2 -; VF_4-NEXT: Found an estimated cost of 72 for VF 4 For instruction: store i16 %tmp5, ptr %tmp2, align 2 +; VF_4: Found an estimated cost of 24 for VF 4 For instruction: %tmp3 = load i16, ptr %tmp0, align 2 +; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp4 = load i16, ptr %tmp1, align 2 +; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp5 = load i16, ptr %tmp2, align 2 +; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i16 %tmp3, ptr %tmp0, align 2 +; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i16 %tmp4, ptr %tmp1, align 2 +; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i16 %tmp5, ptr %tmp2, align 2 ; VF_8-LABEL: Checking a loop in 'i16_factor_3' -; VF_8: Found an estimated cost of 144 for VF 8 For instruction: %tmp3 = load i16, ptr %tmp0, align 2 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load i16, ptr %tmp1, align 2 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i16, ptr %tmp2, align 2 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i16 %tmp3, ptr %tmp0, align 2 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i16 %tmp4, ptr %tmp1, align 2 -; VF_8-NEXT: Found an estimated cost of 144 for VF 8 For instruction: store i16 %tmp5, ptr %tmp2, align 2 +; VF_8: Found an estimated cost of 48 for VF 8 For instruction: %tmp3 = load i16, ptr %tmp0, align 2 +; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp4 = load i16, ptr %tmp1, align 2 +; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp5 = load i16, ptr %tmp2, align 2 +; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i16 %tmp3, ptr %tmp0, align 2 +; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i16 %tmp4, ptr %tmp1, align 2 +; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i16 %tmp5, ptr %tmp2, align 2 ; VF_16-LABEL: Checking a loop in 'i16_factor_3' -; VF_16: Found an estimated cost of 288 for VF 16 For instruction: %tmp3 = load i16, ptr %tmp0, align 2 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load i16, ptr %tmp1, align 2 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i16, ptr %tmp2, align 2 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i16 %tmp3, ptr %tmp0, align 2 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i16 %tmp4, ptr %tmp1, align 2 -; VF_16-NEXT: Found an estimated cost of 288 for VF 16 For instruction: store i16 
%tmp5, ptr %tmp2, align 2 +; VF_16: Found an estimated cost of 96 for VF 16 For instruction: %tmp3 = load i16, ptr %tmp0, align 2 +; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp4 = load i16, ptr %tmp1, align 2 +; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp5 = load i16, ptr %tmp2, align 2 +; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i16 %tmp3, ptr %tmp0, align 2 +; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i16 %tmp4, ptr %tmp1, align 2 +; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i16 %tmp5, ptr %tmp2, align 2 for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %i16.3, ptr %data, i64 %i, i32 0 @@ -410,33 +410,33 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in 'i32_factor_3' -; VF_2: Found an estimated cost of 36 for VF 2 For instruction: %tmp3 = load i32, ptr %tmp0, align 4 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load i32, ptr %tmp1, align 4 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i32, ptr %tmp2, align 4 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 %tmp3, ptr %tmp0, align 4 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 %tmp4, ptr %tmp1, align 4 -; VF_2-NEXT: Found an estimated cost of 36 for VF 2 For instruction: store i32 %tmp5, ptr %tmp2, align 4 +; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp3 = load i32, ptr %tmp0, align 4 +; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp4 = load i32, ptr %tmp1, align 4 +; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp5 = load i32, ptr %tmp2, align 4 +; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i32 %tmp3, ptr %tmp0, align 4 +; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i32 %tmp4, ptr %tmp1, align 4 +; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i32 %tmp5, ptr %tmp2, align 4 ; VF_4-LABEL: Checking a loop in 'i32_factor_3' -; VF_4: Found an estimated cost of 24 for VF 4 For instruction: %tmp3 = load i32, ptr %tmp0, align 4 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load i32, ptr %tmp1, align 4 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i32, ptr %tmp2, align 4 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 %tmp3, ptr %tmp0, align 4 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 %tmp4, ptr %tmp1, align 4 -; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i32 %tmp5, ptr %tmp2, align 4 +; VF_4: Found an estimated cost of 8 for VF 4 For instruction: %tmp3 = load i32, ptr %tmp0, align 4 +; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: %tmp4 = load i32, ptr %tmp1, align 4 +; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: %tmp5 = load i32, ptr %tmp2, align 4 +; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i32 %tmp3, ptr %tmp0, align 4 +; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i32 %tmp4, ptr %tmp1, align 4 +; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i32 %tmp5, ptr %tmp2, align 4 ; VF_8-LABEL: Checking a loop in 'i32_factor_3' -; VF_8: Found an estimated cost of 144 for 
VF 8 For instruction: %tmp3 = load i32, ptr %tmp0, align 4 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load i32, ptr %tmp1, align 4 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i32, ptr %tmp2, align 4 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i32 %tmp3, ptr %tmp0, align 4 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i32 %tmp4, ptr %tmp1, align 4 -; VF_8-NEXT: Found an estimated cost of 144 for VF 8 For instruction: store i32 %tmp5, ptr %tmp2, align 4 +; VF_8: Found an estimated cost of 48 for VF 8 For instruction: %tmp3 = load i32, ptr %tmp0, align 4 +; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp4 = load i32, ptr %tmp1, align 4 +; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp5 = load i32, ptr %tmp2, align 4 +; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i32 %tmp3, ptr %tmp0, align 4 +; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i32 %tmp4, ptr %tmp1, align 4 +; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i32 %tmp5, ptr %tmp2, align 4 ; VF_16-LABEL: Checking a loop in 'i32_factor_3' -; VF_16: Found an estimated cost of 288 for VF 16 For instruction: %tmp3 = load i32, ptr %tmp0, align 4 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load i32, ptr %tmp1, align 4 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i32, ptr %tmp2, align 4 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i32 %tmp3, ptr %tmp0, align 4 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i32 %tmp4, ptr %tmp1, align 4 -; VF_16-NEXT: Found an estimated cost of 288 for VF 16 For instruction: store i32 %tmp5, ptr %tmp2, align 4 +; VF_16: Found an estimated cost of 96 for VF 16 For instruction: %tmp3 = load i32, ptr %tmp0, align 4 +; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp4 = load i32, ptr %tmp1, align 4 +; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp5 = load i32, ptr %tmp2, align 4 +; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i32 %tmp3, ptr %tmp0, align 4 +; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i32 %tmp4, ptr %tmp1, align 4 +; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i32 %tmp5, ptr %tmp2, align 4 for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %i32.3, ptr %data, i64 %i, i32 0 @@ -462,33 +462,33 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in 'i64_factor_3' -; VF_2: Found an estimated cost of 66 for VF 2 For instruction: %tmp3 = load i64, ptr %tmp0, align 8 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load i64, ptr %tmp1, align 8 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i64, ptr %tmp2, align 8 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i64 %tmp3, ptr %tmp0, align 8 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i64 %tmp4, ptr %tmp1, align 8 -; VF_2-NEXT: Found an estimated cost of 66 for VF 2 For instruction: store i64 %tmp5, ptr %tmp2, align 8 +; VF_2: Found an estimated cost of 22 for VF 2 For instruction: %tmp3 = load i64, ptr %tmp0, align 8 +; VF_2-NEXT: Found an estimated cost of 
22 for VF 2 For instruction: %tmp4 = load i64, ptr %tmp1, align 8 +; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: %tmp5 = load i64, ptr %tmp2, align 8 +; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: store i64 %tmp3, ptr %tmp0, align 8 +; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: store i64 %tmp4, ptr %tmp1, align 8 +; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: store i64 %tmp5, ptr %tmp2, align 8 ; VF_4-LABEL: Checking a loop in 'i64_factor_3' -; VF_4: Found an estimated cost of 132 for VF 4 For instruction: %tmp3 = load i64, ptr %tmp0, align 8 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load i64, ptr %tmp1, align 8 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i64, ptr %tmp2, align 8 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i64 %tmp3, ptr %tmp0, align 8 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i64 %tmp4, ptr %tmp1, align 8 -; VF_4-NEXT: Found an estimated cost of 132 for VF 4 For instruction: store i64 %tmp5, ptr %tmp2, align 8 +; VF_4: Found an estimated cost of 44 for VF 4 For instruction: %tmp3 = load i64, ptr %tmp0, align 8 +; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: %tmp4 = load i64, ptr %tmp1, align 8 +; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: %tmp5 = load i64, ptr %tmp2, align 8 +; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: store i64 %tmp3, ptr %tmp0, align 8 +; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: store i64 %tmp4, ptr %tmp1, align 8 +; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: store i64 %tmp5, ptr %tmp2, align 8 ; VF_8-LABEL: Checking a loop in 'i64_factor_3' -; VF_8: Found an estimated cost of 264 for VF 8 For instruction: %tmp3 = load i64, ptr %tmp0, align 8 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load i64, ptr %tmp1, align 8 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i64, ptr %tmp2, align 8 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i64 %tmp3, ptr %tmp0, align 8 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i64 %tmp4, ptr %tmp1, align 8 -; VF_8-NEXT: Found an estimated cost of 264 for VF 8 For instruction: store i64 %tmp5, ptr %tmp2, align 8 +; VF_8: Found an estimated cost of 88 for VF 8 For instruction: %tmp3 = load i64, ptr %tmp0, align 8 +; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: %tmp4 = load i64, ptr %tmp1, align 8 +; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: %tmp5 = load i64, ptr %tmp2, align 8 +; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: store i64 %tmp3, ptr %tmp0, align 8 +; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: store i64 %tmp4, ptr %tmp1, align 8 +; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: store i64 %tmp5, ptr %tmp2, align 8 ; VF_16-LABEL: Checking a loop in 'i64_factor_3' -; VF_16: Found an estimated cost of 528 for VF 16 For instruction: %tmp3 = load i64, ptr %tmp0, align 8 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load i64, ptr %tmp1, align 8 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i64, ptr %tmp2, align 8 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For 
instruction: store i64 %tmp3, ptr %tmp0, align 8
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i64 %tmp4, ptr %tmp1, align 8
-; VF_16-NEXT: Found an estimated cost of 528 for VF 16 For instruction: store i64 %tmp5, ptr %tmp2, align 8
+; VF_16: Found an estimated cost of 176 for VF 16 For instruction: %tmp3 = load i64, ptr %tmp0, align 8
+; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: %tmp4 = load i64, ptr %tmp1, align 8
+; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: %tmp5 = load i64, ptr %tmp2, align 8
+; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: store i64 %tmp3, ptr %tmp0, align 8
+; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: store i64 %tmp4, ptr %tmp1, align 8
+; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: store i64 %tmp5, ptr %tmp2, align 8
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
   %tmp0 = getelementptr inbounds %i64.3, ptr %data, i64 %i, i32 0
@@ -514,12 +514,12 @@ entry:
   br label %for.body
 
 ; VF_2-LABEL: Checking a loop in 'f16_factor_3'
-; VF_2: Found an estimated cost of 18 for VF 2 For instruction: %tmp3 = load half, ptr %tmp0, align 2
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load half, ptr %tmp1, align 2
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load half, ptr %tmp2, align 2
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store half %tmp3, ptr %tmp0, align 2
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store half %tmp4, ptr %tmp1, align 2
-; VF_2-NEXT: Found an estimated cost of 18 for VF 2 For instruction: store half %tmp5, ptr %tmp2, align 2
+; VF_2: Found an estimated cost of 6 for VF 2 For instruction: %tmp3 = load half, ptr %tmp0, align 2
+; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: %tmp4 = load half, ptr %tmp1, align 2
+; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: %tmp5 = load half, ptr %tmp2, align 2
+; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store half %tmp3, ptr %tmp0, align 2
+; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store half %tmp4, ptr %tmp1, align 2
+; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store half %tmp5, ptr %tmp2, align 2
 ; VF_4-LABEL: Checking a loop in 'f16_factor_3'
 ; VF_4: Found an estimated cost of 28 for VF 4 For instruction: %tmp3 = load half, ptr %tmp0, align 2
 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load half, ptr %tmp1, align 2
@@ -573,12 +573,12 @@ entry:
 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store float %tmp4, ptr %tmp1, align 4
 ; VF_2-NEXT: Found an estimated cost of 16 for VF 2 For instruction: store float %tmp5, ptr %tmp2, align 4
 ; VF_4-LABEL: Checking a loop in 'f32_factor_3'
-; VF_4: Found an estimated cost of 24 for VF 4 For instruction: %tmp3 = load float, ptr %tmp0, align 4
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load float, ptr %tmp1, align 4
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load float, ptr %tmp2, align 4
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store float %tmp3, ptr %tmp0, align 4
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store float %tmp4, ptr %tmp1, align 4
-; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store float %tmp5, ptr %tmp2, align 4
+; VF_4: Found an estimated cost of 8 for VF 4 For instruction: %tmp3 = load float, ptr %tmp0, align 4
+; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: %tmp4 = load float, ptr %tmp1, align 4
+; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: %tmp5 = load float, ptr %tmp2, align 4
+; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store float %tmp3, ptr %tmp0, align 4
+; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store float %tmp4, ptr %tmp1, align 4
+; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store float %tmp5, ptr %tmp2, align 4
 ; VF_8-LABEL: Checking a loop in 'f32_factor_3'
 ; VF_8: Found an estimated cost of 64 for VF 8 For instruction: %tmp3 = load float, ptr %tmp0, align 4
 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load float, ptr %tmp1, align 4
@@ -618,33 +618,33 @@ entry:
   br label %for.body
 
 ; VF_2-LABEL: Checking a loop in 'f64_factor_3'
-; VF_2: Found an estimated cost of 18 for VF 2 For instruction: %tmp3 = load double, ptr %tmp0, align 8
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load double, ptr %tmp1, align 8
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load double, ptr %tmp2, align 8
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store double %tmp3, ptr %tmp0, align 8
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store double %tmp4, ptr %tmp1, align 8
-; VF_2-NEXT: Found an estimated cost of 18 for VF 2 For instruction: store double %tmp5, ptr %tmp2, align 8
+; VF_2: Found an estimated cost of 6 for VF 2 For instruction: %tmp3 = load double, ptr %tmp0, align 8
+; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: %tmp4 = load double, ptr %tmp1, align 8
+; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: %tmp5 = load double, ptr %tmp2, align 8
+; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store double %tmp3, ptr %tmp0, align 8
+; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store double %tmp4, ptr %tmp1, align 8
+; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store double %tmp5, ptr %tmp2, align 8
 ; VF_4-LABEL: Checking a loop in 'f64_factor_3'
-; VF_4: Found an estimated cost of 36 for VF 4 For instruction: %tmp3 = load double, ptr %tmp0, align 8
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load double, ptr %tmp1, align 8
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load double, ptr %tmp2, align 8
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store double %tmp3, ptr %tmp0, align 8
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store double %tmp4, ptr %tmp1, align 8
-; VF_4-NEXT: Found an estimated cost of 36 for VF 4 For instruction: store double %tmp5, ptr %tmp2, align 8
+; VF_4: Found an estimated cost of 12 for VF 4 For instruction: %tmp3 = load double, ptr %tmp0, align 8
+; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: %tmp4 = load double, ptr %tmp1, align 8
+; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: %tmp5 = load double, ptr %tmp2, align 8
+; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store double %tmp3, ptr %tmp0, align 8
+; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store double %tmp4, ptr %tmp1, align 8
+; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store double %tmp5, ptr %tmp2, align 8
 ; VF_8-LABEL: Checking a loop in 'f64_factor_3'
-; VF_8: Found an estimated cost of 72 for VF 8 For instruction: %tmp3 = load double, ptr %tmp0, align 8
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load double, ptr %tmp1, align 8
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load double, ptr %tmp2, align 8
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store double %tmp3, ptr %tmp0, align 8
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store double %tmp4, ptr %tmp1, align 8
-; VF_8-NEXT: Found an estimated cost of 72 for VF 8 For instruction: store double %tmp5, ptr %tmp2, align 8
+; VF_8: Found an estimated cost of 24 for VF 8 For instruction: %tmp3 = load double, ptr %tmp0, align 8
+; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: %tmp4 = load double, ptr %tmp1, align 8
+; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: %tmp5 = load double, ptr %tmp2, align 8
+; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store double %tmp3, ptr %tmp0, align 8
+; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store double %tmp4, ptr %tmp1, align 8
+; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store double %tmp5, ptr %tmp2, align 8
 ; VF_16-LABEL: Checking a loop in 'f64_factor_3'
-; VF_16: Found an estimated cost of 144 for VF 16 For instruction: %tmp3 = load double, ptr %tmp0, align 8
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load double, ptr %tmp1, align 8
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load double, ptr %tmp2, align 8
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store double %tmp3, ptr %tmp0, align 8
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store double %tmp4, ptr %tmp1, align 8
-; VF_16-NEXT: Found an estimated cost of 144 for VF 16 For instruction: store double %tmp5, ptr %tmp2, align 8
+; VF_16: Found an estimated cost of 48 for VF 16 For instruction: %tmp3 = load double, ptr %tmp0, align 8
+; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: %tmp4 = load double, ptr %tmp1, align 8
+; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: %tmp5 = load double, ptr %tmp2, align 8
+; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store double %tmp3, ptr %tmp0, align 8
+; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store double %tmp4, ptr %tmp1, align 8
+; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store double %tmp5, ptr %tmp2, align 8
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
   %tmp0 = getelementptr inbounds %f64.3, ptr %data, i64 %i, i32 0
@@ -673,41 +673,41 @@ entry:
   br label %for.body
 
 ; VF_2-LABEL: Checking a loop in 'i8_factor_4'
-; VF_2: Found an estimated cost of 48 for VF 2 For instruction: %tmp4 = load i8, ptr %tmp0, align 1
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i8, ptr %tmp1, align 1
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load i8, ptr %tmp2, align 1
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load i8, ptr %tmp3, align 1
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i8 %tmp4, ptr %tmp0, align 1
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i8 %tmp5, ptr %tmp1, align 1
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i8 %tmp6, ptr %tmp2, align 1
-; VF_2-NEXT: Found an estimated cost of 48 for VF 2 For instruction: store i8 %tmp7, ptr %tmp3, align 1
+; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp4 = load i8, ptr %tmp0, align 1
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp5 = load i8, ptr %tmp1, align 1
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp6 = load i8, ptr %tmp2, align 1
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp7 = load i8, ptr %tmp3, align 1
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i8 %tmp4, ptr %tmp0, align 1
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i8 %tmp5, ptr %tmp1, align 1
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i8 %tmp6, ptr %tmp2, align 1
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i8 %tmp7, ptr %tmp3, align 1
 ; VF_4-LABEL: Checking a loop in 'i8_factor_4'
-; VF_4: Found an estimated cost of 96 for VF 4 For instruction: %tmp4 = load i8, ptr %tmp0, align 1
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i8, ptr %tmp1, align 1
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load i8, ptr %tmp2, align 1
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load i8, ptr %tmp3, align 1
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i8 %tmp4, ptr %tmp0, align 1
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i8 %tmp5, ptr %tmp1, align 1
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i8 %tmp6, ptr %tmp2, align 1
-; VF_4-NEXT: Found an estimated cost of 96 for VF 4 For instruction: store i8 %tmp7, ptr %tmp3, align 1
+; VF_4: Found an estimated cost of 24 for VF 4 For instruction: %tmp4 = load i8, ptr %tmp0, align 1
+; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp5 = load i8, ptr %tmp1, align 1
+; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp6 = load i8, ptr %tmp2, align 1
+; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp7 = load i8, ptr %tmp3, align 1
+; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i8 %tmp4, ptr %tmp0, align 1
+; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i8 %tmp5, ptr %tmp1, align 1
+; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i8 %tmp6, ptr %tmp2, align 1
+; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i8 %tmp7, ptr %tmp3, align 1
 ; VF_8-LABEL: Checking a loop in 'i8_factor_4'
-; VF_8: Found an estimated cost of 192 for VF 8 For instruction: %tmp4 = load i8, ptr %tmp0, align 1
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i8, ptr %tmp1, align 1
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load i8, ptr %tmp2, align 1
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load i8, ptr %tmp3, align 1
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i8 %tmp4, ptr %tmp0, align 1
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i8 %tmp5, ptr %tmp1, align 1
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i8 %tmp6, ptr %tmp2, align 1
-; VF_8-NEXT: Found an estimated cost of 192 for VF 8 For instruction: store i8 %tmp7, ptr %tmp3, align 1
+; VF_8: Found an estimated cost of 48 for VF 8 For instruction: %tmp4 = load i8, ptr %tmp0, align 1
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp5 = load i8, ptr %tmp1, align 1
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp6 = load i8, ptr %tmp2, align 1
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp7 = load i8, ptr %tmp3, align 1
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i8 %tmp4, ptr %tmp0, align 1
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i8 %tmp5, ptr %tmp1, align 1
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i8 %tmp6, ptr %tmp2, align 1
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i8 %tmp7, ptr %tmp3, align 1
 ; VF_16-LABEL: Checking a loop in 'i8_factor_4'
-; VF_16: Found an estimated cost of 384 for VF 16 For instruction: %tmp4 = load i8, ptr %tmp0, align 1
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i8, ptr %tmp1, align 1
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load i8, ptr %tmp2, align 1
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load i8, ptr %tmp3, align 1
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i8 %tmp4, ptr %tmp0, align 1
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i8 %tmp5, ptr %tmp1, align 1
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i8 %tmp6, ptr %tmp2, align 1
-; VF_16-NEXT: Found an estimated cost of 384 for VF 16 For instruction: store i8 %tmp7, ptr %tmp3, align 1
+; VF_16: Found an estimated cost of 96 for VF 16 For instruction: %tmp4 = load i8, ptr %tmp0, align 1
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp5 = load i8, ptr %tmp1, align 1
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp6 = load i8, ptr %tmp2, align 1
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp7 = load i8, ptr %tmp3, align 1
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i8 %tmp4, ptr %tmp0, align 1
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i8 %tmp5, ptr %tmp1, align 1
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i8 %tmp6, ptr %tmp2, align 1
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i8 %tmp7, ptr %tmp3, align 1
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
   %tmp0 = getelementptr inbounds %i8.4, ptr %data, i64 %i, i32 0
@@ -736,41 +736,41 @@ entry:
   br label %for.body
 
 ; VF_2-LABEL: Checking a loop in 'i16_factor_4'
-; VF_2: Found an estimated cost of 48 for VF 2 For instruction: %tmp4 = load i16, ptr %tmp0, align 2
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i16, ptr %tmp1, align 2
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load i16, ptr %tmp2, align 2
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load i16, ptr %tmp3, align 2
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i16 %tmp4, ptr %tmp0, align 2
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i16 %tmp5, ptr %tmp1, align 2
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i16 %tmp6, ptr %tmp2, align 2
-; VF_2-NEXT: Found an estimated cost of 48 for VF 2 For instruction: store i16 %tmp7, ptr %tmp3, align 2
+; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp4 = load i16, ptr %tmp0, align 2
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp5 = load i16, ptr %tmp1, align 2
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp6 = load i16, ptr %tmp2, align 2
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp7 = load i16, ptr %tmp3, align 2
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i16 %tmp4, ptr %tmp0, align 2
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i16 %tmp5, ptr %tmp1, align 2
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i16 %tmp6, ptr %tmp2, align 2
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i16 %tmp7, ptr %tmp3, align 2
 ; VF_4-LABEL: Checking a loop in 'i16_factor_4'
-; VF_4: Found an estimated cost of 96 for VF 4 For instruction: %tmp4 = load i16, ptr %tmp0, align 2
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i16, ptr %tmp1, align 2
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load i16, ptr %tmp2, align 2
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load i16, ptr %tmp3, align 2
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i16 %tmp4, ptr %tmp0, align 2
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i16 %tmp5, ptr %tmp1, align 2
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i16 %tmp6, ptr %tmp2, align 2
-; VF_4-NEXT: Found an estimated cost of 96 for VF 4 For instruction: store i16 %tmp7, ptr %tmp3, align 2
+; VF_4: Found an estimated cost of 24 for VF 4 For instruction: %tmp4 = load i16, ptr %tmp0, align 2
+; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp5 = load i16, ptr %tmp1, align 2
+; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp6 = load i16, ptr %tmp2, align 2
+; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp7 = load i16, ptr %tmp3, align 2
+; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i16 %tmp4, ptr %tmp0, align 2
+; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i16 %tmp5, ptr %tmp1, align 2
+; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i16 %tmp6, ptr %tmp2, align 2
+; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i16 %tmp7, ptr %tmp3, align 2
 ; VF_8-LABEL: Checking a loop in 'i16_factor_4'
-; VF_8: Found an estimated cost of 192 for VF 8 For instruction: %tmp4 = load i16, ptr %tmp0, align 2
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i16, ptr %tmp1, align 2
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load i16, ptr %tmp2, align 2
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load i16, ptr %tmp3, align 2
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i16 %tmp4, ptr %tmp0, align 2
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i16 %tmp5, ptr %tmp1, align 2
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i16 %tmp6, ptr %tmp2, align 2
-; VF_8-NEXT: Found an estimated cost of 192 for VF 8 For instruction: store i16 %tmp7, ptr %tmp3, align 2
+; VF_8: Found an estimated cost of 48 for VF 8 For instruction: %tmp4 = load i16, ptr %tmp0, align 2
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp5 = load i16, ptr %tmp1, align 2
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp6 = load i16, ptr %tmp2, align 2
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp7 = load i16, ptr %tmp3, align 2
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i16 %tmp4, ptr %tmp0, align 2
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i16 %tmp5, ptr %tmp1, align 2
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i16 %tmp6, ptr %tmp2, align 2
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i16 %tmp7, ptr %tmp3, align 2
 ; VF_16-LABEL: Checking a loop in 'i16_factor_4'
-; VF_16: Found an estimated cost of 384 for VF 16 For instruction: %tmp4 = load i16, ptr %tmp0, align 2
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i16, ptr %tmp1, align 2
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load i16, ptr %tmp2, align 2
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load i16, ptr %tmp3, align 2
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i16 %tmp4, ptr %tmp0, align 2
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i16 %tmp5, ptr %tmp1, align 2
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i16 %tmp6, ptr %tmp2, align 2
-; VF_16-NEXT: Found an estimated cost of 384 for VF 16 For instruction: store i16 %tmp7, ptr %tmp3, align 2
+; VF_16: Found an estimated cost of 96 for VF 16 For instruction: %tmp4 = load i16, ptr %tmp0, align 2
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp5 = load i16, ptr %tmp1, align 2
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp6 = load i16, ptr %tmp2, align 2
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp7 = load i16, ptr %tmp3, align 2
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i16 %tmp4, ptr %tmp0, align 2
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i16 %tmp5, ptr %tmp1, align 2
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i16 %tmp6, ptr %tmp2, align 2
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i16 %tmp7, ptr %tmp3, align 2
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
   %tmp0 = getelementptr inbounds %i16.4, ptr %data, i64 %i, i32 0
@@ -799,41 +799,41 @@ entry:
   br label %for.body
 
 ; VF_2-LABEL: Checking a loop in 'i32_factor_4'
-; VF_2: Found an estimated cost of 48 for VF 2 For instruction: %tmp4 = load i32, ptr %tmp0, align 4
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i32, ptr %tmp1, align 4
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load i32, ptr %tmp2, align 4
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load i32, ptr %tmp3, align 4
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 %tmp4, ptr %tmp0, align 4
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 %tmp5, ptr %tmp1, align 4
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 %tmp6, ptr %tmp2, align 4
-; VF_2-NEXT: Found an estimated cost of 48 for VF 2 For instruction: store i32 %tmp7, ptr %tmp3, align 4
+; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp4 = load i32, ptr %tmp0, align 4
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp5 = load i32, ptr %tmp1, align 4
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp6 = load i32, ptr %tmp2, align 4
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp7 = load i32, ptr %tmp3, align 4
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i32 %tmp4, ptr %tmp0, align 4
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i32 %tmp5, ptr %tmp1, align 4
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i32 %tmp6, ptr %tmp2, align 4
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i32 %tmp7, ptr %tmp3, align 4
 ; VF_4-LABEL: Checking a loop in 'i32_factor_4'
-; VF_4: Found an estimated cost of 32 for VF 4 For instruction: %tmp4 = load i32, ptr %tmp0, align 4
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i32, ptr %tmp1, align 4
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load i32, ptr %tmp2, align 4
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load i32, ptr %tmp3, align 4
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 %tmp4, ptr %tmp0, align 4
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 %tmp5, ptr %tmp1, align 4
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 %tmp6, ptr %tmp2, align 4
-; VF_4-NEXT: Found an estimated cost of 32 for VF 4 For instruction: store i32 %tmp7, ptr %tmp3, align 4
+; VF_4: Found an estimated cost of 8 for VF 4 For instruction: %tmp4 = load i32, ptr %tmp0, align 4
+; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: %tmp5 = load i32, ptr %tmp1, align 4
+; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: %tmp6 = load i32, ptr %tmp2, align 4
+; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: %tmp7 = load i32, ptr %tmp3, align 4
+; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i32 %tmp4, ptr %tmp0, align 4
+; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i32 %tmp5, ptr %tmp1, align 4
+; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i32 %tmp6, ptr %tmp2, align 4
+; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i32 %tmp7, ptr %tmp3, align 4
 ; VF_8-LABEL: Checking a loop in 'i32_factor_4'
-; VF_8: Found an estimated cost of 192 for VF 8 For instruction: %tmp4 = load i32, ptr %tmp0, align 4
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i32, ptr %tmp1, align 4
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load i32, ptr %tmp2, align 4
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load i32, ptr %tmp3, align 4
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i32 %tmp4, ptr %tmp0, align 4
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i32 %tmp5, ptr %tmp1, align 4
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i32 %tmp6, ptr %tmp2, align 4
-; VF_8-NEXT: Found an estimated cost of 192 for VF 8 For instruction: store i32 %tmp7, ptr %tmp3, align 4
+; VF_8: Found an estimated cost of 48 for VF 8 For instruction: %tmp4 = load i32, ptr %tmp0, align 4
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp5 = load i32, ptr %tmp1, align 4
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp6 = load i32, ptr %tmp2, align 4
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp7 = load i32, ptr %tmp3, align 4
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i32 %tmp4, ptr %tmp0, align 4
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i32 %tmp5, ptr %tmp1, align 4
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i32 %tmp6, ptr %tmp2, align 4
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i32 %tmp7, ptr %tmp3, align 4
 ; VF_16-LABEL: Checking a loop in 'i32_factor_4'
-; VF_16: Found an estimated cost of 384 for VF 16 For instruction: %tmp4 = load i32, ptr %tmp0, align 4
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i32, ptr %tmp1, align 4
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load i32, ptr %tmp2, align 4
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load i32, ptr %tmp3, align 4
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i32 %tmp4, ptr %tmp0, align 4
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i32 %tmp5, ptr %tmp1, align 4
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i32 %tmp6, ptr %tmp2, align 4
-; VF_16-NEXT: Found an estimated cost of 384 for VF 16 For instruction: store i32 %tmp7, ptr %tmp3, align 4
+; VF_16: Found an estimated cost of 96 for VF 16 For instruction: %tmp4 = load i32, ptr %tmp0, align 4
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp5 = load i32, ptr %tmp1, align 4
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp6 = load i32, ptr %tmp2, align 4
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp7 = load i32, ptr %tmp3, align 4
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i32 %tmp4, ptr %tmp0, align 4
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i32 %tmp5, ptr %tmp1, align 4
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i32 %tmp6, ptr %tmp2, align 4
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i32 %tmp7, ptr %tmp3, align 4
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
   %tmp0 = getelementptr inbounds %i32.4, ptr %data, i64 %i, i32 0
@@ -862,41 +862,41 @@ entry:
   br label %for.body
 
 ; VF_2-LABEL: Checking a loop in 'i64_factor_4'
-; VF_2: Found an estimated cost of 88 for VF 2 For instruction: %tmp4 = load i64, ptr %tmp0, align 8
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i64, ptr %tmp1, align 8
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load i64, ptr %tmp2, align 8
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load i64, ptr %tmp3, align 8
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i64 %tmp4, ptr %tmp0, align 8
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i64 %tmp5, ptr %tmp1, align 8
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i64 %tmp6, ptr %tmp2, align 8
-; VF_2-NEXT: Found an estimated cost of 88 for VF 2 For instruction: store i64 %tmp7, ptr %tmp3, align 8
+; VF_2: Found an estimated cost of 22 for VF 2 For instruction: %tmp4 = load i64, ptr %tmp0, align 8
+; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: %tmp5 = load i64, ptr %tmp1, align 8
+; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: %tmp6 = load i64, ptr %tmp2, align 8
+; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: %tmp7 = load i64, ptr %tmp3, align 8
+; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: store i64 %tmp4, ptr %tmp0, align 8
+; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: store i64 %tmp5, ptr %tmp1, align 8
+; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: store i64 %tmp6, ptr %tmp2, align 8
+; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: store i64 %tmp7, ptr %tmp3, align 8
 ; VF_4-LABEL: Checking a loop in 'i64_factor_4'
-; VF_4: Found an estimated cost of 176 for VF 4 For instruction: %tmp4 = load i64, ptr %tmp0, align 8
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i64, ptr %tmp1, align 8
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load i64, ptr %tmp2, align 8
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load i64, ptr %tmp3, align 8
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i64 %tmp4, ptr %tmp0, align 8
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i64 %tmp5, ptr %tmp1, align 8
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i64 %tmp6, ptr %tmp2, align 8
-; VF_4-NEXT: Found an estimated cost of 176 for VF 4 For instruction: store i64 %tmp7, ptr %tmp3, align 8
+; VF_4: Found an estimated cost of 44 for VF 4 For instruction: %tmp4 = load i64, ptr %tmp0, align 8
+; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: %tmp5 = load i64, ptr %tmp1, align 8
+; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: %tmp6 = load i64, ptr %tmp2, align 8
+; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: %tmp7 = load i64, ptr %tmp3, align 8
+; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: store i64 %tmp4, ptr %tmp0, align 8
+; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: store i64 %tmp5, ptr %tmp1, align 8
+; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: store i64 %tmp6, ptr %tmp2, align 8
+; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: store i64 %tmp7, ptr %tmp3, align 8
 ; VF_8-LABEL: Checking a loop in 'i64_factor_4'
-; VF_8: Found an estimated cost of 352 for VF 8 For instruction: %tmp4 = load i64, ptr %tmp0, align 8
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i64, ptr %tmp1, align 8
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load i64, ptr %tmp2, align 8
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load i64, ptr %tmp3, align 8
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i64 %tmp4, ptr %tmp0, align 8
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i64 %tmp5, ptr %tmp1, align 8
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i64 %tmp6, ptr %tmp2, align 8
-; VF_8-NEXT: Found an estimated cost of 352 for VF 8 For instruction: store i64 %tmp7, ptr %tmp3, align 8
+; VF_8: Found an estimated cost of 88 for VF 8 For instruction: %tmp4 = load i64, ptr %tmp0, align 8
+; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: %tmp5 = load i64, ptr %tmp1, align 8
+; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: %tmp6 = load i64, ptr %tmp2, align 8
+; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: %tmp7 = load i64, ptr %tmp3, align 8
+; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: store i64 %tmp4, ptr %tmp0, align 8
+; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: store i64 %tmp5, ptr %tmp1, align 8
+; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: store i64 %tmp6, ptr %tmp2, align 8
+; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: store i64 %tmp7, ptr %tmp3, align 8
 ; VF_16-LABEL: Checking a loop in 'i64_factor_4'
-; VF_16: Found an estimated cost of 704 for VF 16 For instruction: %tmp4 = load i64, ptr %tmp0, align 8
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i64, ptr %tmp1, align 8
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load i64, ptr %tmp2, align 8
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load i64, ptr %tmp3, align 8
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i64 %tmp4, ptr %tmp0, align 8
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i64 %tmp5, ptr %tmp1, align 8
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i64 %tmp6, ptr %tmp2, align 8
-; VF_16-NEXT: Found an estimated cost of 704 for VF 16 For instruction: store i64 %tmp7, ptr %tmp3, align 8
+; VF_16: Found an estimated cost of 176 for VF 16 For instruction: %tmp4 = load i64, ptr %tmp0, align 8
+; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: %tmp5 = load i64, ptr %tmp1, align 8
+; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: %tmp6 = load i64, ptr %tmp2, align 8
+; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: %tmp7 = load i64, ptr %tmp3, align 8
+; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: store i64 %tmp4, ptr %tmp0, align 8
+; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: store i64 %tmp5, ptr %tmp1, align 8
+; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: store i64 %tmp6, ptr %tmp2, align 8
+; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: store i64 %tmp7, ptr %tmp3, align 8
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
   %tmp0 = getelementptr inbounds %i64.4, ptr %data, i64 %i, i32 0
@@ -997,14 +997,14 @@ entry:
 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store float %tmp6, ptr %tmp2, align 4
 ; VF_2-NEXT: Found an estimated cost of 20 for VF 2 For instruction: store float %tmp7, ptr %tmp3, align 4
 ; VF_4-LABEL: Checking a loop in 'f32_factor_4'
-; VF_4: Found an estimated cost of 32 for VF 4 For instruction: %tmp4 = load float, ptr %tmp0, align 4
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load float, ptr %tmp1, align 4
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load float, ptr %tmp2, align 4
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load float, ptr %tmp3, align 4
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store float %tmp4, ptr %tmp0, align 4
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store float %tmp5, ptr %tmp1, align 4
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store float %tmp6, ptr %tmp2, align 4
-; VF_4-NEXT: Found an estimated cost of 32 for VF 4 For instruction: store float %tmp7, ptr %tmp3, align 4
+; VF_4: Found an estimated cost of 8 for VF 4 For instruction: %tmp4 = load float, ptr %tmp0, align 4
+; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: %tmp5 = load float, ptr %tmp1, align 4
+; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: %tmp6 = load float, ptr %tmp2, align 4
+; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: %tmp7 = load float, ptr %tmp3, align 4
+; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store float %tmp4, ptr %tmp0, align 4
+; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store float %tmp5, ptr %tmp1, align 4
+; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store float %tmp6, ptr %tmp2, align 4
+; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store float %tmp7, ptr %tmp3, align 4
 ; VF_8-LABEL: Checking a loop in 'f32_factor_4'
 ; VF_8: Found an estimated cost of 80 for VF 8 For instruction: %tmp4 = load float, ptr %tmp0, align 4
 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load float, ptr %tmp1, align 4
@@ -1051,41 +1051,41 @@ entry:
   br label %for.body
 
 ; VF_2-LABEL: Checking a loop in 'f64_factor_4'
-; VF_2: Found an estimated cost of 24 for VF 2 For instruction: %tmp4 = load double, ptr %tmp0, align 8
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load double, ptr %tmp1, align 8
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load double, ptr %tmp2, align 8
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load double, ptr %tmp3, align 8
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store double %tmp4, ptr %tmp0, align 8
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store double %tmp5, ptr %tmp1, align 8
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store double %tmp6, ptr %tmp2, align 8
-; VF_2-NEXT: Found an estimated cost of 24 for VF 2 For instruction: store double %tmp7, ptr %tmp3, align 8
+; VF_2: Found an estimated cost of 6 for VF 2 For instruction: %tmp4 = load double, ptr %tmp0, align 8
+; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: %tmp5 = load double, ptr %tmp1, align 8
+; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: %tmp6 = load double, ptr %tmp2, align 8
+; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: %tmp7 = load double, ptr %tmp3, align 8
+; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store double %tmp4, ptr %tmp0, align 8
+; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store double %tmp5, ptr %tmp1, align 8
+; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store double %tmp6, ptr %tmp2, align 8
+; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store double %tmp7, ptr %tmp3, align 8
 ; VF_4-LABEL: Checking a loop in 'f64_factor_4'
-; VF_4: Found an estimated cost of 48 for VF 4 For instruction: %tmp4 = load double, ptr %tmp0, align 8
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load double, ptr %tmp1, align 8
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load double, ptr %tmp2, align 8
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load double, ptr %tmp3, align 8
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store double %tmp4, ptr %tmp0, align 8
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store double %tmp5, ptr %tmp1, align 8
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store double %tmp6, ptr %tmp2, align 8
-; VF_4-NEXT: Found an estimated cost of 48 for VF 4 For instruction: store double %tmp7, ptr %tmp3, align 8
+; VF_4: Found an estimated cost of 12 for VF 4 For instruction: %tmp4 = load double, ptr %tmp0, align 8
+; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: %tmp5 = load double, ptr %tmp1, align 8
+; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: %tmp6 = load double, ptr %tmp2, align 8
+; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: %tmp7 = load double, ptr %tmp3, align 8
+; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store double %tmp4, ptr %tmp0, align 8
+; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store double %tmp5, ptr %tmp1, align 8
+; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store double %tmp6, ptr %tmp2, align 8
+; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store double %tmp7, ptr %tmp3, align 8
 ; VF_8-LABEL: Checking a loop in 'f64_factor_4'
-; VF_8: Found an estimated cost of 96 for VF 8 For instruction: %tmp4 = load double, ptr %tmp0, align 8
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load double, ptr %tmp1, align 8
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load double, ptr %tmp2, align 8
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load double, ptr %tmp3, align 8
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store double %tmp4, ptr %tmp0, align 8
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store double %tmp5, ptr %tmp1, align 8
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store double %tmp6, ptr %tmp2, align 8
-; VF_8-NEXT: Found an estimated cost of 96 for VF 8 For instruction: store double %tmp7, ptr %tmp3, align 8
+; VF_8: Found an estimated cost of 24 for VF 8 For instruction: %tmp4 = load double, ptr %tmp0, align 8
+; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: %tmp5 = load double, ptr %tmp1, align 8
+; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: %tmp6 = load double, ptr %tmp2, align 8
+; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: %tmp7 = load double, ptr %tmp3, align 8
+; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store double %tmp4, ptr %tmp0, align 8
+; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store double %tmp5, ptr %tmp1, align 8
+; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store double %tmp6, ptr %tmp2, align 8
+; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store double %tmp7, ptr %tmp3, align 8
 ; VF_16-LABEL: Checking a loop in 'f64_factor_4'
-; VF_16: Found an estimated cost of 192 for VF 16 For instruction: %tmp4 = load double, ptr %tmp0, align 8
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load double, ptr %tmp1, align 8
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load double, ptr %tmp2, align 8
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load double, ptr %tmp3, align 8
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store double %tmp4, ptr %tmp0, align 8
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store double %tmp5, ptr %tmp1, align 8
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store double %tmp6, ptr %tmp2, align 8
-; VF_16-NEXT: Found an estimated cost of 192 for VF 16 For instruction: store double %tmp7, ptr %tmp3, align 8
+; VF_16: Found an estimated cost of 48 for VF 16 For instruction: %tmp4 = load double, ptr %tmp0, align 8
+; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: %tmp5 = load double, ptr %tmp1, align 8
+; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: %tmp6 = load double, ptr %tmp2, align 8
+; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: %tmp7 = load double, ptr %tmp3, align 8
+; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store double %tmp4, ptr %tmp0, align 8
+; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store double %tmp5, ptr %tmp1, align 8
+; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store double %tmp6, ptr %tmp2, align 8
+; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store double %tmp7, ptr %tmp3, align 8
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
   %tmp0 = getelementptr inbounds %f64.4, ptr %data, i64 %i, i32 0
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
index 6d309c4453c7e1..df02cb741700e5 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
@@ -176,7 +176,7 @@ declare i16 @llvm.umax.i16(i16, i16)
 ; Test case for https://github.com/llvm/llvm-project/issues/106780.
 define i32 @cost_of_exit_branch_and_cond_insts(ptr %a, ptr %b, i1 %c, i16 %x) #0 {
 ; CHECK-LABEL: define i32 @cost_of_exit_branch_and_cond_insts(
-; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i1 [[C:%.*]], i16 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i1 [[C:%.*]], i16 [[X:%.*]]) #[[ATTR2:[0-9]+]] {
 ; CHECK-NEXT: [[ENTRY:.*]]:
 ; CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[X]] to i32
 ; CHECK-NEXT: [[UMAX3:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP0]], i32 111)
@@ -404,6 +404,95 @@ exit:
   ret void
 }
 
+; Test for https://github.com/llvm/llvm-project/issues/108098.
+define void @gather_interleave_group_with_dead_insert_pos(i64 %N, ptr noalias %src, ptr noalias %dst) #0 {
+; CHECK-LABEL: define void @gather_interleave_group_with_dead_insert_pos(
+; CHECK-SAME: i64 [[N:%.*]], ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 0)
+; CHECK-NEXT: [[TMP0:%.*]] = add nuw i64 [[SMAX]], 3
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP2]], 16
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 16
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i64 16, i64 [[N_MOD_VF]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[TMP4]]
+; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 4
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i64> [[VEC_IND]],
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 32
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP5]]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP6]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP7]], i32 0
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[TMP8]], i32 0
+; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <32 x i8>, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[WIDE_VEC2:%.*]] = load <32 x i8>, ptr [[TMP10]], align 1
+; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <32 x i8> [[WIDE_VEC]], <32 x i8> poison, <8 x i32>
+; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <32 x i8> [[WIDE_VEC2]], <32 x i8> poison, <8 x i32>
+; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <32 x i8> [[WIDE_VEC]], <32 x i8> poison, <8 x i32>
+; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <32 x i8> [[WIDE_VEC2]], <32 x i8> poison, <8 x i32>
+; CHECK-NEXT: [[TMP11:%.*]] = zext <8 x i8> [[STRIDED_VEC4]] to <8 x i32>
+; CHECK-NEXT: [[TMP12:%.*]] = zext <8 x i8> [[STRIDED_VEC5]] to <8 x i32>
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[DST]], <8 x i64> [[VEC_IND]]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[DST]], <8 x i64> [[STEP_ADD]]
+; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[TMP11]], <8 x ptr> [[TMP13]], i32 4, <8 x i1> )
+; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[TMP12]], <8 x ptr> [[TMP14]], i32 4, <8 x i1> )
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[STEP_ADD]],
+; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
+; CHECK-NEXT: [[GEP_SRC_0:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: [[L_DEAD:%.*]] = load i8, ptr [[GEP_SRC_0]], align 1
+; CHECK-NEXT: [[IV_1:%.*]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV_1]]
+; CHECK-NEXT: [[L_1:%.*]] = load i8, ptr [[GEP_SRC_1]], align 1
+; CHECK-NEXT: [[EXT:%.*]] = zext i8 [[L_1]] to i32
+; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr i32, ptr [[DST]], i64 [[IV]]
+; CHECK-NEXT: store i32 [[EXT]], ptr [[GEP_DST]], align 4
+; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 4
+; CHECK-NEXT: [[EC:%.*]] = icmp slt i64 [[IV]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT:.*]], !llvm.loop [[LOOP21:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
+  %gep.src.0 = getelementptr i8, ptr %src, i64 %iv
+  %l.dead = load i8, ptr %gep.src.0, align 1
+  %iv.1 = add i64 %iv, 1
+  %gep.src.1 = getelementptr i8, ptr %src, i64 %iv.1
+  %l.1 = load i8, ptr %gep.src.1, align 1
+  %ext = zext i8 %l.1 to i32
+  %gep.dst = getelementptr i32, ptr %dst, i64 %iv
+  store i32 %ext, ptr %gep.dst, align 4
+  %iv.next = add nsw i64 %iv, 4
+  %ec = icmp slt i64 %iv, %N
+  br i1 %ec, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+attributes #0 = { "target-features"="+64bit,+v" }
+
 ;.
 ; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
 ; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
@@ -425,4 +514,6 @@ exit:
 ; CHECK: [[LOOP17]] = distinct !{[[LOOP17]], [[META1]]}
 ; CHECK: [[LOOP18]] = distinct !{[[LOOP18]], [[META1]], [[META2]]}
 ; CHECK: [[LOOP19]] = distinct !{[[LOOP19]], [[META2]], [[META1]]}
+; CHECK: [[LOOP20]] = distinct !{[[LOOP20]], [[META1]], [[META2]]}
+; CHECK: [[LOOP21]] = distinct !{[[LOOP21]], [[META2]], [[META1]]}
 ;.
diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/mem-interleaving-costs-03.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/mem-interleaving-costs-03.ll
index 88eb9c4d27e33a..13c443c4d579f4 100644
--- a/llvm/test/Transforms/LoopVectorize/SystemZ/mem-interleaving-costs-03.ll
+++ b/llvm/test/Transforms/LoopVectorize/SystemZ/mem-interleaving-costs-03.ll
@@ -6,7 +6,8 @@
 ; Check cost function for <8 x i128> store interleave group.
 
 ; CHECK: LV: Checking a loop in 'fun'
-; CHECK: LV: Found an estimated cost of 8 for VF 4 For instruction: store i128 8721036757475490113
+; CHECK: LV: Found an estimated cost of 4 for VF 4 For instruction: store i128 8721036757475490113
+; CHECK: LV: Found an estimated cost of 4 for VF 4 For instruction: store i128 8721036757475490113
 define noundef i32 @fun(i32 %argc, ptr nocapture readnone %argv) {
 entry:
diff --git a/llvm/test/Transforms/PGOProfile/ctx-prof-use-prelink.ll b/llvm/test/Transforms/PGOProfile/ctx-prof-use-prelink.ll
index 7959e4d0760edb..56ed92ea1b7ffb 100644
--- a/llvm/test/Transforms/PGOProfile/ctx-prof-use-prelink.ll
+++ b/llvm/test/Transforms/PGOProfile/ctx-prof-use-prelink.ll
@@ -4,6 +4,7 @@
 ; There is no scenario currently of doing ctx profile use without thinlto.
 ;
 ; RUN: opt -passes='thinlto-pre-link<O2>' -use-ctx-profile=something_that_does_not_exist %s -S | FileCheck %s
+; RUN: opt -debug-info-for-profiling -passes='thinlto-pre-link<O2>' -use-ctx-profile=something_that_does_not_exist %s -S | FileCheck %s
 
 declare void @bar()
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll
index 36681ecea4f50f..01c842edd88e41 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll
@@ -45,13 +45,13 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
 ; CHECK-NEXT: [[TMP8:%.*]] = sub <2 x i32> [[TMP16]], [[TMP7]]
 ; CHECK-NEXT: [[TMP9:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 [[ARRAYIDX3_2]], i64 2, <2 x i1> , i32 2)
 ; CHECK-NEXT: [[TMP13:%.*]] = zext <2 x i8> [[TMP9]] to <2 x i32>
-; CHECK-NEXT: [[TMP28:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 [[ARRAYIDX5_2]], i64 2, <2 x i1> , i32 2)
-; CHECK-NEXT: [[TMP12:%.*]] = zext <2 x i8> [[TMP28]] to <2 x i32>
+; CHECK-NEXT: [[TMP26:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 [[ARRAYIDX5_2]], i64 2, <2 x i1> , i32 2)
+; CHECK-NEXT: [[TMP12:%.*]] = zext <2 x i8> [[TMP26]] to <2 x i32>
 ; CHECK-NEXT: [[TMP24:%.*]] = sub <2 x i32> [[TMP13]], [[TMP12]]
 ; CHECK-NEXT: [[TMP25:%.*]] = shl <2 x i32> [[TMP24]],
 ; CHECK-NEXT: [[TMP15:%.*]] = add <2 x i32> [[TMP25]], [[TMP8]]
-; CHECK-NEXT: [[TMP29:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 [[ARRAYIDX8_2]], i64 2, <2 x i1> , i32 2)
-; CHECK-NEXT: [[TMP17:%.*]] = zext <2 x i8> [[TMP29]] to <2 x i32>
+; CHECK-NEXT: [[TMP28:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 [[ARRAYIDX8_2]], i64 2, <2 x i1> , i32 2)
+; CHECK-NEXT: [[TMP17:%.*]] = zext <2 x i8> [[TMP28]] to <2 x i32>
 ; CHECK-NEXT: [[TMP18:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 [[ARRAYIDX10_2]], i64 2, <2 x i1> , i32 2)
 ; CHECK-NEXT: [[TMP19:%.*]] = zext <2 x i8> [[TMP18]] to <2 x i32>
 ; CHECK-NEXT: [[TMP20:%.*]] = sub <2 x i32> [[TMP17]], [[TMP19]]
@@ -62,16 +62,16 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
 ; CHECK-NEXT: [[TMP36:%.*]] = sub <2 x i32> [[TMP22]], [[TMP30]]
 ; CHECK-NEXT: [[TMP37:%.*]] = shl <2 x i32> [[TMP36]],
 ; CHECK-NEXT: [[TMP27:%.*]] = add <2 x i32> [[TMP37]], [[TMP20]]
-; CHECK-NEXT: [[TMP26:%.*]] = add <2 x i32> [[TMP27]], [[TMP15]]
-; CHECK-NEXT: [[TMP38:%.*]] = sub <2 x i32> [[TMP15]], [[TMP27]]
-; CHECK-NEXT: [[ADD44_2:%.*]] = extractelement <2 x i32> [[TMP26]], i32 0
-; CHECK-NEXT: [[CONV:%.*]] = extractelement <2 x i32> [[TMP26]], i32 1
-; CHECK-NEXT: [[ADD48_2:%.*]] = add i32 [[CONV]], [[ADD44_2]]
-; CHECK-NEXT: [[SUB51_2:%.*]] = sub i32 [[ADD44_2]], [[CONV]]
+; CHECK-NEXT: [[TMP38:%.*]] = add <2 x i32> [[TMP27]], [[TMP15]]
+; CHECK-NEXT: [[TMP29:%.*]] = sub <2 x i32> [[TMP15]], [[TMP27]]
 ; CHECK-NEXT: [[SUB45_2:%.*]] = extractelement <2 x i32> [[TMP38]], i32 0
 ; CHECK-NEXT: [[SUB47_2:%.*]] = extractelement <2 x i32> [[TMP38]], i32 1
-; CHECK-NEXT: [[ADD55_2:%.*]] = add i32 [[SUB47_2]], [[SUB45_2]]
-; CHECK-NEXT: [[SUB59_2:%.*]] = sub i32 [[SUB45_2]], [[SUB47_2]]
+; CHECK-NEXT: [[ADD48_2:%.*]] = add i32 [[SUB47_2]], [[SUB45_2]]
+; CHECK-NEXT: [[SUB51_2:%.*]] = sub i32 [[SUB45_2]], [[SUB47_2]]
+; CHECK-NEXT: [[TMP32:%.*]] = extractelement <2 x i32> [[TMP29]], i32 0
+; CHECK-NEXT: [[TMP34:%.*]] = extractelement <2 x i32> [[TMP29]], i32 1
+; CHECK-NEXT: [[ADD55_2:%.*]] = add i32 [[TMP34]], [[TMP32]]
+; CHECK-NEXT: [[SUB59_2:%.*]] = sub i32 [[TMP32]], [[TMP34]]
 ; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr i8, ptr null, i64 4
 ; CHECK-NEXT: [[ARRAYIDX5_3:%.*]] = getelementptr i8, ptr null, i64 4
 ; CHECK-NEXT: [[ARRAYIDX8_3:%.*]] = getelementptr i8, ptr null, i64 1
@@ -80,17 +80,17 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
 ; CHECK-NEXT: [[ARRAYIDX15_3:%.*]] = getelementptr i8, ptr null, i64 5
 ; CHECK-NEXT: [[TMP43:%.*]] = load i8, ptr null, align 1
 ; CHECK-NEXT: [[TMP53:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 null, i64 2, <2 x i1> , i32 2)
-; CHECK-NEXT: [[TMP33:%.*]] = zext <2 x i8> [[TMP53]] to <2 x i32>
+; CHECK-NEXT: [[TMP58:%.*]] = zext <2 x i8> [[TMP53]] to <2 x i32>
 ; CHECK-NEXT: [[TMP54:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 null, i64 2, <2 x i1> , i32 2)
 ; CHECK-NEXT: [[TMP39:%.*]] = zext <2 x i8> [[TMP54]] to <2 x i32>
-; CHECK-NEXT: [[TMP40:%.*]] = sub <2 x i32> [[TMP33]], [[TMP39]]
+; CHECK-NEXT: [[TMP40:%.*]] = sub <2 x i32> [[TMP58]], [[TMP39]]
 ; CHECK-NEXT: [[TMP41:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 [[ARRAYIDX3_3]], i64 -4, <2 x i1> , i32 2)
 ; CHECK-NEXT: [[TMP42:%.*]] = zext <2 x i8> [[TMP41]] to <2 x i32>
-; CHECK-NEXT: [[TMP58:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 [[ARRAYIDX5_3]], i64 2, <2 x i1> , i32 2)
-; CHECK-NEXT: [[TMP59:%.*]] = zext <2 x i8> [[TMP58]] to <2 x i32>
-; CHECK-NEXT: [[TMP45:%.*]] = sub <2 x i32> [[TMP42]], [[TMP59]]
+; CHECK-NEXT: [[TMP59:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 [[ARRAYIDX5_3]], i64 2, <2 x i1> , i32 2)
+; CHECK-NEXT: [[TMP62:%.*]] = zext <2 x i8> [[TMP59]] to <2 x i32>
+; CHECK-NEXT: [[TMP45:%.*]] = sub <2 x i32> [[TMP42]], [[TMP62]]
 ; CHECK-NEXT: [[TMP46:%.*]] = shl <2 x i32> [[TMP45]],
-; CHECK-NEXT: [[TMP62:%.*]] = add <2 x i32> [[TMP46]], [[TMP40]]
+; CHECK-NEXT: [[TMP68:%.*]] = add <2 x i32> [[TMP46]], [[TMP40]]
 ; CHECK-NEXT: [[TMP48:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 [[ARRAYIDX8_3]], i64 2, <2 x i1> , i32 2)
 ; CHECK-NEXT: [[TMP49:%.*]] = zext <2 x i8> [[TMP48]] to <2 x i32>
 ; CHECK-NEXT: [[TMP50:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 [[ARRAYIDX10_3]], i64 2, <2 x i1> , i32 2)
@@ -104,53 +104,53 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
 ; CHECK-NEXT: [[TMP69:%.*]] = sub <2 x i32> [[TMP55]], [[TMP57]]
 ; CHECK-NEXT: [[TMP70:%.*]] = shl <2 x i32> [[TMP69]],
 ; CHECK-NEXT: [[TMP60:%.*]] = add <2 x i32> [[TMP70]], [[TMP52]]
-; CHECK-NEXT: [[TMP72:%.*]] = add <2 x i32> [[TMP60]], [[TMP62]]
-; CHECK-NEXT: [[TMP47:%.*]] = sub <2 x i32> [[TMP62]], [[TMP60]]
-; CHECK-NEXT: [[TMP74:%.*]] = extractelement <2 x i32> [[TMP72]], i32 0
-; CHECK-NEXT: [[TMP75:%.*]] = extractelement <2 x i32> [[TMP72]], i32 1
-; CHECK-NEXT: [[ADD48_3:%.*]] = add i32 [[TMP75]], [[TMP74]]
-; CHECK-NEXT: [[SUB51_3:%.*]] = sub i32 [[TMP74]], [[TMP75]]
+; CHECK-NEXT: [[TMP47:%.*]] = add <2 x i32> [[TMP60]], [[TMP68]]
+; CHECK-NEXT: [[TMP33:%.*]] = sub <2 x i32> [[TMP68]], [[TMP60]]
 ; CHECK-NEXT: [[TMP61:%.*]] = extractelement <2 x i32> [[TMP47]], i32 0
 ; CHECK-NEXT: [[TMP79:%.*]] = extractelement <2 x i32> [[TMP47]], i32 1
-; CHECK-NEXT: [[ADD55_3:%.*]] = add i32 [[TMP79]], [[TMP61]]
-; CHECK-NEXT: [[SUB59_3:%.*]] = sub i32 [[TMP61]], [[TMP79]]
-; CHECK-NEXT: [[ADD94:%.*]] = add i32 [[ADD48_3]], [[ADD48_2]]
-; CHECK-NEXT: [[SUB102:%.*]] = sub i32 [[ADD48_2]], [[ADD48_3]]
+; CHECK-NEXT: [[ADD48_3:%.*]] = add i32 [[TMP79]], [[TMP61]]
+; CHECK-NEXT: [[SUB51_3:%.*]] = sub i32 [[TMP61]], [[TMP79]]
 ; CHECK-NEXT: [[TMP63:%.*]] = extractelement <2 x i32> [[TMP33]], i32 0
-; CHECK-NEXT: [[SHR_I49_3:%.*]] = lshr i32 [[TMP63]], 15
+; CHECK-NEXT: [[TMP71:%.*]] = extractelement <2 x i32> [[TMP33]], i32 1
+; CHECK-NEXT: [[ADD55_3:%.*]] = add i32 [[TMP71]], [[TMP63]]
+; CHECK-NEXT: [[SUB59_3:%.*]] = sub i32 [[TMP63]], [[TMP71]]
+; CHECK-NEXT: [[ADD95:%.*]] = add i32 [[ADD48_3]], [[ADD48_2]]
+; CHECK-NEXT: [[SUB102:%.*]] = sub i32 [[ADD48_2]], [[ADD48_3]]
+; CHECK-NEXT: [[TMP77:%.*]] = extractelement <2 x i32> [[TMP58]], i32 0
+; CHECK-NEXT: [[SHR_I49_3:%.*]] = lshr i32 [[TMP77]], 15
 ; CHECK-NEXT: [[AND_I50_3:%.*]] = and i32 [[SHR_I49_3]], 65537
 ; CHECK-NEXT: [[MUL_I51_3:%.*]] = mul i32 [[AND_I50_3]], 65535
-; CHECK-NEXT: [[SHR_I_1:%.*]] = lshr i32 [[CONV]], 15
+; CHECK-NEXT: [[SHR_I_1:%.*]] = lshr i32 [[SUB47_2]], 15
 ; CHECK-NEXT: [[AND_I_1:%.*]] = and i32 [[SHR_I_1]], 65537
 ; CHECK-NEXT: [[MUL_I_1:%.*]] = mul i32 [[AND_I_1]], 65535
 ; CHECK-NEXT: [[ADD94_1:%.*]] = add i32 [[ADD55_3]], [[ADD55_2]]
 ; CHECK-NEXT: [[SUB102_1:%.*]] = sub i32 [[ADD55_2]], [[ADD55_3]]
 ; CHECK-NEXT: [[TMP107:%.*]] = extractelement <2 x i32> [[TMP16]], i32 0
-; CHECK-NEXT: [[SHR_I49_5:%.*]] = lshr i32 [[TMP107]], 15
-; CHECK-NEXT: [[AND_I50_5:%.*]] = and i32 [[SHR_I49_5]], 65537
-; CHECK-NEXT: [[MUL_I51_5:%.*]] = mul i32 [[AND_I50_5]], 65535
+; CHECK-NEXT: [[SHR_I49_1:%.*]] = lshr i32 [[TMP107]], 15
+; CHECK-NEXT: [[AND_I50_1:%.*]] = and i32 [[SHR_I49_1]], 65537
+; CHECK-NEXT: [[MUL_I51_1:%.*]] = mul i32 [[AND_I50_1]], 65535
 ; CHECK-NEXT: [[ADD94_4:%.*]] = add i32 [[SUB51_3]], [[SUB51_2]]
 ; CHECK-NEXT: [[SUB102_2:%.*]] = sub i32 [[SUB51_2]], [[SUB51_3]]
-; CHECK-NEXT: [[SHR_I49_4:%.*]] = lshr i32 [[CONV_1]], 15
-; CHECK-NEXT: [[AND_I50_4:%.*]] = and i32 [[SHR_I49_4]], 65537
-; CHECK-NEXT: [[MUL_I51_4:%.*]] = mul i32 [[AND_I50_4]], 65535
-; CHECK-NEXT: [[ADD94_5:%.*]] = add i32 [[SUB59_3]], [[SUB59_2]]
-; CHECK-NEXT: [[SUB102_3:%.*]] = sub i32 [[SUB59_2]], [[SUB59_3]]
-; CHECK-NEXT: [[SHR_I49_6:%.*]] = lshr i32 [[CONV1]], 15
+; CHECK-NEXT: [[SHR_I49_6:%.*]] = lshr i32 [[CONV_1]], 15
 ; CHECK-NEXT: [[AND_I50_6:%.*]] = and i32 [[SHR_I49_6]], 65537
 ; CHECK-NEXT: [[MUL_I51_6:%.*]] = mul i32 [[AND_I50_6]], 65535
+; CHECK-NEXT: [[ADD94_5:%.*]] = add i32 [[SUB59_3]], [[SUB59_2]]
+; CHECK-NEXT: [[SUB102_3:%.*]] = sub i32 [[SUB59_2]], [[SUB59_3]]
+; CHECK-NEXT: [[SHR_I49_4:%.*]] = lshr i32 [[CONV1]], 15
+; CHECK-NEXT: [[AND_I50_4:%.*]] = and i32 [[SHR_I49_4]], 65537
+; CHECK-NEXT: [[MUL_I51_4:%.*]] = mul i32 [[AND_I50_4]], 65535
 ; CHECK-NEXT: [[TMP66:%.*]] = load <2 x i8>, ptr [[ARRAYIDX8]], align 1
 ; CHECK-NEXT: [[TMP102:%.*]] = zext <2 x i8> [[TMP66]] to <2 x i32>
 ; CHECK-NEXT: [[TMP67:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 [[PIX2]], i64 2, <2 x i1> , i32 2)
-; CHECK-NEXT: [[TMP77:%.*]] = zext <2 x i8> [[TMP67]] to <2 x i32>
+; CHECK-NEXT: [[TMP72:%.*]] = zext <2 x i8> [[TMP67]] to <2 x i32>
 ; CHECK-NEXT: [[TMP73:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 [[TMP1]], i64 2, <2 x i1> , i32 2)
-; CHECK-NEXT: [[TMP78:%.*]] = zext <2 x i8> [[TMP73]] to <2 x i32>
-; CHECK-NEXT: [[TMP85:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 [[ARRAYIDX5]], i64 2, <2 x i1> , i32 2)
-; CHECK-NEXT: [[TMP76:%.*]] = zext <2 x i8> [[TMP85]] to <2 x i32>
-; CHECK-NEXT: [[TMP87:%.*]] = sub <2 x i32> [[TMP78]], [[TMP76]]
+; CHECK-NEXT: [[TMP74:%.*]] = zext <2 x i8> [[TMP73]] to <2 x i32>
+; CHECK-NEXT: [[TMP75:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 [[ARRAYIDX5]], i64 2, <2 x i1> , i32 2)
+; CHECK-NEXT: [[TMP76:%.*]] = zext <2 x i8> [[TMP75]] to <2 x i32>
+; CHECK-NEXT: [[TMP87:%.*]] = sub <2 x i32> [[TMP74]], [[TMP76]]
 ; CHECK-NEXT: [[TMP88:%.*]] = shl <2 x i32> [[TMP87]],
-; CHECK-NEXT: [[TMP89:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 [[ARRAYIDX22]], i64 2, <2 x i1> , i32 2)
-; CHECK-NEXT: [[TMP80:%.*]] = zext <2 x i8> [[TMP89]] to <2 x i32>
+; CHECK-NEXT: [[TMP85:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 [[ARRAYIDX22]], i64 2, <2 x i1> , i32 2)
+; CHECK-NEXT: [[TMP80:%.*]] = zext <2 x i8> [[TMP85]] to <2 x i32>
 ; CHECK-NEXT: [[TMP81:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 [[ARRAYIDX25]], i64 2, <2 x i1> , i32 2)
 ; CHECK-NEXT: [[TMP82:%.*]] = zext <2 x i8> [[TMP81]] to <2 x i32>
 ; CHECK-NEXT: [[TMP83:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 [[ARRAYIDX27]], i64 2, <2 x i1> , i32 2)
@@ -158,40 +158,40 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
 ; CHECK-NEXT: [[TMP95:%.*]] = sub <2 x i32> [[TMP82]], [[TMP84]]
 ; CHECK-NEXT: [[TMP96:%.*]] = shl <2 x i32> [[TMP95]],
 ; CHECK-NEXT: [[TMP97:%.*]] = insertelement <2 x i32> [[TMP102]], i32 [[CONV33]], i32 1
-; CHECK-NEXT: [[TMP90:%.*]] = sub <2 x i32> [[TMP97]], [[TMP80]]
-; CHECK-NEXT: [[TMP105:%.*]] = add <2 x i32> [[TMP96]], [[TMP90]]
+; CHECK-NEXT: [[TMP89:%.*]] = sub <2 x i32> [[TMP97]], [[TMP80]]
+; CHECK-NEXT: [[TMP105:%.*]] = add <2 x i32> [[TMP96]], [[TMP89]]
 ; CHECK-NEXT: [[TMP86:%.*]] = insertelement <2 x i32> [[TMP102]], i32 [[CONV1]], i32 0
-; CHECK-NEXT: [[TMP98:%.*]] = sub <2 x i32> [[TMP86]], [[TMP77]]
-; CHECK-NEXT: [[TMP92:%.*]] = add <2 x i32> [[TMP88]], [[TMP98]]
+; CHECK-NEXT: [[TMP100:%.*]] = sub <2 x i32> [[TMP86]], [[TMP72]]
+; CHECK-NEXT: [[TMP92:%.*]] = add <2 x i32> [[TMP88]], [[TMP100]]
 ; CHECK-NEXT: [[TMP93:%.*]] = shufflevector <2 x i32> [[TMP105]], <2 x i32> [[TMP92]], <2 x i32>
-; CHECK-NEXT: [[TMP106:%.*]] = add <2 x i32> [[TMP105]], [[TMP92]]
-; CHECK-NEXT: [[TMP91:%.*]] = sub <2 x i32> [[TMP92]], [[TMP105]]
-; CHECK-NEXT: [[TMP238:%.*]] = extractelement <2 x i32> [[TMP106]], i32 0
-; CHECK-NEXT: [[TMP108:%.*]] = extractelement <2 x i32> [[TMP106]], i32 1
-; CHECK-NEXT: [[ADD48:%.*]] = add i32 [[TMP108]], [[TMP238]]
-; CHECK-NEXT: [[SUB51:%.*]] = sub i32 [[TMP238]], [[TMP108]]
+; CHECK-NEXT: [[TMP91:%.*]] = add <2 x i32> [[TMP105]], [[TMP92]]
+; CHECK-NEXT: [[TMP101:%.*]] = sub <2 x i32> [[TMP92]], [[TMP105]]
 ; CHECK-NEXT: [[TMP94:%.*]] = extractelement <2 x i32> [[TMP91]], i32 0
 ; CHECK-NEXT: [[SUB47:%.*]] = extractelement <2 x i32> [[TMP91]], i32 1
-; CHECK-NEXT: [[ADD55:%.*]] = add i32 [[SUB47]], [[TMP94]]
-; CHECK-NEXT: [[SUB59:%.*]] = sub i32 [[TMP94]], [[SUB47]]
-; CHECK-NEXT: [[SHR_I59_1:%.*]] = lshr i32 [[TMP108]], 15
+; CHECK-NEXT: [[ADD78:%.*]] = add i32 [[SUB47]], [[TMP94]]
+; CHECK-NEXT: [[SUB51:%.*]] = sub i32 [[TMP94]], [[SUB47]]
+; CHECK-NEXT: [[TMP98:%.*]] = extractelement <2 x i32> [[TMP101]], i32 0
+; CHECK-NEXT: [[TMP99:%.*]] = extractelement <2 x i32> [[TMP101]], i32 1
+; CHECK-NEXT: [[ADD55:%.*]] = add i32 [[TMP99]], [[TMP98]]
+; CHECK-NEXT: [[SUB59:%.*]] = sub i32 [[TMP98]], [[TMP99]]
+; CHECK-NEXT: [[SHR_I59:%.*]] = lshr i32 [[SUB47]], 15
+; CHECK-NEXT: [[AND_I60:%.*]] = and i32 [[SHR_I59]], 65537
+; CHECK-NEXT: [[MUL_I61:%.*]] = mul i32 [[AND_I60]], 65535
+; CHECK-NEXT: [[SHR_I59_1:%.*]] = lshr i32 [[TMP99]], 15
 ; CHECK-NEXT: [[AND_I60_1:%.*]] = and i32 [[SHR_I59_1]], 65537
 ; CHECK-NEXT: [[MUL_I61_1:%.*]] = mul i32 [[AND_I60_1]], 65535
-; CHECK-NEXT: [[SHR_I59_4:%.*]] = lshr i32 [[SUB47]], 15
-; CHECK-NEXT: [[AND_I60_4:%.*]] = and i32 [[SHR_I59_4]], 65537
-; CHECK-NEXT: [[MUL_I61_4:%.*]] = mul i32 [[AND_I60_4]], 65535
 ; CHECK-NEXT: [[TMP104:%.*]] = load <2 x i8>, ptr [[ARRAYIDX8_1]], align 1
 ; CHECK-NEXT: [[TMP110:%.*]] = zext <2 x i8> [[TMP104]] to <2 x i32>
-; CHECK-NEXT: [[TMP109:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 [[ADD_PTR644]], i64 2, <2 x i1> , i32 2)
-; CHECK-NEXT: [[TMP103:%.*]] = zext <2 x i8> [[TMP109]] to <2 x i32>
-; CHECK-NEXT: [[TMP116:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 [[ARRAYIDX3_1]], i64 2, <2 x i1> , i32 2)
-; CHECK-NEXT: [[TMP118:%.*]] = zext <2 x i8> [[TMP116]] to <2 x i32>
-; CHECK-NEXT: [[TMP128:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 [[ARRAYIDX5_1]], i64 2, <2 x i1> , i32 2)
-; CHECK-NEXT: [[TMP155:%.*]] = zext <2 x i8> [[TMP128]] to <2 x i32>
-; CHECK-NEXT: [[TMP124:%.*]] = sub <2 x i32> [[TMP118]], [[TMP155]]
+; CHECK-NEXT: [[TMP108:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 [[ADD_PTR644]], i64 2, <2 x i1> , i32 2)
+; CHECK-NEXT: [[TMP103:%.*]] = zext <2 x i8> [[TMP108]] to <2 x i32>
+; CHECK-NEXT: [[TMP109:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 [[ARRAYIDX3_1]], i64 2, <2 x i1> , i32 2)
+; CHECK-NEXT: [[TMP116:%.*]] = zext <2 x i8> [[TMP109]] to <2 x i32>
+; CHECK-NEXT: [[TMP106:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 [[ARRAYIDX5_1]], i64 2, <2 x i1> , i32 2)
+; CHECK-NEXT: [[TMP118:%.*]] = zext <2 x i8> [[TMP106]] to <2 x i32>
+; CHECK-NEXT: [[TMP124:%.*]] = sub <2 x i32> [[TMP116]], [[TMP118]]
 ; CHECK-NEXT: [[TMP125:%.*]] = shl <2 x i32> [[TMP124]],
-; CHECK-NEXT: [[TMP156:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 [[ARRAYIDX22_1]], i64 2, <2 x i1> , i32 2)
-; CHECK-NEXT: [[TMP111:%.*]] = zext <2 x i8> [[TMP156]] to <2 x i32>
+; CHECK-NEXT: [[TMP121:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 [[ARRAYIDX22_1]], i64 2, <2 x i1> , i32 2)
+; CHECK-NEXT: [[TMP111:%.*]] = zext <2 x i8> [[TMP121]] to <2 x i32>
 ; CHECK-NEXT: [[TMP112:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 [[ARRAYIDX25_1]], i64 2, <2 x i1> , i32 2)
 ; CHECK-NEXT: [[TMP113:%.*]] = zext <2 x i8> [[TMP112]] to <2 x i32>
 ; CHECK-NEXT: [[TMP114:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 [[ARRAYIDX27_1]], i64 2, <2 x i1> , i32 2)
@@ -205,35 +205,35 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
 ; CHECK-NEXT: [[TMP122:%.*]] = sub <2 x i32> [[TMP117]], [[TMP103]]
 ; CHECK-NEXT: [[TMP123:%.*]] = add <2 x i32> [[TMP125]], [[TMP122]]
 ; CHECK-NEXT: [[TMP143:%.*]] = add <2 x i32> [[TMP120]], [[TMP123]]
-; CHECK-NEXT: [[TMP121:%.*]] = sub <2 x i32> [[TMP123]], [[TMP120]]
+; CHECK-NEXT: [[TMP156:%.*]] =
sub <2 x i32> [[TMP123]], [[TMP120]] ; CHECK-NEXT: [[TMP145:%.*]] = extractelement <2 x i32> [[TMP143]], i32 0 ; CHECK-NEXT: [[TMP146:%.*]] = extractelement <2 x i32> [[TMP143]], i32 1 -; CHECK-NEXT: [[ADD48_1:%.*]] = add i32 [[TMP146]], [[TMP145]] +; CHECK-NEXT: [[ADD94:%.*]] = add i32 [[TMP146]], [[TMP145]] ; CHECK-NEXT: [[SUB51_1:%.*]] = sub i32 [[TMP145]], [[TMP146]] -; CHECK-NEXT: [[TMP126:%.*]] = extractelement <2 x i32> [[TMP121]], i32 0 -; CHECK-NEXT: [[TMP127:%.*]] = extractelement <2 x i32> [[TMP121]], i32 1 -; CHECK-NEXT: [[ADD55_1:%.*]] = add i32 [[TMP127]], [[TMP126]] -; CHECK-NEXT: [[SUB59_1:%.*]] = sub i32 [[TMP126]], [[TMP127]] +; CHECK-NEXT: [[TMP180:%.*]] = extractelement <2 x i32> [[TMP156]], i32 0 +; CHECK-NEXT: [[TMP142:%.*]] = extractelement <2 x i32> [[TMP156]], i32 1 +; CHECK-NEXT: [[ADD55_1:%.*]] = add i32 [[TMP142]], [[TMP180]] +; CHECK-NEXT: [[SUB59_1:%.*]] = sub i32 [[TMP180]], [[TMP142]] ; CHECK-NEXT: [[SHR_I54_1:%.*]] = lshr i32 [[TMP146]], 15 ; CHECK-NEXT: [[AND_I55_1:%.*]] = and i32 [[SHR_I54_1]], 65537 ; CHECK-NEXT: [[MUL_I56_1:%.*]] = mul i32 [[AND_I55_1]], 65535 ; CHECK-NEXT: [[TMP147:%.*]] = lshr <2 x i32> [[TMP110]], ; CHECK-NEXT: [[TMP148:%.*]] = and <2 x i32> [[TMP147]], ; CHECK-NEXT: [[TMP149:%.*]] = mul <2 x i32> [[TMP148]], -; CHECK-NEXT: [[ADD78:%.*]] = add i32 [[ADD48_1]], [[ADD48]] -; CHECK-NEXT: [[SUB86:%.*]] = sub i32 [[ADD48]], [[ADD48_1]] -; CHECK-NEXT: [[ADD103:%.*]] = add i32 [[ADD94]], [[ADD78]] +; CHECK-NEXT: [[ADD79:%.*]] = add i32 [[ADD94]], [[ADD78]] ; CHECK-NEXT: [[SUB104:%.*]] = sub i32 [[ADD78]], [[ADD94]] -; CHECK-NEXT: [[ADD105:%.*]] = add i32 [[SUB102]], [[SUB86]] -; CHECK-NEXT: [[SUB106:%.*]] = sub i32 [[SUB86]], [[SUB102]] +; CHECK-NEXT: [[ADD103:%.*]] = add i32 [[ADD95]], [[ADD79]] +; CHECK-NEXT: [[SUB105:%.*]] = sub i32 [[ADD79]], [[ADD95]] +; CHECK-NEXT: [[ADD105:%.*]] = add i32 [[SUB102]], [[SUB104]] +; CHECK-NEXT: [[SUB106:%.*]] = sub i32 [[SUB104]], [[SUB102]] ; CHECK-NEXT: [[ADD_I:%.*]] = add i32 [[MUL_I51_3]], [[ADD103]] -; CHECK-NEXT: [[XOR_I:%.*]] = xor i32 [[ADD_I]], [[TMP63]] +; CHECK-NEXT: [[XOR_I:%.*]] = xor i32 [[ADD_I]], [[TMP77]] ; CHECK-NEXT: [[ADD_I52:%.*]] = add i32 [[MUL_I_1]], [[ADD105]] -; CHECK-NEXT: [[XOR_I53:%.*]] = xor i32 [[ADD_I52]], [[CONV]] -; CHECK-NEXT: [[ADD_I57:%.*]] = add i32 [[MUL_I56_1]], [[SUB104]] +; CHECK-NEXT: [[XOR_I53:%.*]] = xor i32 [[ADD_I52]], [[SUB47_2]] +; CHECK-NEXT: [[ADD_I57:%.*]] = add i32 [[MUL_I56_1]], [[SUB105]] ; CHECK-NEXT: [[XOR_I58:%.*]] = xor i32 [[ADD_I57]], [[TMP146]] -; CHECK-NEXT: [[ADD_I62:%.*]] = add i32 [[MUL_I61_1]], [[SUB106]] -; CHECK-NEXT: [[XOR_I63:%.*]] = xor i32 [[ADD_I62]], [[TMP108]] +; CHECK-NEXT: [[ADD_I62:%.*]] = add i32 [[MUL_I61]], [[SUB106]] +; CHECK-NEXT: [[XOR_I63:%.*]] = xor i32 [[ADD_I62]], [[SUB47]] ; CHECK-NEXT: [[ADD110:%.*]] = add i32 [[XOR_I53]], [[XOR_I]] ; CHECK-NEXT: [[ADD112:%.*]] = add i32 [[ADD110]], [[XOR_I58]] ; CHECK-NEXT: [[ADD113:%.*]] = add i32 [[ADD112]], [[XOR_I63]] @@ -241,9 +241,9 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; CHECK-NEXT: [[SUB86_1:%.*]] = sub i32 [[ADD55]], [[ADD55_1]] ; CHECK-NEXT: [[ADD105_1:%.*]] = add i32 [[SUB102_1]], [[SUB86_1]] ; CHECK-NEXT: [[SUB106_1:%.*]] = sub i32 [[SUB86_1]], [[SUB102_1]] -; CHECK-NEXT: [[ADD_I52_1:%.*]] = add i32 [[MUL_I51_5]], [[ADD105_1]] +; CHECK-NEXT: [[ADD_I52_1:%.*]] = add i32 [[MUL_I51_1]], [[ADD105_1]] ; CHECK-NEXT: [[XOR_I53_1:%.*]] = xor i32 [[ADD_I52_1]], [[TMP107]] -; CHECK-NEXT: [[TMP129:%.*]] = shufflevector <2 x i32> 
[[TMP17]], <2 x i32> [[TMP121]], <2 x i32> +; CHECK-NEXT: [[TMP129:%.*]] = shufflevector <2 x i32> [[TMP17]], <2 x i32> [[TMP156]], <2 x i32> ; CHECK-NEXT: [[TMP130:%.*]] = lshr <2 x i32> [[TMP129]], ; CHECK-NEXT: [[TMP131:%.*]] = and <2 x i32> [[TMP130]], ; CHECK-NEXT: [[TMP132:%.*]] = mul <2 x i32> [[TMP131]], @@ -256,13 +256,13 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; CHECK-NEXT: [[TMP139:%.*]] = shufflevector <2 x i32> [[TMP153]], <2 x i32> [[TMP138]], <2 x i32> ; CHECK-NEXT: [[TMP140:%.*]] = add <2 x i32> [[TMP132]], [[TMP139]] ; CHECK-NEXT: [[TMP141:%.*]] = xor <2 x i32> [[TMP140]], [[TMP129]] -; CHECK-NEXT: [[ADD_I62_1:%.*]] = add i32 [[MUL_I61_4]], [[SUB106_1]] -; CHECK-NEXT: [[XOR_I63_1:%.*]] = xor i32 [[ADD_I62_1]], [[SUB47]] +; CHECK-NEXT: [[ADD_I62_1:%.*]] = add i32 [[MUL_I61_1]], [[SUB106_1]] +; CHECK-NEXT: [[XOR_I63_1:%.*]] = xor i32 [[ADD_I62_1]], [[TMP99]] ; CHECK-NEXT: [[ADD108_1:%.*]] = add i32 [[XOR_I53_1]], [[ADD113]] -; CHECK-NEXT: [[TMP142:%.*]] = extractelement <2 x i32> [[TMP141]], i32 0 -; CHECK-NEXT: [[ADD110_1:%.*]] = add i32 [[ADD108_1]], [[TMP142]] -; CHECK-NEXT: [[TMP154:%.*]] = extractelement <2 x i32> [[TMP141]], i32 1 -; CHECK-NEXT: [[ADD112_1:%.*]] = add i32 [[ADD110_1]], [[TMP154]] +; CHECK-NEXT: [[TMP154:%.*]] = extractelement <2 x i32> [[TMP141]], i32 0 +; CHECK-NEXT: [[ADD110_1:%.*]] = add i32 [[ADD108_1]], [[TMP154]] +; CHECK-NEXT: [[TMP155:%.*]] = extractelement <2 x i32> [[TMP141]], i32 1 +; CHECK-NEXT: [[ADD112_1:%.*]] = add i32 [[ADD110_1]], [[TMP155]] ; CHECK-NEXT: [[ADD113_1:%.*]] = add i32 [[ADD112_1]], [[XOR_I63_1]] ; CHECK-NEXT: [[ADD94_2:%.*]] = add i32 [[SUB51_1]], [[SUB51]] ; CHECK-NEXT: [[SUB86_2:%.*]] = sub i32 [[SUB51]], [[SUB51_1]] @@ -270,25 +270,25 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; CHECK-NEXT: [[TMP245:%.*]] = shufflevector <2 x i32> [[TMP244]], <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP197:%.*]] = insertelement <2 x i32> poison, i32 [[ADD94_4]], i32 0 ; CHECK-NEXT: [[TMP198:%.*]] = shufflevector <2 x i32> [[TMP197]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP216:%.*]] = add <2 x i32> [[TMP245]], [[TMP198]] -; CHECK-NEXT: [[TMP210:%.*]] = sub <2 x i32> [[TMP245]], [[TMP198]] -; CHECK-NEXT: [[TMP221:%.*]] = shufflevector <2 x i32> [[TMP216]], <2 x i32> [[TMP210]], <2 x i32> +; CHECK-NEXT: [[TMP207:%.*]] = add <2 x i32> [[TMP245]], [[TMP198]] +; CHECK-NEXT: [[TMP208:%.*]] = sub <2 x i32> [[TMP245]], [[TMP198]] +; CHECK-NEXT: [[TMP209:%.*]] = shufflevector <2 x i32> [[TMP207]], <2 x i32> [[TMP208]], <2 x i32> ; CHECK-NEXT: [[ADD105_2:%.*]] = add i32 [[SUB102_2]], [[SUB86_2]] ; CHECK-NEXT: [[SUB106_2:%.*]] = sub i32 [[SUB86_2]], [[SUB102_2]] -; CHECK-NEXT: [[ADD_I52_2:%.*]] = add i32 [[MUL_I51_4]], [[ADD105_2]] +; CHECK-NEXT: [[ADD_I52_2:%.*]] = add i32 [[MUL_I51_6]], [[ADD105_2]] ; CHECK-NEXT: [[XOR_I53_2:%.*]] = xor i32 [[ADD_I52_2]], [[CONV_1]] -; CHECK-NEXT: [[TMP134:%.*]] = add <2 x i32> [[TMP149]], [[TMP221]] +; CHECK-NEXT: [[TMP134:%.*]] = add <2 x i32> [[TMP149]], [[TMP209]] ; CHECK-NEXT: [[TMP213:%.*]] = xor <2 x i32> [[TMP134]], [[TMP110]] -; CHECK-NEXT: [[SHR_I59_2:%.*]] = lshr i32 [[TMP238]], 15 +; CHECK-NEXT: [[SHR_I59_2:%.*]] = lshr i32 [[TMP94]], 15 ; CHECK-NEXT: [[AND_I60_2:%.*]] = and i32 [[SHR_I59_2]], 65537 ; CHECK-NEXT: [[MUL_I61_2:%.*]] = mul i32 [[AND_I60_2]], 65535 ; CHECK-NEXT: [[ADD_I62_2:%.*]] = add i32 [[MUL_I61_2]], [[SUB106_2]] -; CHECK-NEXT: [[XOR_I63_2:%.*]] = xor i32 
[[ADD_I62_2]], [[TMP238]] +; CHECK-NEXT: [[XOR_I63_2:%.*]] = xor i32 [[ADD_I62_2]], [[TMP94]] ; CHECK-NEXT: [[ADD108_2:%.*]] = add i32 [[XOR_I53_2]], [[ADD113_1]] -; CHECK-NEXT: [[TMP237:%.*]] = extractelement <2 x i32> [[TMP213]], i32 0 -; CHECK-NEXT: [[ADD110_2:%.*]] = add i32 [[ADD108_2]], [[TMP237]] -; CHECK-NEXT: [[TMP218:%.*]] = extractelement <2 x i32> [[TMP213]], i32 1 -; CHECK-NEXT: [[ADD112_2:%.*]] = add i32 [[ADD110_2]], [[TMP218]] +; CHECK-NEXT: [[TMP157:%.*]] = extractelement <2 x i32> [[TMP213]], i32 0 +; CHECK-NEXT: [[ADD110_2:%.*]] = add i32 [[ADD108_2]], [[TMP157]] +; CHECK-NEXT: [[TMP158:%.*]] = extractelement <2 x i32> [[TMP213]], i32 1 +; CHECK-NEXT: [[ADD112_2:%.*]] = add i32 [[ADD110_2]], [[TMP158]] ; CHECK-NEXT: [[ADD113_2:%.*]] = add i32 [[ADD112_2]], [[XOR_I63_2]] ; CHECK-NEXT: [[ADD94_3:%.*]] = add i32 [[SUB59_1]], [[SUB59]] ; CHECK-NEXT: [[SUB86_3:%.*]] = sub i32 [[SUB59]], [[SUB59_1]] @@ -301,7 +301,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; CHECK-NEXT: [[TMP220:%.*]] = shufflevector <2 x i32> [[TMP261]], <2 x i32> [[TMP262]], <2 x i32> ; CHECK-NEXT: [[ADD105_3:%.*]] = add i32 [[SUB102_3]], [[SUB86_3]] ; CHECK-NEXT: [[SUB106_3:%.*]] = sub i32 [[SUB86_3]], [[SUB102_3]] -; CHECK-NEXT: [[ADD_I52_3:%.*]] = add i32 [[MUL_I51_6]], [[ADD105_3]] +; CHECK-NEXT: [[ADD_I52_3:%.*]] = add i32 [[MUL_I51_4]], [[ADD105_3]] ; CHECK-NEXT: [[XOR_I53_3:%.*]] = xor i32 [[ADD_I52_3]], [[CONV1]] ; CHECK-NEXT: [[TMP230:%.*]] = lshr <2 x i32> [[TMP102]], ; CHECK-NEXT: [[TMP231:%.*]] = and <2 x i32> [[TMP230]], diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll index faffe16f8e9cd9..6dd9242989b627 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll @@ -793,6 +793,25 @@ entry: ret double %add } +define float @reduce_fadd_after_fmul_of_buildvec(float %a, float %b, float %c) { +; CHECK-LABEL: @reduce_fadd_after_fmul_of_buildvec( +; CHECK-NEXT: [[MUL_0:%.*]] = fmul fast float [[A:%.*]], 1.000000e+01 +; CHECK-NEXT: [[MUL_1:%.*]] = fmul fast float [[B:%.*]], 1.000000e+01 +; CHECK-NEXT: [[MUL_2:%.*]] = fmul fast float [[C:%.*]], 1.000000e+01 +; CHECK-NEXT: [[ADD_0:%.*]] = fadd fast float [[MUL_0]], [[MUL_1]] +; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float [[ADD_0]], [[MUL_2]] +; CHECK-NEXT: ret float [[ADD_1]] +; + %mul.0 = fmul fast float %a, 10.0 + %mul.1 = fmul fast float %b, 10.0 + %mul.2 = fmul fast float %c, 10.0 + + %add.0 = fadd fast float %mul.0, %mul.1 + %add.1 = fadd fast float %add.0, %mul.2 + ret float %add.1 +} + + declare float @llvm.fmuladd.f32(float, float, float) declare double @llvm.fmuladd.f64(double, double, double) diff --git a/llvm/tools/llvm-dis/llvm-dis.cpp b/llvm/tools/llvm-dis/llvm-dis.cpp index d28af85bc739eb..a3a62f042ddbd1 100644 --- a/llvm/tools/llvm-dis/llvm-dis.cpp +++ b/llvm/tools/llvm-dis/llvm-dis.cpp @@ -11,7 +11,23 @@ // llvm-dis [options] x.bc - Read LLVM bitcode from the x.bc file, write asm // to the x.ll file. 
// Options: -// --help - Output information about command line switches +// +// Color Options: +// --color - Use colors in output (default=autodetect) +// +// Disassembler Options: +// -f - Enable binary output on terminals +// --materialize-metadata - Load module without materializing metadata, +// then materialize only the metadata +// -o - Override output filename +// --show-annotations - Add informational comments to the .ll file +// +// Generic Options: +// --help - Display available options +// (--help-hidden for more) +// --help-list - Display list of available options +// (--help-list-hidden for more) +// --version - Display the version of this program // //===----------------------------------------------------------------------===// diff --git a/llvm/tools/opt/CMakeLists.txt b/llvm/tools/opt/CMakeLists.txt index 8d5c9fb62e5bec..6dd74ae1b7f8fe 100644 --- a/llvm/tools/opt/CMakeLists.txt +++ b/llvm/tools/opt/CMakeLists.txt @@ -46,9 +46,8 @@ add_llvm_tool(opt intrinsics_gen SUPPORT_PLUGINS + EXPORT_SYMBOLS ) target_link_libraries(opt PRIVATE LLVMOptDriver) setup_host_tool(opt OPT opt_exe opt_target) - -export_executable_symbols_for_plugins(opt) diff --git a/llvm/unittests/SandboxIR/SandboxIRTest.cpp b/llvm/unittests/SandboxIR/SandboxIRTest.cpp index b76d24dc297b96..6280963d588fac 100644 --- a/llvm/unittests/SandboxIR/SandboxIRTest.cpp +++ b/llvm/unittests/SandboxIR/SandboxIRTest.cpp @@ -777,6 +777,30 @@ define void @foo(ptr %ptr) { EXPECT_EQ(LookupBB2Addr, nullptr); } +TEST_F(SandboxIRTest, ConstantTokenNone) { + parseIR(C, R"IR( +define void @foo(ptr %ptr) { + bb0: + %cs = catchswitch within none [label %handler] unwind to caller + handler: + ret void +} +)IR"); + Function &LLVMF = *M->getFunction("foo"); + sandboxir::Context Ctx(C); + + [[maybe_unused]] auto &F = *Ctx.createFunction(&LLVMF); + auto *BB0 = cast( + Ctx.getValue(getBasicBlockByName(LLVMF, "bb0"))); + auto *CS = cast(&*BB0->begin()); + + // Check classof(), creation, getFunction(), getBasicBlock(). + auto *CTN = cast(CS->getParentPad()); + // Check get(). + auto *NewCTN = sandboxir::ConstantTokenNone::get(Ctx); + EXPECT_EQ(NewCTN, CTN); +} + TEST_F(SandboxIRTest, Use) { parseIR(C, R"IR( define i32 @foo(i32 %v0, i32 %v1) { @@ -1354,14 +1378,18 @@ define void @foo(i1 %c0, i8 %v0, i8 %v1, i1 %c1) { auto *BB = &*F->begin(); auto It = BB->begin(); auto *Select = cast(&*It++); + const auto *ConstSelect = Select; // To test the const getters. auto *Ret = &*It++; // Check getCondition(). EXPECT_EQ(Select->getCondition(), Cond0); + EXPECT_EQ(ConstSelect->getCondition(), Cond0); // Check getTrueValue(). EXPECT_EQ(Select->getTrueValue(), V0); + EXPECT_EQ(ConstSelect->getTrueValue(), V0); // Check getFalseValue(). EXPECT_EQ(Select->getFalseValue(), V1); + EXPECT_EQ(ConstSelect->getFalseValue(), V1); // Check setCondition(). Select->setCondition(Cond1); EXPECT_EQ(Select->getCondition(), Cond1); @@ -1371,6 +1399,13 @@ define void @foo(i1 %c0, i8 %v0, i8 %v1, i1 %c1) { // Check setFalseValue(). Select->setFalseValue(V0); EXPECT_EQ(Select->getFalseValue(), V0); + // Check swapValues(). + Select->swapValues(); + EXPECT_EQ(Select->getTrueValue(), V0); + EXPECT_EQ(Select->getFalseValue(), V1); + // Check areInvalidOperands. + EXPECT_EQ(sandboxir::SelectInst::areInvalidOperands(Cond0, V0, V1), nullptr); + EXPECT_NE(sandboxir::SelectInst::areInvalidOperands(V0, V1, Cond0), nullptr); { // Check SelectInst::create() InsertBefore. 
diff --git a/llvm/unittests/SandboxIR/TrackerTest.cpp b/llvm/unittests/SandboxIR/TrackerTest.cpp index a1f39fe958e351..a1a4117b5e97b1 100644 --- a/llvm/unittests/SandboxIR/TrackerTest.cpp +++ b/llvm/unittests/SandboxIR/TrackerTest.cpp @@ -964,6 +964,32 @@ define void @foo(i32 %cond0, i32 %cond1) { EXPECT_EQ(Switch->findCaseDest(BB1), One); } +TEST_F(TrackerTest, SelectInst) { + parseIR(C, R"IR( +define void @foo(i1 %c0, i8 %v0, i8 %v1) { + %sel = select i1 %c0, i8 %v0, i8 %v1 + ret void +} +)IR"); + llvm::Function *LLVMF = &*M->getFunction("foo"); + sandboxir::Context Ctx(C); + sandboxir::Function *F = Ctx.createFunction(LLVMF); + auto *V0 = F->getArg(1); + auto *V1 = F->getArg(2); + auto *BB = &*F->begin(); + auto It = BB->begin(); + auto *Select = cast(&*It++); + + // Check tracking for swapValues. + Ctx.save(); + Select->swapValues(); + EXPECT_EQ(Select->getTrueValue(), V1); + EXPECT_EQ(Select->getFalseValue(), V0); + Ctx.revert(); + EXPECT_EQ(Select->getTrueValue(), V0); + EXPECT_EQ(Select->getFalseValue(), V1); +} + TEST_F(TrackerTest, ShuffleVectorInst) { parseIR(C, R"IR( define void @foo(<2 x i8> %v1, <2 x i8> %v2) { diff --git a/llvm/utils/gn/secondary/lldb/source/Target/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/Target/BUILD.gn index fe8a3f590dd3c2..acf9e7aa701e7f 100644 --- a/llvm/utils/gn/secondary/lldb/source/Target/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/source/Target/BUILD.gn @@ -35,6 +35,7 @@ static_library("Target") { sources = [ "ABI.cpp", "AssertFrameRecognizer.cpp", + "CoreFileMemoryRanges.cpp", "DynamicRegisterInfo.cpp", "ExecutionContext.cpp", "InstrumentationRuntime.cpp", diff --git a/llvm/utils/lit/lit/TestRunner.py b/llvm/utils/lit/lit/TestRunner.py index 19f35fc7e212f3..a1785073547ad0 100644 --- a/llvm/utils/lit/lit/TestRunner.py +++ b/llvm/utils/lit/lit/TestRunner.py @@ -1017,6 +1017,20 @@ def _executeShCmd(cmd, shenv, results, timeoutHelper): return exitCode +def findColor(line, curr_color): + start = line.rfind("\33[") + if start == -1: + return curr_color + end = line.find("m", start + 2) + if end == -1: + return curr_color + match = line[start : end + 1] + # "\33[0m" means "reset all formatting". Sometimes the 0 is skipped. 
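+    # A reset clears the tracked color; any other match (e.g. "\33[31m" for
+    # red) becomes the new current color.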
+ if match == "\33[m" or match == "\33[0m": + return None + return match + + def formatOutput(title, data, limit=None): if not data.strip(): return "" @@ -1027,8 +1041,18 @@ def formatOutput(title, data, limit=None): msg = "" ndashes = 30 # fmt: off - out = f"# .---{title}{'-' * (ndashes - 4 - len(title))}\n" - out += f"# | " + "\n# | ".join(data.splitlines()) + "\n" + out = f"# .---{title}{'-' * (ndashes - 4 - len(title))}\n" + curr_color = None + for line in data.splitlines(): + if curr_color: + out += "\33[0m" + out += "# | " + if curr_color: + out += curr_color + out += line + "\n" + curr_color = findColor(line, curr_color) + if curr_color: + out += "\33[0m" # prevent unterminated formatting from leaking out += f"# `---{msg}{'-' * (ndashes - 4 - len(msg))}\n" # fmt: on return out diff --git a/llvm/utils/lit/tests/Inputs/escape-color/color-escaped.txt b/llvm/utils/lit/tests/Inputs/escape-color/color-escaped.txt new file mode 100644 index 00000000000000..e7a33e380b351c --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/escape-color/color-escaped.txt @@ -0,0 +1,10 @@ +# .---command stdout------------ +# | # RUN: cat %s +# | red +# | still red(B +# | plain +# | green +# | still green (never terminated) +# `----------------------------- + +-- diff --git a/llvm/utils/lit/tests/Inputs/escape-color/color.txt b/llvm/utils/lit/tests/Inputs/escape-color/color.txt new file mode 100644 index 00000000000000..15ffc22d134f0f --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/escape-color/color.txt @@ -0,0 +1,6 @@ +# RUN: cat %s +red +still red(B +plain +green +still green (never terminated) diff --git a/llvm/utils/lit/tests/Inputs/escape-color/lit.cfg b/llvm/utils/lit/tests/Inputs/escape-color/lit.cfg new file mode 100644 index 00000000000000..36f4eb69d4858e --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/escape-color/lit.cfg @@ -0,0 +1,8 @@ +import lit.formats + +config.name = "escape-color" +config.suffixes = [".txt"] +config.test_format = lit.formats.ShTest() +config.test_source_root = None +config.test_exec_root = None + diff --git a/llvm/utils/lit/tests/escape-color.py b/llvm/utils/lit/tests/escape-color.py new file mode 100644 index 00000000000000..8fdda3553da399 --- /dev/null +++ b/llvm/utils/lit/tests/escape-color.py @@ -0,0 +1,4 @@ +# cut off the first 9 lines to avoid absolute file paths in the output +# then keep only the next 10 lines to avoid test timing in the output +# RUN: %{lit} %{inputs}/escape-color/color.txt -a | tail -n +10 | head -n 10 > %t +# RUN: diff --strip-trailing-cr %{inputs}/escape-color/color-escaped.txt %t diff --git a/mlir/include/mlir/Analysis/Presburger/Utils.h b/mlir/include/mlir/Analysis/Presburger/Utils.h index d3c0802c240bc1..0e6d18279d67ed 100644 --- a/mlir/include/mlir/Analysis/Presburger/Utils.h +++ b/mlir/include/mlir/Analysis/Presburger/Utils.h @@ -17,7 +17,9 @@ #include "llvm/ADT/DynamicAPInt.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallBitVector.h" +#include "llvm/Support/raw_ostream.h" #include +#include namespace mlir { namespace presburger { @@ -292,6 +294,54 @@ std::vector multiplyPolynomials(ArrayRef a, bool isRangeZero(ArrayRef arr); +/// Example usage: +/// Print .12, 3.4, 56.7 +/// preAlign = ".", minSpacing = 1, +/// .12 .12 +/// 3.4 3.4 +/// 56.7 56.7 +struct PrintTableMetrics { + // If unknown, set to 0 and pass the struct into updatePrintMetrics. 
+  unsigned maxPreIndent;
+  unsigned maxPostIndent;
+  std::string preAlign;
+};
+
+/// Iterate over each val in the table and update 'm' where
+/// .maxPreIndent and .maxPostIndent are initialized to 0.
+/// class T is any type that can be handled by llvm::raw_string_ostream.
+template <class T>
+void updatePrintMetrics(T val, PrintTableMetrics &m) {
+  std::string str;
+  llvm::raw_string_ostream(str) << val;
+  if (str.empty())
+    return;
+  unsigned preIndent = str.find(m.preAlign);
+  preIndent = (preIndent != (unsigned)std::string::npos) ? preIndent + 1 : 0;
+  m.maxPreIndent = std::max(m.maxPreIndent, preIndent);
+  m.maxPostIndent =
+      std::max(m.maxPostIndent, (unsigned int)(str.length() - preIndent));
+}
+
+/// Print val in the table with metrics specified in 'm'.
+template <class T>
+void printWithPrintMetrics(raw_ostream &os, T val, unsigned minSpacing,
+                           const PrintTableMetrics &m) {
+  std::string str;
+  llvm::raw_string_ostream(str) << val;
+  unsigned preIndent;
+  if (!str.empty()) {
+    preIndent = str.find(m.preAlign);
+    preIndent = (preIndent != (unsigned)std::string::npos) ? preIndent + 1 : 0;
+  } else {
+    preIndent = 0;
+  }
+  for (unsigned i = 0; i < (minSpacing + m.maxPreIndent - preIndent); ++i)
+    os << " ";
+  os << str;
+  for (unsigned i = 0; i < m.maxPostIndent - (str.length() - preIndent); ++i)
+    os << " ";
+}
 } // namespace presburger
 } // namespace mlir
diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
index 8a1ef94c853a58..1ec8227e232637 100644
--- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
+++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
@@ -254,7 +254,7 @@ def AMDGPU_RawBufferAtomicCmpswapOp :
 def AMDGPU_RawBufferAtomicFaddOp :
     AMDGPU_Op<"raw_buffer_atomic_fadd", [AllElementTypesMatch<["value", "memref"]>,
                                          AttrSizedOperandSegments]>,
-    Arguments<(ins F32:$value,
+    Arguments<(ins AnyTypeOf<[F32, VectorOfLengthAndType<[2], [F16]>]>:$value,
                    Arg:$memref,
                    Variadic:$indices,
                    DefaultValuedAttr:$boundsCheck,
@@ -405,7 +405,7 @@ def AMDGPU_RawBufferAtomicUminOp :
 
 def AMDGPU_DPPPerm : I32EnumAttr<"DPPPerm",
     "The possible permutations for a DPP operation",
-    [ 
+    [
     I32EnumAttrCase<"quad_perm", 0>,
     I32EnumAttrCase<"row_shl", 1>,
     I32EnumAttrCase<"row_shr", 2>,
@@ -419,7 +419,7 @@ def AMDGPU_DPPPerm : I32EnumAttr<"DPPPerm",
     I32EnumAttrCase<"row_bcast_15", 10>,
     I32EnumAttrCase<"row_bcast_31", 11>
   ]> {
-  let genSpecializedAttr = 0; 
+  let genSpecializedAttr = 0;
   let cppNamespace = "::mlir::amdgpu";
 }
diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
index 0f9f0581e5bdd1..4ccd2c8874ee16 100644
--- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
+++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
@@ -152,7 +152,7 @@ class OpFilter {
   /// This function adds a DENY entry.
void denyDialect(StringRef dialectNamespace) { Entry::FilterFn filterFn = [=](Operation *op) { - return op->getDialect()->getNamespace() == dialectNamespace; + return op->getName().getDialectNamespace() == dialectNamespace; }; entries.push_back(Entry{filterFn, Entry::FilterType::DENY}); } diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td index e1c0d7771233f2..8f6c76a3479f23 100644 --- a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td +++ b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td @@ -347,7 +347,7 @@ def Tosa_TransposeConv2DOp : Tosa_InferShapedTypeOp<"transpose_conv2d"> { Tosa_Tensor1D:$bias, Tosa_IntArrayAttr4:$out_pad, Tosa_IntArrayAttr2:$stride, - Tosa_IntArrayAttrUpto4:$out_shape, + Tosa_IntArrayAttr4:$out_shape, OptionalAttr:$quantization_info, DefaultValuedOptionalAttr:$local_bound ); diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaTypesBase.td b/mlir/include/mlir/Dialect/Tosa/IR/TosaTypesBase.td index 49fd88ec2a3a44..f3bd95aa7e3feb 100644 --- a/mlir/include/mlir/Dialect/Tosa/IR/TosaTypesBase.td +++ b/mlir/include/mlir/Dialect/Tosa/IR/TosaTypesBase.td @@ -179,9 +179,6 @@ def Tosa_Int8Like : Tosa_TypeLike<[Tosa_Int8], "signless-integer-8-bit-like">; //===----------------------------------------------------------------------===// // Attribute predicates and classes. //===----------------------------------------------------------------------===// -class DenseArrayMaxCt : AttrConstraint< - CPred<"::llvm::cast<::mlir::DenseArrayAttr>($_self).size() <= " # n>, - "with at least " # n # " elements">; def Tosa_Fp32ArrayAttr2 : ConfinedAttr]>; def Tosa_Fp32ArrayAttr3 : ConfinedAttr]>; diff --git a/mlir/include/mlir/IR/CommonAttrConstraints.td b/mlir/include/mlir/IR/CommonAttrConstraints.td index 6774a7c568315d..853fb318c76e71 100644 --- a/mlir/include/mlir/IR/CommonAttrConstraints.td +++ b/mlir/include/mlir/IR/CommonAttrConstraints.td @@ -789,6 +789,14 @@ class DenseArrayCount : AttrConstraint< CPred<"::llvm::cast<::mlir::DenseArrayAttr>($_self).size() == " #n>, "with exactly " # n # " elements">; +class DenseArrayMaxCt : AttrConstraint< + CPred<"::llvm::cast<::mlir::DenseArrayAttr>($_self).size() <= " # n>, + "with at most " # n # " elements">; + +class DenseArrayMinCt : AttrConstraint< + CPred<"::llvm::cast<::mlir::DenseArrayAttr>($_self).size() >= " # n>, + "with at least " # n # " elements">; + class DenseArrayStrictlyPositive : AttrConstraint< CPred<"::llvm::all_of(::llvm::cast<" # arrayType #">($_self).asArrayRef(), " "[&](auto v) { return v > 0; })">, diff --git a/mlir/lib/Analysis/Presburger/IntegerRelation.cpp b/mlir/lib/Analysis/Presburger/IntegerRelation.cpp index 94af81f955e5a5..74cdf567c0e569 100644 --- a/mlir/lib/Analysis/Presburger/IntegerRelation.cpp +++ b/mlir/lib/Analysis/Presburger/IntegerRelation.cpp @@ -32,7 +32,10 @@ #include #include #include +#include #include +#include +#include #include #include @@ -2589,19 +2592,26 @@ void IntegerRelation::mergeAndCompose(const IntegerRelation &other) { void IntegerRelation::print(raw_ostream &os) const { assert(hasConsistentState()); printSpace(os); + PrintTableMetrics ptm = {0, 0, "-"}; + for (unsigned i = 0, e = getNumEqualities(); i < e; ++i) + for (unsigned j = 0, f = getNumCols(); j < f; ++j) + updatePrintMetrics(atEq(i, j), ptm); + for (unsigned i = 0, e = getNumInequalities(); i < e; ++i) + for (unsigned j = 0, f = getNumCols(); j < f; ++j) + updatePrintMetrics(atIneq(i, j), ptm); + // Print using PrintMetrics. 
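+  // The loops above were a measuring pass; the loops below print every
+  // coefficient padded to the collected widths, aligned on '-'.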
+ unsigned MIN_SPACING = 1; for (unsigned i = 0, e = getNumEqualities(); i < e; ++i) { - os << " "; for (unsigned j = 0, f = getNumCols(); j < f; ++j) { - os << atEq(i, j) << "\t"; + printWithPrintMetrics(os, atEq(i, j), MIN_SPACING, ptm); } - os << "= 0\n"; + os << " = 0\n"; } for (unsigned i = 0, e = getNumInequalities(); i < e; ++i) { - os << " "; for (unsigned j = 0, f = getNumCols(); j < f; ++j) { - os << atIneq(i, j) << "\t"; + printWithPrintMetrics(os, atIneq(i, j), MIN_SPACING, ptm); } - os << ">= 0\n"; + os << " >= 0\n"; } os << '\n'; } diff --git a/mlir/lib/Analysis/Presburger/Matrix.cpp b/mlir/lib/Analysis/Presburger/Matrix.cpp index 110c5df1af37c0..9fc6205eb5ed52 100644 --- a/mlir/lib/Analysis/Presburger/Matrix.cpp +++ b/mlir/lib/Analysis/Presburger/Matrix.cpp @@ -398,10 +398,16 @@ Matrix Matrix::getSubMatrix(unsigned fromRow, unsigned toRow, template void Matrix::print(raw_ostream &os) const { - for (unsigned row = 0; row < nRows; ++row) { + PrintTableMetrics ptm = {0, 0, "-"}; + for (unsigned row = 0; row < nRows; ++row) for (unsigned column = 0; column < nColumns; ++column) - os << at(row, column) << ' '; - os << '\n'; + updatePrintMetrics(at(row, column), ptm); + unsigned MIN_SPACING = 1; + for (unsigned row = 0; row < nRows; ++row) { + for (unsigned column = 0; column < nColumns; ++column) { + printWithPrintMetrics(os, at(row, column), MIN_SPACING, ptm); + } + os << "\n"; } } diff --git a/mlir/lib/Analysis/Presburger/Simplex.cpp b/mlir/lib/Analysis/Presburger/Simplex.cpp index c78a0723a6c0fa..4ffa2d546af4dd 100644 --- a/mlir/lib/Analysis/Presburger/Simplex.cpp +++ b/mlir/lib/Analysis/Presburger/Simplex.cpp @@ -2153,9 +2153,16 @@ void SimplexBase::print(raw_ostream &os) const { for (unsigned col = 2, e = getNumColumns(); col < e; ++col) os << ", c" << col << ": " << colUnknown[col]; os << '\n'; - for (unsigned row = 0, numRows = getNumRows(); row < numRows; ++row) { + PrintTableMetrics ptm = {0, 0, "-"}; + for (unsigned row = 0, numRows = getNumRows(); row < numRows; ++row) for (unsigned col = 0, numCols = getNumColumns(); col < numCols; ++col) - os << tableau(row, col) << '\t'; + updatePrintMetrics(tableau(row, col), ptm); + unsigned MIN_SPACING = 1; + for (unsigned row = 0, numRows = getNumRows(); row < numRows; ++row) { + for (unsigned col = 0, numCols = getNumColumns(); col < numCols; ++col) { + printWithPrintMetrics(os, tableau(row, col), MIN_SPACING, + ptm); + } os << '\n'; } os << '\n'; diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp index 96b433294d258a..9fb557bc8a65ed 100644 --- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp +++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp @@ -115,15 +115,18 @@ struct RawBufferOpLowering : public ConvertOpToLLVMPattern { rewriter.getIntegerType(floatType.getWidth())); } if (auto dataVector = dyn_cast(wantedDataType)) { + uint32_t vecLen = dataVector.getNumElements(); uint32_t elemBits = dataVector.getElementTypeBitWidth(); - uint32_t totalBits = elemBits * dataVector.getNumElements(); + uint32_t totalBits = elemBits * vecLen; + bool usePackedFp16 = + isa_and_present(*gpuOp) && vecLen == 2; if (totalBits > maxVectorOpWidth) return gpuOp.emitOpError( "Total width of loads or stores must be no more than " + Twine(maxVectorOpWidth) + " bits, but we call for " + Twine(totalBits) + " bits. 
This should've been caught in validation");
-    if (elemBits < 32) {
+    if (!usePackedFp16 && elemBits < 32) {
       if (totalBits > 32) {
         if (totalBits % 32 != 0)
           return gpuOp.emitOpError("Load or store of more than 32-bits that "
diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
index 93e8b080a4f672..29926719129dc5 100644
--- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
+++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
@@ -329,10 +329,9 @@ void mlir::configureGpuToROCDLConversionLegality(ConversionTarget &target) {
   target.addLegalDialect<::mlir::LLVM::LLVMDialect>();
   target.addLegalDialect();
   target.addIllegalDialect();
-  target.addIllegalOp();
+  target.addIllegalOp();
   // TODO: Remove once we support replacing non-root ops.
   target.addLegalOp();
diff --git a/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp b/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp
index bb0d90dbba4a01..926ebb1a2cea87 100644
--- a/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp
+++ b/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp
@@ -1602,6 +1602,50 @@ static FailureOr<OpOperand *> getConsumerFromUses(Value val,
   return &operand;
 }
 
+/// Find the perfectly nested loops outside of the given loop (included),
+/// sorted from outer to inner.
+///
+/// E.g.
+///
+/// ```
+/// %0 = scf.for()
+///   %1 = scf.for()
+///     %2 = scf.for()
+///       %3 = ...
+///       yield %3
+///     yield %2
+///   yield %1
+/// ```
+///
+/// This function will return three perfectly nested loops: %0 + %1 + %2, when
+/// the target inner loop is %2.
+static SmallVector<scf::ForOp>
+getPerfectlyNestedLoopsOutsideOf(scf::ForOp loop) {
+  SmallVector<scf::ForOp> nestLoops = {loop};
+  auto outerLoop = dyn_cast<scf::ForOp>(loop->getParentOp());
+
+  // Check if it is the ForOp that yields the result of the inner loop.
+  auto isForOpYieldResultOfInnerLoop =
+      [](scf::ForOp outerLoop) -> LogicalResult {
+    Block *body = outerLoop.getBody();
+    if (!llvm::hasSingleElement(body->without_terminator()))
+      return failure();
+    auto yieldOp = cast<scf::YieldOp>(body->getTerminator());
+    auto innerForOp = dyn_cast<scf::ForOp>(body->front());
+    if (!innerForOp)
+      return failure();
+    // All of innerForOp's results should be yielded.
+    return success(innerForOp->getNumResults() == yieldOp->getNumOperands());
+  };
+
+  while (outerLoop && succeeded(isForOpYieldResultOfInnerLoop(outerLoop))) {
+    nestLoops.push_back(outerLoop);
+    outerLoop = dyn_cast<scf::ForOp>(outerLoop->getParentOp());
+  }
+  // Sorted from outer to inner.
+  return {nestLoops.rbegin(), nestLoops.rend()};
+}
+
 /// Fetch the untiled consumer of a scf.for's result which is yielded by a
 /// tensor.insert_slice. This function makes the following assumptions :
 /// 1. tensor.insert_slice has scf.yield as its only user.
@@ -1619,9 +1663,10 @@ getUntiledConsumerFromSlice(tensor::InsertSliceOp candidateSliceOp) {
   auto forOp = dyn_cast<scf::ForOp>(containingOp);
   if (!forOp)
     return failure();
-  Value resultingValue = forOp->getResult(resultNumber);
+  scf::ForOp topLevelForOp = getPerfectlyNestedLoopsOutsideOf(forOp).front();
+  Value resultingValue = topLevelForOp->getResult(resultNumber);
 
-  return getConsumerFromUses(resultingValue, containingOp->getBlock());
+  return getConsumerFromUses(resultingValue, topLevelForOp->getBlock());
 }
 
 /// Fetch the first untiled consumer of a scf.forall's result which is yielded
@@ -1684,59 +1729,6 @@ static FailureOr<OpOperand *> getUntiledConsumerFromSlice(Operation *sliceOp) {
   }
 }
 
-/// After fusing consumer into scf.for we want to modify the scf.yield operation
-/// to reflect the same by returning the values yielded by the tiled consumer.
-static void
-fixTerminatorSCFYield(RewriterBase &rewriter, scf::ForOp newForOp,
-                      TilingResult &tilingResult,
-                      ArrayRef<SmallVector<OpFoldResult>> &resultOffsets,
-                      ArrayRef<SmallVector<OpFoldResult>> &resultSizes,
-                      ArrayRef<BlockArgument> bbArgs) {
-  scf::YieldOp oldTerminatorOp =
-      cast<scf::YieldOp>(newForOp.getBody()->getTerminator());
-  unsigned totalOldResults = oldTerminatorOp->getNumResults();
-  unsigned totalTiledResults = tilingResult.tiledOps[0]->getNumResults();
-  SmallVector<Value> newYieldOperands;
-  newYieldOperands.reserve(totalOldResults + totalTiledResults);
-  for (auto oldResult : oldTerminatorOp.getResults()) {
-    newYieldOperands.push_back(oldResult);
-  }
-  rewriter.setInsertionPointAfter(oldTerminatorOp);
-  Location loc = newForOp.getLoc();
-  for (auto [tiledResult, bbArg, resultOffset, resultSize] :
-       llvm::zip_equal(tilingResult.tiledOps[0]->getResults(), bbArgs,
-                       resultOffsets, resultSizes)) {
-    SmallVector<OpFoldResult> strides(resultOffset.size(),
-                                      rewriter.getIndexAttr(1));
-    Value newInsertSliceOp = rewriter.create<tensor::InsertSliceOp>(
-        loc, tiledResult, bbArg, resultOffset, resultSize, strides);
-    newYieldOperands.push_back(newInsertSliceOp);
-  }
-  rewriter.create<scf::YieldOp>(loc, newYieldOperands);
-  rewriter.eraseOp(oldTerminatorOp);
-}
-
-/// After fusing consumer into scf.forall we want to yield each of the resulting
-/// values by the tiled consumer within scf.forall.in_parallel region.
-static void
-fixTerminatorSCFInParallel(RewriterBase &rewriter, scf::ForallOp newForallOp,
-                           SmallVector<Value> tiledResults,
-                           ArrayRef<SmallVector<OpFoldResult>> &resultOffsets,
-                           ArrayRef<SmallVector<OpFoldResult>> &resultSizes,
-                           ArrayRef<BlockArgument> bbArgs) {
-  scf::InParallelOp newTerminatorOp = newForallOp.getTerminator();
-  rewriter.setInsertionPointToStart(newTerminatorOp.getBody());
-  Location firstYieldOpLoc =
-      (*(newTerminatorOp.getYieldingOps().begin())).getLoc();
-  for (auto [tiledResult, bbArg, resultOffset, resultSize] :
-       llvm::zip_equal(tiledResults, bbArgs, resultOffsets, resultSizes)) {
-    SmallVector<OpFoldResult> strides(resultOffset.size(),
-                                      rewriter.getIndexAttr(1));
-    rewriter.create<tensor::ParallelInsertSliceOp>(
-        firstYieldOpLoc, tiledResult, bbArg, resultOffset, resultSize, strides);
-  }
-}
-
 /// Implementation of fusing consumer of a single slice by computing the
 /// slice of the consumer in-place for scf loop.
 FailureOr<scf::SCFFuseConsumerOfSliceResult>
@@ -1767,81 +1759,63 @@ mlir::scf::tileAndFuseConsumerOfSlice(RewriterBase &rewriter,
         consumerOp, "consumer op's operand doesn't seem to be an OpResult");
   }
 
-  Operation *oldLoopOp = nullptr;
-  SmallVector<Value> newOuts;
-  Block *oldLoopBody = nullptr;
-  unsigned initSize = 0;
-  unsigned rank = 1;
+  // There are two possible cases regarding the containing loop here:
+  // 1. a single `scf.forall` or `scf.for`.
+  // 2. the inner-most `scf.for` inside a nested `scf.for` structure, where the
+  // top-level loop is the outer-most one of these nested loops.
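+  // E.g., for
+  //   %0 = scf.for ... { %1 = scf.for ... { ... tensor.insert_slice ... } }
+  // the candidate slice lives in the inner-most loop %1, while the consumer
+  // uses the result of the top-level loop %0.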
+  LoopLikeOpInterface innerMostLoop =
+      candidateSliceOp->getParentOfType<LoopLikeOpInterface>();
+  SmallVector<LoopLikeOpInterface> nestedLoops;
   if (isInsertSliceOp) {
-    auto forOp = candidateSliceOp->getParentOfType<scf::ForOp>();
-    oldLoopOp = forOp;
-    llvm::append_range(newOuts, forOp.getInits());
-    oldLoopBody = forOp.getBody();
-    initSize = forOp.getInits().size();
+    nestedLoops = llvm::map_to_vector(
+        getPerfectlyNestedLoopsOutsideOf(
+            cast<scf::ForOp>(innerMostLoop.getOperation())),
+        [](scf::ForOp forOp) {
+          return cast<LoopLikeOpInterface>(forOp.getOperation());
+        });
   } else {
-    auto forallOp = candidateSliceOp->getParentOfType<scf::ForallOp>();
-    oldLoopOp = forallOp;
-    llvm::append_range(newOuts, forallOp.getOutputs());
-    oldLoopBody = forallOp.getBody();
-    initSize = forallOp.getOutputs().size();
-    rank = forallOp.getRank();
+    nestedLoops = {innerMostLoop};
   }
 
-  if (failed(checkAssumptionForLoop(oldLoopOp, consumerOp))) {
+  LoopLikeOpInterface outerMostLoop = nestedLoops.front();
+
+  if (failed(checkAssumptionForLoop(outerMostLoop, consumerOp))) {
     return rewriter.notifyMatchFailure(
-        oldLoopOp, "containing loop op should either yield just one value or "
-                   "have the consumer op as its first user");
+        outerMostLoop,
+        "containing loop op should either yield just one value or "
+        "have the consumer op as its first user");
   }
 
   OpBuilder::InsertionGuard g(rewriter);
 
   // 2. Check consumer is not using scf loop's output as init.
-  auto dstOp = cast<DestinationStyleOpInterface>(consumerOp);
+  auto dstOp = dyn_cast<DestinationStyleOpInterface>(consumerOp);
+  if (!dstOp)
+    return rewriter.notifyMatchFailure(consumerOp,
+                                       "consumer op is not DPS operation");
   SmallVector<Value> dpsInits =
       llvm::map_to_vector(dstOp.getDpsInits(), [](Value v) { return v; });
-  if (llvm::is_contained(dpsInits, oldLoopOp->getResult(resultNumber))) {
+  if (llvm::is_contained(dpsInits, outerMostLoop->getResult(resultNumber))) {
     return rewriter.notifyMatchFailure(
         consumerOp,
         "consumer op taking the result of scf.for as init is not supported");
   }
-  newOuts.append(dpsInits);
+  SmallVector<Value> newInits = dpsInits;
 
-  Location loc = oldLoopOp->getLoc();
-
-  // 3. Create new scf loop op.
-  rewriter.setInsertionPoint(consumerOp);
-  Operation *newLoopOp = nullptr;
-  Block *newLoopBody = nullptr;
-  if (isInsertSliceOp) {
-    auto forOp = cast<scf::ForOp>(oldLoopOp);
-    auto newForOp = rewriter.create<scf::ForOp>(loc, forOp.getLowerBound(),
-                                                forOp.getUpperBound(),
-                                                forOp.getStep(), newOuts);
-    newLoopOp = newForOp;
-    newLoopBody = newForOp.getBody();
-  } else {
-    auto forallOp = cast<scf::ForallOp>(oldLoopOp);
-    auto newForallOp = rewriter.create<scf::ForallOp>(
-        loc, forallOp.getMixedLowerBound(), forallOp.getMixedUpperBound(),
-        forallOp.getMixedStep(), newOuts, forallOp.getMapping());
-    newLoopOp = newForallOp;
-    rewriter.eraseOp(newForallOp.getTerminator());
-    newLoopBody = newForallOp.getBody();
-  }
+  Location loc = outerMostLoop->getLoc();
 
-  // 4. Move the loop body to the new op.
-  unsigned oldNumArguments = oldLoopBody->getNumArguments();
-  rewriter.mergeBlocks(oldLoopBody, newLoopBody,
-                       newLoopBody->getArguments().take_front(oldNumArguments));
+  // 3. Move the whole loop structure right before the consumer op; dominance
+  // is already ensured by `checkAssumptionForLoop`.
+  rewriter.moveOpBefore(outerMostLoop, consumerOp);
 
-  // 5. Set insertion point before terminator op of the loop and create a new
+  // 4. Set insertion point before terminator op of the loop and create a new
   // tensor.insert_slice. In the scf.for case this is a clone of the
   // candidateSliceOp whereas in the scf.forall case this is created from the
   // operands of tensor.parallel_insert_slice.
tensor::InsertSliceOp clonedInsertSliceOp; if (auto sliceOp = dyn_cast(candidateSliceOp)) { - auto newForallOp = cast(newLoopOp); + auto newForallOp = cast(innerMostLoop.getOperation()); rewriter.setInsertionPoint(newForallOp.getTerminator()); clonedInsertSliceOp = rewriter.create( loc, sliceOp.getSource(), sliceOp.getDest(), sliceOp.getMixedOffsets(), @@ -1852,20 +1826,17 @@ mlir::scf::tileAndFuseConsumerOfSlice(RewriterBase &rewriter, cast(rewriter.clone(*candidateSliceOp)); } - // 6.a. Clone consumer op. - auto newForOpBlockArgsForConsumerDest = - newLoopBody->getArguments().drop_front(oldNumArguments); - auto clonedConsumerOp = cast(cloneOpAndUpdateDestinationArgs( - rewriter, consumerOp, newForOpBlockArgsForConsumerDest)); + // 5.a. Clone consumer op. + auto clonedConsumerOp = cast(rewriter.clone(*consumerOp)); - // 6.b. Replace all uses of the loop result with the result of the cloned + // 5.b. Replace all uses of the loop result with the result of the cloned // tensor.insert_slice. OpOperand &operandToReplace = clonedConsumerOp->getOpOperand(operandNumber); rewriter.modifyOpInPlace(clonedConsumerOp, [&]() { operandToReplace.set(clonedInsertSliceOp.getResult()); }); - // 7 - Perform tiling of the cloned consumer and replace the operand at + // 6. Perform tiling of the cloned consumer and replace the operand at // `operandNumber` with the source of the cloned tensor.insert_slice op. auto ossSliceOp = cast(clonedInsertSliceOp.getOperation()); @@ -1875,79 +1846,108 @@ mlir::scf::tileAndFuseConsumerOfSlice(RewriterBase &rewriter, if (failed(tileAndFuseResult)) { return failure(); } - rewriter.replaceAllUsesWith( - tileAndFuseResult->tiledOps[0]->getOperand(operandNumber), - clonedInsertSliceOp.getSource()); - - // 8 - Extract offset/sizes/strides required to create the - // tensor.insert_slice/parallel_insert_slice for each result of the consumer. - SmallVector offsets = ossSliceOp.getMixedOffsets(); - SmallVector sizes = ossSliceOp.getMixedSizes(); - SmallVector strides = ossSliceOp.getMixedStrides(); - - // 9. Check all insert stride is 1. - if (llvm::any_of(strides, [](OpFoldResult stride) { - return !isConstantIntValue(stride, 1); - })) { - return rewriter.notifyMatchFailure( - candidateSliceOp, "containingOp's result yield with stride"); - } + auto tiledConsumerOp = cast(tileAndFuseResult->tiledOps[0]); + rewriter.replaceAllUsesWith(tiledConsumerOp->getOperand(operandNumber), + clonedInsertSliceOp.getSource()); - // 10. Try to get iter domain position from input position. - SmallVector iterDomainOffsets, iterDomainSizes; - if (failed(clonedConsumerOp.getIterationDomainTileFromOperandTile( - rewriter, operandNumber, offsets, sizes, iterDomainOffsets, - iterDomainSizes))) { - return rewriter.notifyMatchFailure( - clonedConsumerOp, "can't get iter domain position from input position"); - } + // 7. Reconstruct [nested] loop with new inits. + YieldTiledValuesFn newYieldValuesFn = + [&](RewriterBase &innerRewriter, Location loc, ValueRange /*ivs*/, + ValueRange newRegionIterArgs, SmallVector &tiledResult, + SmallVector> &tiledOffset, + SmallVector> &tiledSizes) -> LogicalResult { + OpBuilder::InsertionGuard g(innerRewriter); + // 8. Set inner insertPoint right before tiled consumer op. + innerRewriter.setInsertionPoint(tiledConsumerOp); + + SmallVector offsets = ossSliceOp.getMixedOffsets(); + SmallVector sizes = ossSliceOp.getMixedSizes(); + SmallVector strides = ossSliceOp.getMixedStrides(); - // 11. Try to fetch the offset and size for all results of the cloned - // consumer. 
This would then be used to form the corresponding - // tensor.insert_slice/parallel_insert_slice later. - unsigned totalNumResultsOfConsumer = clonedConsumerOp->getNumResults(); - SmallVector> resultOffsets( - totalNumResultsOfConsumer); - SmallVector> resultSizes(totalNumResultsOfConsumer); - for (auto [idx, v] : llvm::enumerate(clonedConsumerOp->getResults())) { - if (failed(clonedConsumerOp.getResultTilePosition( - rewriter, idx, iterDomainOffsets, iterDomainSizes, - resultOffsets[idx], resultSizes[idx]))) { + // 9. Check all insert stride is 1. + if (llvm::any_of(strides, [](OpFoldResult stride) { + return !isConstantIntValue(stride, 1); + })) { return rewriter.notifyMatchFailure( - clonedConsumerOp, - "can't get result domain position from iter domain position"); + candidateSliceOp, "containingOp's result yield with stride"); } - } - auto arrayRefOffsets = ArrayRef>(resultOffsets); - auto arrayRefSizes = ArrayRef>(resultSizes); - if (isInsertSliceOp) { - auto newForOp = cast(newLoopOp); - fixTerminatorSCFYield( - rewriter, newForOp, *tileAndFuseResult, arrayRefOffsets, arrayRefSizes, - newForOp.getBody()->getArguments().drop_front(1 + initSize)); - } else { - auto newForallOp = cast(newLoopOp); - fixTerminatorSCFInParallel( - rewriter, newForallOp, tileAndFuseResult->tiledOps[0]->getResults(), - arrayRefOffsets, arrayRefSizes, - newForallOp.getBody()->getArguments().drop_front(rank + initSize)); - } + // 10. Try to get iter domain position from input position. + SmallVector iterDomainOffsets, iterDomainSizes; + if (failed(tiledConsumerOp.getIterationDomainTileFromOperandTile( + rewriter, operandNumber, offsets, sizes, iterDomainOffsets, + iterDomainSizes))) { + return rewriter.notifyMatchFailure( + tiledConsumerOp, + "can't get iter domain position from input position"); + } - // 12. Replace the result of scf loop and consumer op with new loop's results. - for (auto &&[oldResult, newResult] : - llvm::zip_first(oldLoopOp->getResults(), newLoopOp->getResults())) { - rewriter.replaceAllUsesWith(oldResult, newResult); + // 11. Try to fetch the offset and size for all results of the cloned + // consumer. This would then be used to form the corresponding + // tensor.insert_slice/parallel_insert_slice later. + unsigned totalNumResultsOfConsumer = tiledConsumerOp->getNumResults(); + SmallVector> resultOffsets( + totalNumResultsOfConsumer); + SmallVector> resultSizes( + totalNumResultsOfConsumer); + for (auto [idx, v] : llvm::enumerate(tiledConsumerOp->getResults())) { + if (failed(tiledConsumerOp.getResultTilePosition( + rewriter, idx, iterDomainOffsets, iterDomainSizes, + resultOffsets[idx], resultSizes[idx]))) { + return rewriter.notifyMatchFailure( + tiledConsumerOp, + "can't get result domain position from iter domain position"); + } + } + + // 12. Create `extract_slice` for `iter_args` for DPS operation if + // necessary. + if (auto tiledDestStyleOp = dyn_cast( + tiledConsumerOp.getOperation())) { + rewriter.setInsertionPoint(tiledDestStyleOp); + for (const auto &&[index, newRegionArg] : + llvm::enumerate(newRegionIterArgs)) { + auto destSlice = rewriter.create( + loc, newRegionArg, resultOffsets[index], resultSizes[index], + SmallVector(resultOffsets[index].size(), + rewriter.getIndexAttr(1))); + // Make a copy of index to avoid a capturing structured binding, which + // is a C++20 extension. + auto dstNumber = index; + rewriter.modifyOpInPlace(tiledDestStyleOp, [&]() { + tiledDestStyleOp.getDpsInitsMutable()[dstNumber].set(destSlice); + }); + } + } + + // 13. 
Prepare tiled offset and sizes for later `insert_slice` creation by + // caller. + Block *block = rewriter.getInsertionPoint()->getBlock(); + rewriter.setInsertionPoint(block->getTerminator()); + for (const auto &&[index, result] : + llvm::enumerate(tiledConsumerOp->getResults())) { + tiledResult.push_back(result); + tiledOffset.emplace_back(resultOffsets[index]); + tiledSizes.emplace_back(resultSizes[index]); + } + return success(); + }; + // 14. Add new inits to [nested] loops. + if (failed(addInitOperandsToLoopNest(rewriter, nestedLoops, newInits, + newYieldValuesFn))) { + return rewriter.notifyMatchFailure(tiledConsumerOp, + "unable to add new inits to nest loop"); } - for (auto &&[oldResult, newResult] : - llvm::zip(consumerOp->getResults(), - newLoopOp->getResults().drop_front(initSize))) { + // 15. Replace the result of scf loop and consumer op with new loop's results. + + for (auto &&[oldResult, newResult] : llvm::zip( + consumerOp->getResults(), + nestedLoops.front()->getResults().take_back(newInits.size()))) { rewriter.replaceAllUsesWith(oldResult, newResult); } - // 13. Need to erase the old scf loop and the cloned consumer op. - rewriter.eraseOp(oldLoopOp); + // 16. Need to erase the old scf loop and the cloned consumer op. rewriter.eraseOp(clonedConsumerOp); return scf::SCFFuseConsumerOfSliceResult{ diff --git a/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir b/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir index 717667c22af800..cc51a8c40942f9 100644 --- a/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir +++ b/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir @@ -151,6 +151,17 @@ func.func @gpu_gcn_raw_buffer_atomic_fadd_f32(%value: f32, %buf: memref<64xf32>, func.return } +// CHECK-LABEL: func @gpu_gcn_raw_buffer_atomic_fadd_v2f16 +func.func @gpu_gcn_raw_buffer_atomic_fadd_v2f16(%value: vector<2xf16>, %buf: memref<64xf16>, %idx: i32) { + // CHECK: %[[numRecords:.*]] = llvm.mlir.constant(128 : i32) + // GFX9: %[[flags:.*]] = llvm.mlir.constant(159744 : i32) + // RDNA: %[[flags:.*]] = llvm.mlir.constant(822243328 : i32) + // CHECK: %[[resource:.*]] = rocdl.make.buffer.rsrc %{{.*}}, %{{.*}}, %[[numRecords]], %[[flags]] + // CHECK: rocdl.raw.ptr.buffer.atomic.fadd %{{.*}}, %[[resource]], %{{.*}}, %{{.*}}, %{{.*}} : vector<2xf16> + amdgpu.raw_buffer_atomic_fadd {boundsCheck = true} %value -> %buf[%idx] : vector<2xf16> -> memref<64xf16>, i32 + func.return +} + // CHECK-LABEL: func @gpu_gcn_raw_buffer_atomic_fmax_f32 func.func @gpu_gcn_raw_buffer_atomic_fmax_f32(%value: f32, %buf: memref<64xf32>, %idx: i32) { // CHECK: %[[numRecords:.*]] = llvm.mlir.constant(256 : i32) diff --git a/mlir/test/Dialect/Tosa/invalid.mlir b/mlir/test/Dialect/Tosa/invalid.mlir index f6c1d725ba13b4..c295e2c8e2ba5c 100644 --- a/mlir/test/Dialect/Tosa/invalid.mlir +++ b/mlir/test/Dialect/Tosa/invalid.mlir @@ -626,3 +626,12 @@ func.func @test_table_io_shape_mismatch(%arg0: tensor, %arg1: tensor<6 %0 = tosa.table %arg0, %arg1 : (tensor, tensor<6xi16>) -> tensor return } + +// ----- + +// CHECK-LABEL: test_transpose_conv2d_invalid_outshape +func.func @test_transpose_conv2d_invalid_outshape(%arg0: tensor<1x32x32x8xf32>, %arg1: tensor<16x1x1x8xf32>, %arg2: tensor<16xf32>) -> tensor<1x32x32x16xf32> { + // expected-error@+1 {{'tosa.transpose_conv2d' op attribute 'out_shape' failed to satisfy constraint: i64 dense array attribute with exactly 4 elements}} + %0 = tosa.transpose_conv2d %arg0, %arg1, %arg2 {out_pad = array, out_shape = array, stride = array} : (tensor<1x32x32x8xf32>, 
tensor<16x1x1x8xf32>, tensor<16xf32>) -> tensor<1x32x32x16xf32> + return %0 : tensor<1x32x32x16xf32> +} diff --git a/mlir/test/Interfaces/TilingInterface/tile-and-fuse-consumer.mlir b/mlir/test/Interfaces/TilingInterface/tile-and-fuse-consumer.mlir index 83c5ec8d7342c8..fdefdcc453ae7a 100644 --- a/mlir/test/Interfaces/TilingInterface/tile-and-fuse-consumer.mlir +++ b/mlir/test/Interfaces/TilingInterface/tile-and-fuse-consumer.mlir @@ -109,9 +109,9 @@ module attributes {transform.with_named_sequence} { // CHECK-SAME: ins(%[[MAT_OUT]], %[[SLICE_OPERAND2]] : // CHECK-SAME: outs(%[[SLICE_OUT]] : // CHECK: scf.forall.in_parallel { -// CHECK: tensor.parallel_insert_slice %[[ELEM_OUT]] into %[[ELEM_OUT_ARG]][%[[IV1]], %[[IV2]]] [32, 32] [1, 1] // CHECK: tensor.parallel_insert_slice %[[MAT_OUT]] into %[[SECOND_OUT_ARG]][%[[IV1]], %[[IV2]]] [32, 32] [1, 1] // CHECK: tensor.parallel_insert_slice %[[SECOND_ARG_SLICE]] into %[[FIRST_OUT_ARG]][%[[IV1]], %[[IV2]]] [32, 32] [1, 1] +// CHECK: tensor.parallel_insert_slice %[[ELEM_OUT]] into %[[ELEM_OUT_ARG]][%[[IV1]], %[[IV2]]] [32, 32] [1, 1] // CHECK: } // CHECK: } // CHECK: return %[[FINAL_RESULT]]#2 : @@ -248,10 +248,10 @@ module attributes {transform.with_named_sequence} { // CHECK-SAME: ins(%[[MAT_OUT]], %[[SLICE_OPERAND2]] : // CHECK-SAME: outs(%[[SLICE_OUT_0]], %[[SLICE_OUT_1]] : // CHECK: scf.forall.in_parallel { -// CHECK: tensor.parallel_insert_slice %[[ELEM_OUT]]#0 into %[[ELEM_OUT_ARG_0]][%[[IV1]], %[[IV2]]] [32, 32] [1, 1] -// CHECK: tensor.parallel_insert_slice %[[ELEM_OUT]]#1 into %[[ELEM_OUT_ARG_1]][%[[IV1]], %[[IV2]]] [32, 32] [1, 1] // CHECK: tensor.parallel_insert_slice %[[MAT_OUT]] into %[[SECOND_OUT_ARG]][%[[IV1]], %[[IV2]]] [32, 32] [1, 1] // CHECK: tensor.parallel_insert_slice %[[SECOND_ARG_SLICE]] into %[[FIRST_OUT_ARG]][%[[IV1]], %[[IV2]]] [32, 32] [1, 1] +// CHECK: tensor.parallel_insert_slice %[[ELEM_OUT]]#0 into %[[ELEM_OUT_ARG_0]][%[[IV1]], %[[IV2]]] [32, 32] [1, 1] +// CHECK: tensor.parallel_insert_slice %[[ELEM_OUT]]#1 into %[[ELEM_OUT_ARG_1]][%[[IV1]], %[[IV2]]] [32, 32] [1, 1] // CHECK: } // CHECK: } // CHECK: %[[UNPACK:.*]] = tensor.unpack %[[FINAL_RESULT]]#0 outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [32] into %{{.*}} : tensor<64x32xf32> -> tensor<2048xf32> @@ -310,8 +310,8 @@ module attributes {transform.with_named_sequence} { // CHECK-SAME: outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [32] // CHECK-SAME: into %[[TILED_UNPACK_DEST]] // CHECK: scf.forall.in_parallel { -// CHECK: tensor.parallel_insert_slice %[[TILED_UNPACK_OUT]] into %[[UNPACK_OUT_ARG]][%[[UNPACK_RESULT_OFFSET]]] [1024] [1] // CHECK: tensor.parallel_insert_slice %[[GENERIC_OUT]] into %[[FIRST_OUT_ARG]][%[[IV1]], %[[IV2]]] [32, 32] [1, 1] +// CHECK: tensor.parallel_insert_slice %[[TILED_UNPACK_OUT]] into %[[UNPACK_OUT_ARG]][%[[UNPACK_RESULT_OFFSET]]] [1024] [1] // CHECK: } // CHECK: } // CHECK: return %[[FINAL_RESULT]]#1 : @@ -369,8 +369,71 @@ module attributes {transform.with_named_sequence} { // CHECK-SAME: inner_dims_pos = [0] inner_tiles = [16] // CHECK-SAME: into %[[TILED_PACK_DEST]] // CHECK: scf.forall.in_parallel { -// CHECK: tensor.parallel_insert_slice %[[TILED_PACK_OUT]] into %[[PACK_OUT_ARG]][%[[PACK_RESULT_OFFSET]], %[[IV2]], 0] [2, 32, 16] [1, 1, 1] // CHECK: tensor.parallel_insert_slice %[[GENERIC_OUT]] into %[[FIRST_OUT_ARG]][%[[IV1]], %[[IV2]]] [32, 32] [1, 1] -// CHECK: } +// CHECK: tensor.parallel_insert_slice %[[TILED_PACK_OUT]] into %[[PACK_OUT_ARG]][%[[PACK_RESULT_OFFSET]], %[[IV2]], 0] [2, 32, 16] [1, 1, 1] 
+
+// -----
+
+module {
+  func.func @fuse_add_consumer_into_nested_scf_for(%arg0: tensor<256x512xf32>, %arg1: tensor<512x256xf32>, %arg2: tensor<256x256xf32>) -> tensor<256x256xf32> {
+    %c0 = arith.constant 0 : index
+    %c64 = arith.constant 64 : index
+    %c256 = arith.constant 256 : index
+    %cst = arith.constant 0.000000e+00 : f32
+    %dest0 = tensor.empty() : tensor<256x256xf32>
+    %dest1 = linalg.fill ins(%cst : f32) outs(%dest0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+    %1 = scf.for %arg3 = %c0 to %c256 step %c64 iter_args(%arg4 = %dest1) -> (tensor<256x256xf32>) {
+      %2 = scf.for %arg5 = %c0 to %c256 step %c64 iter_args(%arg6 = %arg4) -> (tensor<256x256xf32>) {
+        %extracted_slice_1 = tensor.extract_slice %arg6[%arg3, %arg5] [64, 64] [1, 1] : tensor<256x256xf32> to tensor<64x64xf32>
+        %extracted_slice_2 = tensor.extract_slice %arg0[%arg3, 0] [64, 512] [1, 1] : tensor<256x512xf32> to tensor<64x512xf32>
+        %extracted_slice_3 = tensor.extract_slice %arg1[0, %arg5] [512, 64] [1, 1] : tensor<512x256xf32> to tensor<512x64xf32>
+        %3 = linalg.matmul ins(%extracted_slice_2, %extracted_slice_3 : tensor<64x512xf32>, tensor<512x64xf32>) outs(%extracted_slice_1 : tensor<64x64xf32>) -> tensor<64x64xf32>
+        %insert_slice = tensor.insert_slice %3 into %arg6[%arg3, %arg5] [64, 64] [1, 1] : tensor<64x64xf32> into tensor<256x256xf32>
+        scf.yield %insert_slice : tensor<256x256xf32>
+      }
+      scf.yield %2 : tensor<256x256xf32>
+    }
+    %4 = linalg.add ins(%1, %arg2 : tensor<256x256xf32>, tensor<256x256xf32>) outs(%dest0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+    return %4 : tensor<256x256xf32>
+  }
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1 : !transform.any_op {transform.readonly}) {
+    %slice_op = transform.structured.match ops{["tensor.insert_slice"]} in %arg1
+      : (!transform.any_op) -> !transform.any_op
+    %a, %b = transform.test.fuse_consumer %slice_op
+      : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+    transform.yield
+  }
+}
+// CHECK: func.func @fuse_add_consumer_into_nested_scf_for(
+// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<256x512xf32>
+// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor<512x256xf32>
+// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: tensor<256x256xf32>
+// CHECK: %[[dest0:.*]] = tensor.empty() : tensor<256x256xf32>
+// CHECK: %[[dest1:.*]] = linalg.fill
+// CHECK-SAME: outs(%[[dest0]] :
+// CHECK: %[[LOOP_RESULT1:.*]]:2 = scf.for %[[IV1:.*]] = %[[C0]]
+// CHECK-SAME: iter_args(%[[FIRST_OUT_ARG1:.*]] = %[[dest1]], %[[SECOND_OUT_ARG1:.*]] = %[[dest0]])
+// CHECK-SAME: {
+// CHECK: %[[LOOP_RESULT2:.*]]:2 = scf.for %[[IV2:.*]] = %[[C0]]
+// CHECK-SAME: iter_args(%[[FIRST_OUT_ARG2:.*]] = %[[FIRST_OUT_ARG1]], %[[SECOND_OUT_ARG2:.*]] = %[[SECOND_OUT_ARG1]])
+// CHECK-SAME: {
+// CHECK: %[[MAT_OUT_SLICE:.*]] = tensor.extract_slice %[[FIRST_OUT_ARG2]][%[[IV1]], %[[IV2]]] [64, 64] [1, 1]
+// CHECK: %[[INPUT_SLICE:.*]] = tensor.extract_slice %[[ARG0]][%[[IV1]], 0] [64, 512] [1, 1]
+// CHECK: %[[WEIGHT_SLICE:.*]] = tensor.extract_slice %[[ARG1]][0, %[[IV2]]] [512, 64] [1, 1]
+// CHECK: %[[TILED_MAT_OUT:.*]] = linalg.matmul
+// CHECK-SAME: outs(%[[MAT_OUT_SLICE]] :
+// CHECK: %[[INSERT_MAT:.*]] = tensor.insert_slice %[[TILED_MAT_OUT]] into %[[FIRST_OUT_ARG2]][%[[IV1]], %[[IV2]]] [64, 64] [1, 1]
+// CHECK: %[[ADD_OPERAND2_SLICE:.*]] = tensor.extract_slice %[[ARG2]][%[[IV1]], %[[IV2]]] [64, 64] [1, 1]
+// CHECK: %[[ADD_OUT_SLICE:.*]] = tensor.extract_slice %[[SECOND_OUT_ARG2]][%[[IV1]], %[[IV2]]] [64, 64] [1, 1]
+// CHECK: %[[TILED_ADD_OUT:.*]] = linalg.add
+// CHECK-SAME: ins(%[[TILED_MAT_OUT]], %[[ADD_OPERAND2_SLICE]] :
+// CHECK-SAME: outs(%[[ADD_OUT_SLICE]] :
+// CHECK: %[[INSERT_ADD:.*]] = tensor.insert_slice %[[TILED_ADD_OUT]] into %[[SECOND_OUT_ARG2]][%[[IV1]], %[[IV2]]] [64, 64] [1, 1]
+// CHECK: scf.yield %[[INSERT_MAT]], %[[INSERT_ADD]] :
+// CHECK: }
+// CHECK: scf.yield %[[LOOP_RESULT2]]#0, %[[LOOP_RESULT2]]#1 :
 // CHECK: }
-// CHECK: return %[[FINAL_RESULT]]#1 :
+// CHECK: return %[[LOOP_RESULT1]]#1 :
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index f3d3c745246af8..4be8e17e7df24a 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -272,6 +272,16 @@ libc_support_library(
     ],
 )
 
+libc_support_library(
+    name = "__support_macros_null_check",
+    hdrs = ["src/__support/macros/null_check.h"],
+    deps = [
+        ":__support_macros_config",
+        ":__support_macros_optimization",
+        ":__support_macros_sanitizer",
+    ],
+)
+
 libc_support_library(
     name = "__support_common",
     hdrs = [
@@ -665,6 +675,7 @@ libc_support_library(
         ":__support_ctype_utils",
         ":__support_fputil_fp_bits",
         ":__support_fputil_rounding_mode",
+        ":__support_macros_null_check",
         ":__support_str_to_integer",
         ":__support_str_to_num_result",
         ":__support_uint128",
@@ -4126,25 +4137,23 @@ libc_function(
     ],
 )
 
-#TODO: Enable once epoll_pwait2 availablilty can be checked first.
-# https://github.com/llvm/llvm-project/issues/80060
-# libc_function(
-#     name = "epoll_pwait2",
-#     srcs = ["src/sys/epoll/linux/epoll_pwait2.cpp"],
-#     hdrs = ["src/sys/epoll/epoll_pwait2.h"],
-#     target_compatible_with = select({
-#         "@platforms//os:linux": [],
-#         "//conditions:default": ["@platforms//:incompatible"],
-#     }),
-#     weak = True,
-#     deps = [
-#         ":__support_macros_sanitizer",
-#         ":__support_osutil_syscall",
-#         ":errno",
-#         ":hdr_signal_macros",
-#         ":hdr_sys_epoll_macros",
-#         ":types_sigset_t",
-#         ":types_struct_epoll_event",
-#         ":types_struct_timespec",
-#     ],
-# )
+libc_function(
+    name = "epoll_pwait2",
+    srcs = ["src/sys/epoll/linux/epoll_pwait2.cpp"],
+    hdrs = ["src/sys/epoll/epoll_pwait2.h"],
+    target_compatible_with = select({
+        "@platforms//os:linux": [],
+        "//conditions:default": ["@platforms//:incompatible"],
+    }),
+    weak = True,
+    deps = [
+        ":__support_macros_sanitizer",
+        ":__support_osutil_syscall",
+        ":errno",
+        ":hdr_signal_macros",
+        ":hdr_sys_epoll_macros",
+        ":types_sigset_t",
+        ":types_struct_epoll_event",
+        ":types_struct_timespec",
+    ],
+)
diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/sys/epoll/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/sys/epoll/BUILD.bazel
index b090bde35b88d6..7fb50403682a74 100644
--- a/utils/bazel/llvm-project-overlay/libc/test/src/sys/epoll/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/test/src/sys/epoll/BUILD.bazel
@@ -78,21 +78,19 @@ libc_test(
     ],
 )
 
-#TODO: Enable once epoll_pwait2 availablilty can be checked first.
-# https://github.com/llvm/llvm-project/issues/80060
-# libc_test(
-#     name = "epoll_pwait2_test",
-#     srcs = ["linux/epoll_pwait2_test.cpp"],
-#     libc_function_deps = [
-#         "//libc:epoll_pwait2",
-#         "//libc:epoll_create1",
-#         "//libc:epoll_ctl",
-#         "//libc:pipe",
-#         "//libc:close",
-#     ],
-#     deps = [
-#         "//libc:hdr_sys_epoll_macros",
-#         "//libc:types_struct_epoll_event",
-#         "//libc:types_struct_timespec",
-#     ],
-# )
+libc_test(
+    name = "epoll_pwait2_test",
+    srcs = ["linux/epoll_pwait2_test.cpp"],
+    libc_function_deps = [
+        "//libc:epoll_pwait2",
+        "//libc:epoll_create1",
+        "//libc:epoll_ctl",
+        "//libc:pipe",
+        "//libc:close",
+    ],
+    deps = [
+        "//libc:hdr_sys_epoll_macros",
+        "//libc:types_struct_epoll_event",
+        "//libc:types_struct_timespec",
+    ],
+)