From 68d993e913ee0ac61dfcb10cabb1f8e43ea4234a Mon Sep 17 00:00:00 2001
From: Aiden Grossman
Date: Wed, 25 Oct 2023 21:55:38 -0700
Subject: [PATCH 001/877] [Github] Add lld to docs CI (#69821)

This patch adds the lld documentation to the documentation GitHub Actions
CI to automatically validate, in PRs and at tip of tree, that the docs
build and there aren't any Sphinx warnings. There is existing buildbot
coverage for the lld docs, but this is much more convenient to use in
cases like PRs.
---
 .github/workflows/docs.yml | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index a0670625bb3e3..cbb3706cc1bcf 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -20,6 +20,7 @@ on:
       - 'libunwind/docs/**'
       - 'libcxx/docs/**'
       - 'libc/docs/**'
+      - 'lld/docs/**'
   pull_request:
     paths:
       - 'llvm/docs/**'
@@ -29,6 +30,7 @@ on:
       - 'libunwind/docs/**'
       - 'libcxx/docs/**'
       - 'libc/docs/**'
+      - 'lld/docs/**'
 
 jobs:
   check-docs-build:
@@ -63,6 +65,8 @@ jobs:
             - 'libcxx/docs/**'
           libc:
             - 'libc/docs/**'
+          lld:
+            - 'lld/docs/**'
       - name: Fetch LLVM sources (PR)
         if: ${{ github.event_name == 'pull_request' }}
         uses: actions/checkout@v4
@@ -116,4 +120,9 @@ jobs:
         run: |
           cmake -B libc-build -GNinja -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_RUNTIMES="libc" -DLLVM_ENABLE_SPHINX=ON ./runtimes
           TZ=UTC ninja -C docs-libc-html
+      - name: Build LLD docs
+        if: steps.docs-changed-subprojects.outputs.lld_any_changed == 'true'
+        run: |
+          cmake -B lld-build -GNinja -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_PROJECTS="lld" -DLLVM_ENABLE_SPHINX=ON ./llvm
+          TZ=UTC ninja -C lld-build docs-lld-html

From 13ea1146a78ef1e00d88b50fd0f903f336751003 Mon Sep 17 00:00:00 2001
From: Rana Pratap Reddy <109514914+ranapratap55@users.noreply.github.com>
Date: Thu, 26 Oct 2023 10:26:11 +0530
Subject: [PATCH 002/877] [AMDGPU] Lower __builtin_amdgcn_read_exec_hi to use
 amdgcn_ballot (#69567)

Currently, __builtin_amdgcn_read_exec_hi lowers to llvm.read_register;
this patch lowers it to use amdgcn_ballot instead.
---
 clang/lib/CodeGen/CGBuiltin.cpp               | 21 +++++++++++-----
 .../CodeGenOpenCL/builtins-amdgcn-wave32.cl   | 24 +++++++++++++++++++
 .../CodeGenOpenCL/builtins-amdgcn-wave64.cl   | 23 ++++++++++++++++++
 clang/test/CodeGenOpenCL/builtins-amdgcn.cl   |  4 +++-
 4 files changed, 65 insertions(+), 7 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index e1211bb8949b6..85be8bdd00516 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -7995,15 +7995,23 @@ enum SpecialRegisterAccessKind {
   Write,
 };
 
+// Generates the IR for __builtin_read_exec_*.
+// Lowers the builtin to the amdgcn_ballot intrinsic.
static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, - llvm::Type *ValueType) { + llvm::Type *ValueType, bool isExecHi) { CodeGen::CGBuilderTy &Builder = CGF.Builder; CodeGen::CodeGenModule &CGM = CGF.CGM; - llvm::Type *ResultType = CGF.ConvertType(E->getType()); - Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType}); + Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType}); llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)}); + + if (isExecHi) { + Value *Rt2 = Builder.CreateLShr(Call, 32); + Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty); + return Rt2; + } + return Call; } @@ -17857,10 +17865,11 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1}); } case AMDGPU::BI__builtin_amdgcn_read_exec: + return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false); case AMDGPU::BI__builtin_amdgcn_read_exec_lo: - case AMDGPU::BI__builtin_amdgcn_read_exec_hi: { - return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty); - } + return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false); + case AMDGPU::BI__builtin_amdgcn_read_exec_hi: + return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true); case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray: case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h: case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l: diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl index a4d14cf1f6cf0..43553131f63c5 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl @@ -13,6 +13,8 @@ void test_ballot_wave32(global uint* out, int a, int b) *out = __builtin_amdgcn_ballot_w32(a == b); } +// CHECK: declare i32 @llvm.amdgcn.ballot.i32(i1) #[[$NOUNWIND_READONLY:[0-9]+]] + // CHECK-LABEL: @test_ballot_wave32_target_attr( // CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 %{{.+}}) __attribute__((target("wavefrontsize32"))) @@ -21,6 +23,28 @@ void test_ballot_wave32_target_attr(global uint* out, int a, int b) *out = __builtin_amdgcn_ballot_w32(a == b); } +// CHECK-LABEL: @test_read_exec( +// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true) +void test_read_exec(global uint* out) { + *out = __builtin_amdgcn_read_exec(); +} + +// CHECK: declare i64 @llvm.amdgcn.ballot.i64(i1) #[[$NOUNWIND_READONLY:[0-9]+]] + +// CHECK-LABEL: @test_read_exec_lo( +// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true) +void test_read_exec_lo(global uint* out) { + *out = __builtin_amdgcn_read_exec_lo(); +} + +// CHECK-LABEL: @test_read_exec_hi( +// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true) +// CHECK: lshr i64 [[A:%.*]], 32 +// CHECK: trunc i64 [[B:%.*]] to i32 +void test_read_exec_hi(global uint* out) { + *out = __builtin_amdgcn_read_exec_hi(); +} + #if __AMDGCN_WAVEFRONT_SIZE != 32 #error Wrong wavesize detected #endif diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl index 563c9a2a240c1..53f34c6a44ae7 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl @@ -13,6 +13,8 @@ void test_ballot_wave64(global ulong* out, int a, int b) *out = __builtin_amdgcn_ballot_w64(a == b); } +// CHECK: declare i64 @llvm.amdgcn.ballot.i64(i1) #[[$NOUNWIND_READONLY:[0-9]+]] + // CHECK-LABEL: @test_ballot_wave64_target_attr( // CHECK: call i64 
@llvm.amdgcn.ballot.i64(i1 %{{.+}})
 __attribute__((target("wavefrontsize64")))
@@ -21,6 +23,27 @@ void test_ballot_wave64_target_attr(global ulong* out, int a, int b)
   *out = __builtin_amdgcn_ballot_w64(a == b);
 }
 
+// CHECK-LABEL: @test_read_exec(
+// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
+void test_read_exec(global ulong* out) {
+  *out = __builtin_amdgcn_read_exec();
+}
+
+// CHECK-LABEL: @test_read_exec_lo(
+// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true)
+void test_read_exec_lo(global ulong* out) {
+  *out = __builtin_amdgcn_read_exec_lo();
+}
+
+// CHECK: declare i32 @llvm.amdgcn.ballot.i32(i1) #[[$NOUNWIND_READONLY:[0-9]+]]
+
+// CHECK-LABEL: @test_read_exec_hi(
+// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
+// CHECK: lshr i64 [[A:%.*]], 32
+void test_read_exec_hi(global ulong* out) {
+  *out = __builtin_amdgcn_read_exec_hi();
+}
+
 #if __AMDGCN_WAVEFRONT_SIZE != 64
 #error Wrong wavesize detected
 #endif
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
index 8938642e3b19f..0bc9a54682d3e 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -526,7 +526,9 @@ void test_read_exec_lo(global uint* out) {
 // CHECK: declare i32 @llvm.amdgcn.ballot.i32(i1) #[[$NOUNWIND_READONLY:[0-9]+]]
 
 // CHECK-LABEL: @test_read_exec_hi(
-// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true)
+// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
+// CHECK: lshr i64 [[A:%.*]], 32
+// CHECK: trunc i64 [[B:%.*]] to i32
 void test_read_exec_hi(global uint* out) {
   *out = __builtin_amdgcn_read_exec_hi();
 }

From 9c3c0e324f5152c699cc14b79a630589be5eced3 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng
Date: Thu, 26 Oct 2023 13:00:36 +0800
Subject: [PATCH 003/877] [RISCV] Separate addend from FMA operands to support
 cascade FMA. NFC. (#70241)

This PR separates the addend from the FMA operands to support cascaded
FMAs. In some microarchitectures (e.g., Arm Cortex-A72 and
XiangShan-NanHu), FP multiply-accumulate pipelines support
late-forwarding of accumulate operands, which reduces the latency of a
sequence of multiply-accumulate instructions. See also #70232.
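
To illustrate the kind of code this lets targets model (a hypothetical
sketch, not code from this patch), an accumulation chain consumes each
FMA result only through the addend operand of the next FMA:

  // Hypothetical C example: with the D extension, each __builtin_fma
  // call lowers to an fmadd.d, and the previous result is needed only
  // as the addend of the next fmadd.d.
  double dot(const double *a, const double *b, int n) {
    double acc = 0.0;
    for (int i = 0; i < n; ++i)
      acc = __builtin_fma(a[i], b[i], acc);
    return acc;
  }

With a dedicated ReadFMA*Addend scheduling read, a subtarget can attach
a nonzero ReadAdvance to just the addend operand to model
late-forwarding; this patch keeps all ReadAdvance values at 0, so it is
NFC.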
--- llvm/lib/Target/RISCV/RISCVInstrInfoD.td | 2 +- llvm/lib/Target/RISCV/RISCVInstrInfoF.td | 2 +- llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td | 2 +- llvm/lib/Target/RISCV/RISCVSchedRocket.td | 2 ++ llvm/lib/Target/RISCV/RISCVSchedSiFive7.td | 3 +++ llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td | 2 ++ llvm/lib/Target/RISCV/RISCVSchedule.td | 3 +++ 7 files changed, 13 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td index 59312f02aeceb..34becfafe7747 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td @@ -78,7 +78,7 @@ def FSD : FPStore_r<0b011, "fsd", FPR64, WriteFST64>; } // Predicates = [HasStdExtD] foreach Ext = DExts in { - let SchedRW = [WriteFMA64, ReadFMA64, ReadFMA64, ReadFMA64] in { + let SchedRW = [WriteFMA64, ReadFMA64, ReadFMA64, ReadFMA64Addend] in { defm FMADD_D : FPFMA_rrr_frm_m; defm FMSUB_D : FPFMA_rrr_frm_m; defm FNMSUB_D : FPFMA_rrr_frm_m; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td index 8726245f1602e..3a5794bb2d194 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td @@ -302,7 +302,7 @@ def FSW : FPStore_r<0b010, "fsw", FPR32, WriteFST32>; } // Predicates = [HasStdExtF] foreach Ext = FExts in { - let SchedRW = [WriteFMA32, ReadFMA32, ReadFMA32, ReadFMA32] in { + let SchedRW = [WriteFMA32, ReadFMA32, ReadFMA32, ReadFMA32Addend] in { defm FMADD_S : FPFMA_rrr_frm_m; defm FMSUB_S : FPFMA_rrr_frm_m; defm FNMSUB_S : FPFMA_rrr_frm_m; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td index b65e9f5af0331..1dc391d3f084f 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td @@ -85,7 +85,7 @@ def FSH : FPStore_r<0b001, "fsh", FPR16, WriteFST16>; } // Predicates = [HasHalfFPLoadStoreMove] foreach Ext = ZfhExts in { - let SchedRW = [WriteFMA16, ReadFMA16, ReadFMA16, ReadFMA16] in { + let SchedRW = [WriteFMA16, ReadFMA16, ReadFMA16, ReadFMA16Addend] in { defm FMADD_H : FPFMA_rrr_frm_m; defm FMSUB_H : FPFMA_rrr_frm_m; defm FNMSUB_H : FPFMA_rrr_frm_m; diff --git a/llvm/lib/Target/RISCV/RISCVSchedRocket.td b/llvm/lib/Target/RISCV/RISCVSchedRocket.td index 8fbc9afe267c5..bb9dfe5d01240 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedRocket.td +++ b/llvm/lib/Target/RISCV/RISCVSchedRocket.td @@ -206,7 +206,9 @@ def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; +def : ReadAdvance; def : ReadAdvance; +def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td index 96ebe8e3e6768..d2447cf23e266 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td @@ -933,10 +933,13 @@ def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; +def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; +def : ReadAdvance; def : ReadAdvance; +def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; diff --git a/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td b/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td index 960258c8bc7df..06ad2075b0736 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td @@ -164,7 +164,9 @@ def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; def : 
ReadAdvance;
+def : ReadAdvance;
 def : ReadAdvance;
+def : ReadAdvance;
 def : ReadAdvance;
 def : ReadAdvance;
 def : ReadAdvance;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedule.td b/llvm/lib/Target/RISCV/RISCVSchedule.td
index af318ea5bf685..f6c1b096ad90c 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedule.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedule.td
@@ -150,8 +150,11 @@ def ReadFMul16 : SchedRead; // 16-bit floating point multiply
 def ReadFMul32 : SchedRead; // 32-bit floating point multiply
 def ReadFMul64 : SchedRead; // 64-bit floating point multiply
 def ReadFMA16 : SchedRead; // 16-bit floating point fused multiply-add
+def ReadFMA16Addend : SchedRead; // 16-bit floating point fused multiply-add (addend)
 def ReadFMA32 : SchedRead; // 32-bit floating point fused multiply-add
+def ReadFMA32Addend : SchedRead; // 32-bit floating point fused multiply-add (addend)
 def ReadFMA64 : SchedRead; // 64-bit floating point fused multiply-add
+def ReadFMA64Addend : SchedRead; // 64-bit floating point fused multiply-add (addend)
 def ReadFDiv16 : SchedRead; // 16-bit floating point divide
 def ReadFDiv32 : SchedRead; // 32-bit floating point divide
 def ReadFDiv64 : SchedRead; // 64-bit floating point divide

From 1097c71dbeefaff0c353c90cb57bc07b6ede6383 Mon Sep 17 00:00:00 2001
From: Piotr Zegar
Date: Thu, 26 Oct 2023 07:10:39 +0200
Subject: [PATCH 004/877] [clang-tidy] Support functional cast in
 bugprone-dangling-handle (#69067)

Add support for constructor-conversion-based functional casts. This
allows detecting issues like:

  const std::string_view test1 = std::string(a);
---
 .../clang-tidy/bugprone/DanglingHandleCheck.cpp      | 12 +++++++++---
 clang-tools-extra/docs/ReleaseNotes.rst              |  5 +++++
 .../clang-tidy/checkers/bugprone/dangling-handle.cpp |  8 ++++++++
 3 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/clang-tools-extra/clang-tidy/bugprone/DanglingHandleCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/DanglingHandleCheck.cpp
index 9ded699ba78e6..d55df3a6d7b74 100644
--- a/clang-tools-extra/clang-tidy/bugprone/DanglingHandleCheck.cpp
+++ b/clang-tools-extra/clang-tidy/bugprone/DanglingHandleCheck.cpp
@@ -33,14 +33,20 @@ handleFrom(const ast_matchers::internal::Matcher &IsAHandle,
 
 ast_matchers::internal::Matcher handleFromTemporaryValue(
     const ast_matchers::internal::Matcher &IsAHandle) {
+
+  const auto TemporaryExpr = anyOf(
+      cxxBindTemporaryExpr(),
+      cxxFunctionalCastExpr(
+          hasCastKind(CK_ConstructorConversion),
+          hasSourceExpression(ignoringParenImpCasts(cxxBindTemporaryExpr()))));
   // If a ternary operator returns a temporary value, then both branches hold a
   // temporary value. If one of them is not a temporary then it must be copied
   // into one to satisfy the type of the operator.
const auto TemporaryTernary = conditionalOperator( - hasTrueExpression(ignoringParenImpCasts(cxxBindTemporaryExpr())), - hasFalseExpression(ignoringParenImpCasts(cxxBindTemporaryExpr()))); + hasTrueExpression(ignoringParenImpCasts(TemporaryExpr)), + hasFalseExpression(ignoringParenImpCasts(TemporaryExpr))); - return handleFrom(IsAHandle, anyOf(cxxBindTemporaryExpr(), TemporaryTernary)); + return handleFrom(IsAHandle, anyOf(TemporaryExpr, TemporaryTernary)); } ast_matchers::internal::Matcher isASequence() { diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 6d1992e12130d..ac95afd782e1d 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -207,6 +207,11 @@ New check aliases Changes in existing checks ^^^^^^^^^^^^^^^^^^^^^^^^^^ +- Improved :doc:`bugprone-dangling-handle + ` check to support functional + casting during type conversions at variable initialization, now with improved + compatibility for C++17 and later versions. + - Improved :doc:`bugprone-lambda-function-name ` check by adding option `IgnoreMacros` to ignore warnings in macros. diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/dangling-handle.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/dangling-handle.cpp index 23cda53217643..96c812617038a 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/dangling-handle.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/dangling-handle.cpp @@ -108,6 +108,14 @@ void Positives() { std::string_view view4(ReturnsAString()); // CHECK-MESSAGES-CXX14: [[@LINE-1]]:20: warning: std::basic_string_view outlives // CHECK-MESSAGES-CXX17: [[@LINE-2]]:26: warning: std::basic_string_view outlives + + std::string_view view5 = std::string("test"); + // CHECK-MESSAGES-CXX14: [[@LINE-1]]:20: warning: std::basic_string_view outlives its value [bugprone-dangling-handle] + // CHECK-MESSAGES-CXX17: [[@LINE-2]]:28: warning: std::basic_string_view outlives its value [bugprone-dangling-handle] + + std::string_view view6 = std::string{"test"}; + // CHECK-MESSAGES-CXX14: [[@LINE-1]]:20: warning: std::basic_string_view outlives its value [bugprone-dangling-handle] + // CHECK-MESSAGES-CXX17: [[@LINE-2]]:28: warning: std::basic_string_view outlives its value [bugprone-dangling-handle] } void OtherTypes() { From af07d7ba883b6e4921820d88b6679f294a0b9fa5 Mon Sep 17 00:00:00 2001 From: Piotr Zegar Date: Thu, 26 Oct 2023 07:11:01 +0200 Subject: [PATCH 005/877] [clang-tidy] Improved cppcoreguidelines-pro-type-const-cast (#69501) Improved cppcoreguidelines-pro-type-const-cast check to ignore casts to const type (controlled by option) and casts in implicitly invoked code. 
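
For illustration (a hypothetical snippet; the tests added in this patch
cover more cases), with StrictMode disabled the check now warns only
when a cast actually removes a qualifier:

  const int *ci = nullptr;
  int *p = const_cast<int *>(ci);             // flagged: removes const
  const int *c2 = const_cast<const int *>(p); // ignored: only adds const
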
Fixes #69319 --- .../ProTypeConstCastCheck.cpp | 49 ++++++++++- .../cppcoreguidelines/ProTypeConstCastCheck.h | 12 ++- clang-tools-extra/docs/ReleaseNotes.rst | 5 ++ .../cppcoreguidelines/pro-type-const-cast.rst | 32 ++++++- .../cppcoreguidelines/pro-type-const-cast.cpp | 86 ++++++++++++++++++- .../nonstandard-file-extension.test | 2 +- 6 files changed, 174 insertions(+), 12 deletions(-) diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeConstCastCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeConstCastCheck.cpp index ef803ab85fa08..8c44c1bfb62b6 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeConstCastCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeConstCastCheck.cpp @@ -14,13 +14,60 @@ using namespace clang::ast_matchers; namespace clang::tidy::cppcoreguidelines { +static bool hasConstQualifier(QualType Type) { + const QualType PtrType = Type->getPointeeType(); + if (!PtrType.isNull()) + return hasConstQualifier(PtrType); + + return Type.isConstQualified(); +} + +static bool hasVolatileQualifier(QualType Type) { + const QualType PtrType = Type->getPointeeType(); + if (!PtrType.isNull()) + return hasVolatileQualifier(PtrType); + return Type.isVolatileQualified(); +} + +ProTypeConstCastCheck::ProTypeConstCastCheck(StringRef Name, + ClangTidyContext *Context) + : ClangTidyCheck(Name, Context), + StrictMode(Options.getLocalOrGlobal("StrictMode", false)) {} + +void ProTypeConstCastCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { + Options.store(Opts, "StrictMode", StrictMode); +} + void ProTypeConstCastCheck::registerMatchers(MatchFinder *Finder) { Finder->addMatcher(cxxConstCastExpr().bind("cast"), this); } void ProTypeConstCastCheck::check(const MatchFinder::MatchResult &Result) { const auto *MatchedCast = Result.Nodes.getNodeAs("cast"); - diag(MatchedCast->getOperatorLoc(), "do not use const_cast"); + if (StrictMode) { + diag(MatchedCast->getOperatorLoc(), "do not use const_cast"); + return; + } + + const QualType TargetType = MatchedCast->getType().getCanonicalType(); + const QualType SourceType = + MatchedCast->getSubExpr()->getType().getCanonicalType(); + + const bool RemovingConst = + hasConstQualifier(SourceType) && !hasConstQualifier(TargetType); + const bool RemovingVolatile = + hasVolatileQualifier(SourceType) && !hasVolatileQualifier(TargetType); + + if (!RemovingConst && !RemovingVolatile) { + // Cast is doing nothing. + return; + } + + diag(MatchedCast->getOperatorLoc(), + "do not use const_cast to remove%select{| const}0%select{| " + "and}2%select{| volatile}1 qualifier") + << RemovingConst << RemovingVolatile + << (RemovingConst && RemovingVolatile); } } // namespace clang::tidy::cppcoreguidelines diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeConstCastCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeConstCastCheck.h index f7ae9bbb60dcd..8d93633a321b5 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeConstCastCheck.h +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeConstCastCheck.h @@ -13,19 +13,25 @@ namespace clang::tidy::cppcoreguidelines { -/// This check flags all instances of const_cast +/// Imposes limitations on the use of const_cast within C++ code. 
/// /// For the user-facing documentation see: /// http://clang.llvm.org/extra/clang-tidy/checks/cppcoreguidelines/pro-type-const-cast.html class ProTypeConstCastCheck : public ClangTidyCheck { public: - ProTypeConstCastCheck(StringRef Name, ClangTidyContext *Context) - : ClangTidyCheck(Name, Context) {} + ProTypeConstCastCheck(StringRef Name, ClangTidyContext *Context); bool isLanguageVersionSupported(const LangOptions &LangOpts) const override { return LangOpts.CPlusPlus; } void registerMatchers(ast_matchers::MatchFinder *Finder) override; void check(const ast_matchers::MatchFinder::MatchResult &Result) override; + void storeOptions(ClangTidyOptions::OptionMap &Opts) override; + std::optional getCheckTraversalKind() const override { + return TK_IgnoreUnlessSpelledInSource; + } + +private: + const bool StrictMode; }; } // namespace clang::tidy::cppcoreguidelines diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index ac95afd782e1d..13003a118c36a 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -255,6 +255,11 @@ Changes in existing checks ` check to perform checks on derived classes of ``std::array``. +- Improved :doc:`cppcoreguidelines-pro-type-const-cast + ` check to ignore + casts to ``const`` or ``volatile`` type (controlled by `StrictMode` option) + and casts in implicitly invoked code. + - Improved :doc:`cppcoreguidelines-pro-type-member-init ` check to ignore dependent delegate constructors. diff --git a/clang-tools-extra/docs/clang-tidy/checks/cppcoreguidelines/pro-type-const-cast.rst b/clang-tools-extra/docs/clang-tidy/checks/cppcoreguidelines/pro-type-const-cast.rst index eb572e625f129..961a591cb81f8 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/cppcoreguidelines/pro-type-const-cast.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/cppcoreguidelines/pro-type-const-cast.rst @@ -3,11 +3,35 @@ cppcoreguidelines-pro-type-const-cast ===================================== -This check flags all uses of ``const_cast`` in C++ code. +Imposes limitations on the use of ``const_cast`` within C++ code. It depends on +the :option:`StrictMode` option setting to determine whether it should flag all +instances of ``const_cast`` or only those that remove either ``const`` or +``volatile`` qualifier. -Modifying a variable that was declared const is undefined behavior, even with -``const_cast``. +Modifying a variable that has been declared as ``const`` in C++ is generally +considered undefined behavior, and this remains true even when using +``const_cast``. In C++, the ``const`` qualifier indicates that a variable is +intended to be read-only, and the compiler enforces this by disallowing any +attempts to change the value of that variable. + +Removing the ``volatile`` qualifier in C++ can have serious consequences. This +qualifier indicates that a variable's value can change unpredictably, and +removing it may lead to undefined behavior, optimization problems, and debugging +challenges. It's essential to retain the ``volatile`` qualifier in situations +where the variable's volatility is a crucial aspect of program correctness and +reliability. This rule is part of the `Type safety (Type 3) `_ -profile from the C++ Core Guidelines. +profile and `ES.50: Don’t cast away const +`_ +rule from the C++ Core Guidelines. + +Options +------- + +.. option:: StrictMode + + When this setting is set to `true`, it means that any usage of ``const_cast`` + is not allowed. 
On the other hand, when it's set to `false`, it permits + casting to ``const`` or ``volatile`` types. Default value is `false`. diff --git a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines/pro-type-const-cast.cpp b/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines/pro-type-const-cast.cpp index 2d32e13723abf..be70e3ba35699 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines/pro-type-const-cast.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines/pro-type-const-cast.cpp @@ -1,6 +1,86 @@ -// RUN: %check_clang_tidy %s cppcoreguidelines-pro-type-const-cast %t +// RUN: %check_clang_tidy -check-suffix=STRICT %s cppcoreguidelines-pro-type-const-cast %t -- -config="{CheckOptions: {StrictMode: true}}" +// RUN: %check_clang_tidy -check-suffix=NSTRICT %s cppcoreguidelines-pro-type-const-cast %t +namespace Const { const int *i; int *j; -void f() { j = const_cast(i); } -// CHECK-MESSAGES: :[[@LINE-1]]:16: warning: do not use const_cast [cppcoreguidelines-pro-type-const-cast] + +void f() { + j = const_cast(i); + // CHECK-MESSAGES-NSTRICT: :[[@LINE-1]]:7: warning: do not use const_cast to remove const qualifier [cppcoreguidelines-pro-type-const-cast] + // CHECK-MESSAGES-STRICT: :[[@LINE-2]]:7: warning: do not use const_cast [cppcoreguidelines-pro-type-const-cast] + + i = const_cast(j); + // CHECK-MESSAGES-STRICT: :[[@LINE-1]]:7: warning: do not use const_cast [cppcoreguidelines-pro-type-const-cast] + + j = *const_cast(&i); + // CHECK-MESSAGES-NSTRICT: :[[@LINE-1]]:8: warning: do not use const_cast to remove const qualifier [cppcoreguidelines-pro-type-const-cast] + // CHECK-MESSAGES-STRICT: :[[@LINE-2]]:8: warning: do not use const_cast [cppcoreguidelines-pro-type-const-cast] + + i = *const_cast(&j); + // CHECK-MESSAGES-STRICT: :[[@LINE-1]]:8: warning: do not use const_cast [cppcoreguidelines-pro-type-const-cast] + + j = &const_cast(*i); + // CHECK-MESSAGES-NSTRICT: :[[@LINE-1]]:8: warning: do not use const_cast to remove const qualifier [cppcoreguidelines-pro-type-const-cast] + // CHECK-MESSAGES-STRICT: :[[@LINE-2]]:8: warning: do not use const_cast [cppcoreguidelines-pro-type-const-cast] + + i = &const_cast(*j); + // CHECK-MESSAGES-STRICT: :[[@LINE-1]]:8: warning: do not use const_cast [cppcoreguidelines-pro-type-const-cast] +} +} + +namespace Volatile { +volatile int *i; +int *j; + +void f() { + j = const_cast(i); + // CHECK-MESSAGES-NSTRICT: :[[@LINE-1]]:7: warning: do not use const_cast to remove volatile qualifier [cppcoreguidelines-pro-type-const-cast] + // CHECK-MESSAGES-STRICT: :[[@LINE-2]]:7: warning: do not use const_cast [cppcoreguidelines-pro-type-const-cast] + + i = const_cast(j); + // CHECK-MESSAGES-STRICT: :[[@LINE-1]]:7: warning: do not use const_cast [cppcoreguidelines-pro-type-const-cast] + + j = *const_cast(&i); + // CHECK-MESSAGES-NSTRICT: :[[@LINE-1]]:8: warning: do not use const_cast to remove volatile qualifier [cppcoreguidelines-pro-type-const-cast] + // CHECK-MESSAGES-STRICT: :[[@LINE-2]]:8: warning: do not use const_cast [cppcoreguidelines-pro-type-const-cast] + + i = *const_cast(&j); + // CHECK-MESSAGES-STRICT: :[[@LINE-1]]:8: warning: do not use const_cast [cppcoreguidelines-pro-type-const-cast] + + j = &const_cast(*i); + // CHECK-MESSAGES-NSTRICT: :[[@LINE-1]]:8: warning: do not use const_cast to remove volatile qualifier [cppcoreguidelines-pro-type-const-cast] + // CHECK-MESSAGES-STRICT: :[[@LINE-2]]:8: warning: do not use const_cast [cppcoreguidelines-pro-type-const-cast] + + i = &const_cast(*j); + // 
CHECK-MESSAGES-STRICT: :[[@LINE-1]]:8: warning: do not use const_cast [cppcoreguidelines-pro-type-const-cast] +} +} + +namespace ConstAndVolatile { +const volatile int *i; +int *j; + +void f() { + j = const_cast(i); + // CHECK-MESSAGES-NSTRICT: :[[@LINE-1]]:7: warning: do not use const_cast to remove const and volatile qualifier [cppcoreguidelines-pro-type-const-cast] + // CHECK-MESSAGES-STRICT: :[[@LINE-2]]:7: warning: do not use const_cast [cppcoreguidelines-pro-type-const-cast] + + i = const_cast(j); + // CHECK-MESSAGES-STRICT: :[[@LINE-1]]:7: warning: do not use const_cast [cppcoreguidelines-pro-type-const-cast] + + j = *const_cast(&i); + // CHECK-MESSAGES-NSTRICT: :[[@LINE-1]]:8: warning: do not use const_cast to remove const and volatile qualifier [cppcoreguidelines-pro-type-const-cast] + // CHECK-MESSAGES-STRICT: :[[@LINE-2]]:8: warning: do not use const_cast [cppcoreguidelines-pro-type-const-cast] + + i = *const_cast(&j); + // CHECK-MESSAGES-STRICT: :[[@LINE-1]]:8: warning: do not use const_cast [cppcoreguidelines-pro-type-const-cast] + + j = &const_cast(*i); + // CHECK-MESSAGES-NSTRICT: :[[@LINE-1]]:8: warning: do not use const_cast to remove const and volatile qualifier [cppcoreguidelines-pro-type-const-cast] + // CHECK-MESSAGES-STRICT: :[[@LINE-2]]:8: warning: do not use const_cast [cppcoreguidelines-pro-type-const-cast] + + i = &const_cast(*j); + // CHECK-MESSAGES-STRICT: :[[@LINE-1]]:8: warning: do not use const_cast [cppcoreguidelines-pro-type-const-cast] +} +} diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nonstandard-file-extension.test b/clang-tools-extra/test/clang-tidy/infrastructure/nonstandard-file-extension.test index 4cb4d1171195a..9f98d86d0bcc0 100644 --- a/clang-tools-extra/test/clang-tidy/infrastructure/nonstandard-file-extension.test +++ b/clang-tools-extra/test/clang-tidy/infrastructure/nonstandard-file-extension.test @@ -3,4 +3,4 @@ const int *i; int *j; void f() { j = const_cast(i); } -// CHECK-MESSAGES: :[[@LINE-1]]:16: warning: do not use const_cast [cppcoreguidelines-pro-type-const-cast] +// CHECK-MESSAGES: :[[@LINE-1]]:16: warning: do not use const_cast to remove const qualifier [cppcoreguidelines-pro-type-const-cast] From fd06155acb620b047fa1d586383f8738e17dbec3 Mon Sep 17 00:00:00 2001 From: Piotr Zegar Date: Thu, 26 Oct 2023 07:16:25 +0200 Subject: [PATCH 006/877] [clang-tidy] Improved cppcoreguidelines-narrowing-conversions.IgnoreConversionFromTypes (#69242) Extended IgnoreConversionFromTypes option to include types without a declaration, such as built-in types. 
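
For illustration (mirroring the test added in this patch), listing the
built-in type "long" in IgnoreConversionFromTypes now suppresses the
warning even though built-in types have no declaration for the matcher
to name:

  // With IgnoreConversionFromTypes: "global_size_t;nested_size_type;long"
  long x = 123;
  short y = x; // narrowing long -> short, ignored when "long" is listed
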
--- .../NarrowingConversionsCheck.cpp | 38 ++++++++++++++----- clang-tools-extra/docs/ReleaseNotes.rst | 5 +++ ...sions-ignoreconversionfromtypes-option.cpp | 9 ++++- 3 files changed, 41 insertions(+), 11 deletions(-) diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/NarrowingConversionsCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/NarrowingConversionsCheck.cpp index 1b858db511f50..45fef9471d521 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/NarrowingConversionsCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/NarrowingConversionsCheck.cpp @@ -14,6 +14,7 @@ #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" #include "llvm/ADT/APSInt.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" @@ -23,6 +24,26 @@ using namespace clang::ast_matchers; namespace clang::tidy::cppcoreguidelines { +namespace { + +AST_MATCHER_P(QualType, hasAnyType, std::vector, Names) { + if (Names.empty()) + return false; + + std::string Name = Node.getLocalUnqualifiedType().getAsString(); + return llvm::any_of(Names, [&Name](StringRef Ref) { return Ref == Name; }); +} + +AST_MATCHER(FieldDecl, hasIntBitwidth) { + assert(Node.isBitField()); + const ASTContext &Ctx = Node.getASTContext(); + unsigned IntBitWidth = Ctx.getIntWidth(Ctx.IntTy); + unsigned CurrentBitWidth = Node.getBitWidthValue(Ctx); + return IntBitWidth == CurrentBitWidth; +} + +} // namespace + NarrowingConversionsCheck::NarrowingConversionsCheck(StringRef Name, ClangTidyContext *Context) : ClangTidyCheck(Name, Context), @@ -53,25 +74,22 @@ void NarrowingConversionsCheck::storeOptions( Options.store(Opts, "PedanticMode", PedanticMode); } -AST_MATCHER(FieldDecl, hasIntBitwidth) { - assert(Node.isBitField()); - const ASTContext &Ctx = Node.getASTContext(); - unsigned IntBitWidth = Ctx.getIntWidth(Ctx.IntTy); - unsigned CurrentBitWidth = Node.getBitWidthValue(Ctx); - return IntBitWidth == CurrentBitWidth; -} - void NarrowingConversionsCheck::registerMatchers(MatchFinder *Finder) { // ceil() and floor() are guaranteed to return integers, even though the type // is not integral. const auto IsCeilFloorCallExpr = expr(callExpr(callee(functionDecl( hasAnyName("::ceil", "::std::ceil", "::floor", "::std::floor"))))); + std::vector IgnoreConversionFromTypesVec = + utils::options::parseStringList(IgnoreConversionFromTypes); + // We may want to exclude other types from the checks, such as `size_type` // and `difference_type`. These are often used to count elements, represented // in 64 bits and assigned to `int`. Rarely are people counting >2B elements. - const auto IsConversionFromIgnoredType = hasType(namedDecl( - hasAnyName(utils::options::parseStringList(IgnoreConversionFromTypes)))); + const auto IsConversionFromIgnoredType = + anyOf(hasType(namedDecl(hasAnyName(IgnoreConversionFromTypesVec))), + allOf(unless(hasType(namedDecl())), + hasType(qualType(hasAnyType(IgnoreConversionFromTypesVec))))); // `IsConversionFromIgnoredType` will ignore narrowing calls from those types, // but not expressions that are promoted to an ignored type as a result of a diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 13003a118c36a..c93775beb8aea 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -243,6 +243,11 @@ Changes in existing checks coroutine functions and increase issue detection for cases involving type aliases with references. 
+- Improved :doc:`cppcoreguidelines-narrowing-conversions + ` check by + extending the `IgnoreConversionFromTypes` option to include types without a + declaration, such as built-in types. + - Improved :doc:`cppcoreguidelines-prefer-member-initializer ` check to ignore delegate constructors. diff --git a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines/narrowing-conversions-ignoreconversionfromtypes-option.cpp b/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines/narrowing-conversions-ignoreconversionfromtypes-option.cpp index ab9aabf44ff68..91e908f535a0d 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines/narrowing-conversions-ignoreconversionfromtypes-option.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines/narrowing-conversions-ignoreconversionfromtypes-option.cpp @@ -4,7 +4,7 @@ // RUN: %check_clang_tidy -check-suffix=IGNORED %s \ // RUN: cppcoreguidelines-narrowing-conversions %t -- \ // RUN: -config='{CheckOptions: { \ -// RUN: cppcoreguidelines-narrowing-conversions.IgnoreConversionFromTypes: "global_size_t;nested_size_type" \ +// RUN: cppcoreguidelines-narrowing-conversions.IgnoreConversionFromTypes: "global_size_t;nested_size_type;long" \ // RUN: }}' // We use global_size_t instead of 'size_t' because windows predefines size_t. @@ -72,3 +72,10 @@ void most_narrowing_is_not_ok() { // CHECK-MESSAGES-DEFAULT: :[[@LINE-1]]:7: warning: narrowing conversion from 'long long' to signed type 'int' is implementation-defined [cppcoreguidelines-narrowing-conversions] // CHECK-MESSAGES-IGNORED: :[[@LINE-2]]:7: warning: narrowing conversion from 'long long' to signed type 'int' is implementation-defined [cppcoreguidelines-narrowing-conversions] } + +void test_ignore_builtin_type_pr58809() { + long x = 123; + short y = x; + // CHECK-MESSAGES-DEFAULT: :[[@LINE-1]]:13: warning: narrowing conversion from 'long' to signed type 'short' is implementation-defined [cppcoreguidelines-narrowing-conversions] + // CHECK-MESSAGES-NOT-IGNORED: :[[@LINE-2]]:13: warning: narrowing conversion from 'long' to signed type 'short' is implementation-defined [cppcoreguidelines-narrowing-conversions] +} From ec6da0652282d29569faa628d2180909fa588906 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Fri, 20 Oct 2023 00:35:45 -0700 Subject: [PATCH 007/877] Apply clang-tidy fixes for misc-include-cleaner in MLIR examples --- mlir/examples/toy/Ch2/toyc.cpp | 6 +++++- mlir/examples/toy/Ch3/mlir/Dialect.cpp | 12 +++++++++++ mlir/examples/toy/Ch3/mlir/MLIRGen.cpp | 15 ++++++++++++-- mlir/examples/toy/Ch3/mlir/ToyCombine.cpp | 5 +++-- mlir/examples/toy/Ch3/parser/AST.cpp | 3 +++ mlir/examples/toy/Ch3/toyc.cpp | 9 ++++++++- mlir/examples/toy/Ch4/mlir/Dialect.cpp | 15 ++++++++++++++ mlir/examples/toy/Ch4/mlir/MLIRGen.cpp | 15 ++++++++++++-- .../toy/Ch4/mlir/ShapeInferencePass.cpp | 8 ++++++++ mlir/examples/toy/Ch4/mlir/ToyCombine.cpp | 5 +++-- mlir/examples/toy/Ch4/parser/AST.cpp | 3 +++ mlir/examples/toy/Ch4/toyc.cpp | 9 ++++++++- mlir/examples/toy/Ch5/mlir/Dialect.cpp | 15 ++++++++++++++ .../toy/Ch5/mlir/LowerToAffineLoops.cpp | 18 +++++++++++++++++ mlir/examples/toy/Ch5/mlir/MLIRGen.cpp | 15 ++++++++++++-- .../toy/Ch5/mlir/ShapeInferencePass.cpp | 8 ++++++++ mlir/examples/toy/Ch5/mlir/ToyCombine.cpp | 5 +++-- mlir/examples/toy/Ch5/parser/AST.cpp | 3 +++ mlir/examples/toy/Ch5/toyc.cpp | 9 ++++++++- mlir/examples/toy/Ch6/mlir/Dialect.cpp | 15 ++++++++++++++ .../toy/Ch6/mlir/LowerToAffineLoops.cpp | 18 +++++++++++++++++ mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp | 13 
++++++++++-- mlir/examples/toy/Ch6/mlir/MLIRGen.cpp | 15 ++++++++++++-- .../toy/Ch6/mlir/ShapeInferencePass.cpp | 8 ++++++++ mlir/examples/toy/Ch6/mlir/ToyCombine.cpp | 5 +++-- mlir/examples/toy/Ch6/parser/AST.cpp | 3 +++ mlir/examples/toy/Ch6/toyc.cpp | 11 +++++++++- mlir/examples/toy/Ch7/mlir/Dialect.cpp | 20 +++++++++++++++++++ .../toy/Ch7/mlir/LowerToAffineLoops.cpp | 18 +++++++++++++++++ mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp | 13 ++++++++++-- mlir/examples/toy/Ch7/mlir/MLIRGen.cpp | 18 ++++++++++++++++- .../toy/Ch7/mlir/ShapeInferencePass.cpp | 8 ++++++++ mlir/examples/toy/Ch7/mlir/ToyCombine.cpp | 9 +++++++-- mlir/examples/toy/Ch7/parser/AST.cpp | 3 +++ mlir/examples/toy/Ch7/toyc.cpp | 11 +++++++++- .../transform/Ch2/lib/MyExtension.cpp | 8 ++++++++ 36 files changed, 345 insertions(+), 29 deletions(-) diff --git a/mlir/examples/toy/Ch2/toyc.cpp b/mlir/examples/toy/Ch2/toyc.cpp index 59ac5cab0637b..fa431972e211e 100644 --- a/mlir/examples/toy/Ch2/toyc.cpp +++ b/mlir/examples/toy/Ch2/toyc.cpp @@ -10,15 +10,19 @@ // //===----------------------------------------------------------------------===// +#include "toy/AST.h" #include "toy/Dialect.h" +#include "toy/Lexer.h" #include "toy/MLIRGen.h" #include "toy/Parser.h" #include +#include +#include +#include #include "mlir/IR/AsmState.h" #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/MLIRContext.h" -#include "mlir/IR/Verifier.h" #include "mlir/Parser/Parser.h" #include "llvm/ADT/StringRef.h" diff --git a/mlir/examples/toy/Ch3/mlir/Dialect.cpp b/mlir/examples/toy/Ch3/mlir/Dialect.cpp index 2e492404c3f6c..79d82e59645d9 100644 --- a/mlir/examples/toy/Ch3/mlir/Dialect.cpp +++ b/mlir/examples/toy/Ch3/mlir/Dialect.cpp @@ -13,10 +13,22 @@ #include "toy/Dialect.h" +#include "mlir/IR/Attributes.h" #include "mlir/IR/Builders.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/OpImplementation.h" +#include "mlir/IR/Operation.h" +#include "mlir/IR/OperationSupport.h" +#include "mlir/IR/Value.h" #include "mlir/Interfaces/FunctionImplementation.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Support/LogicalResult.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Casting.h" +#include +#include using namespace mlir; using namespace mlir::toy; diff --git a/mlir/examples/toy/Ch3/mlir/MLIRGen.cpp b/mlir/examples/toy/Ch3/mlir/MLIRGen.cpp index b1abd37c57257..2f0a88f7095b7 100644 --- a/mlir/examples/toy/Ch3/mlir/MLIRGen.cpp +++ b/mlir/examples/toy/Ch3/mlir/MLIRGen.cpp @@ -12,20 +12,31 @@ //===----------------------------------------------------------------------===// #include "toy/MLIRGen.h" +#include "mlir/IR/Block.h" +#include "mlir/IR/Diagnostics.h" +#include "mlir/IR/Value.h" +#include "mlir/Support/LogicalResult.h" #include "toy/AST.h" #include "toy/Dialect.h" -#include "mlir/IR/Attributes.h" #include "mlir/IR/Builders.h" #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/MLIRContext.h" #include "mlir/IR/Verifier.h" +#include "toy/Lexer.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopedHashTable.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include +#include +#include #include +#include +#include using namespace mlir::toy; using namespace toy; diff --git a/mlir/examples/toy/Ch3/mlir/ToyCombine.cpp b/mlir/examples/toy/Ch3/mlir/ToyCombine.cpp index 01bd10c2fd173..3ce35c86eb880 100644 --- a/mlir/examples/toy/Ch3/mlir/ToyCombine.cpp +++ 
b/mlir/examples/toy/Ch3/mlir/ToyCombine.cpp @@ -11,10 +11,11 @@ // //===----------------------------------------------------------------------===// -#include "mlir/IR/Matchers.h" +#include "mlir/IR/MLIRContext.h" #include "mlir/IR/PatternMatch.h" +#include "mlir/IR/Value.h" +#include "mlir/Support/LogicalResult.h" #include "toy/Dialect.h" -#include using namespace mlir; using namespace toy; diff --git a/mlir/examples/toy/Ch3/parser/AST.cpp b/mlir/examples/toy/Ch3/parser/AST.cpp index 2eaabb1b529e1..2546f2a9725d6 100644 --- a/mlir/examples/toy/Ch3/parser/AST.cpp +++ b/mlir/examples/toy/Ch3/parser/AST.cpp @@ -12,9 +12,12 @@ #include "toy/AST.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Twine.h" #include "llvm/ADT/TypeSwitch.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/raw_ostream.h" +#include using namespace toy; diff --git a/mlir/examples/toy/Ch3/toyc.cpp b/mlir/examples/toy/Ch3/toyc.cpp index 4b7f41285e066..8c27a7af97a00 100644 --- a/mlir/examples/toy/Ch3/toyc.cpp +++ b/mlir/examples/toy/Ch3/toyc.cpp @@ -10,7 +10,11 @@ // //===----------------------------------------------------------------------===// +#include "mlir/IR/Diagnostics.h" +#include "mlir/Support/LogicalResult.h" +#include "toy/AST.h" #include "toy/Dialect.h" +#include "toy/Lexer.h" #include "toy/MLIRGen.h" #include "toy/Parser.h" @@ -19,7 +23,6 @@ #include "mlir/IR/MLIRContext.h" #include "mlir/IR/Verifier.h" #include "mlir/Parser/Parser.h" -#include "mlir/Pass/Pass.h" #include "mlir/Pass/PassManager.h" #include "mlir/Transforms/Passes.h" @@ -29,6 +32,10 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include using namespace toy; namespace cl = llvm::cl; diff --git a/mlir/examples/toy/Ch4/mlir/Dialect.cpp b/mlir/examples/toy/Ch4/mlir/Dialect.cpp index de33b57c6a804..cc0ea5c4a6375 100644 --- a/mlir/examples/toy/Ch4/mlir/Dialect.cpp +++ b/mlir/examples/toy/Ch4/mlir/Dialect.cpp @@ -13,11 +13,26 @@ #include "toy/Dialect.h" +#include "mlir/IR/Attributes.h" #include "mlir/IR/Builders.h" #include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/Location.h" #include "mlir/IR/OpImplementation.h" +#include "mlir/IR/OperationSupport.h" +#include "mlir/IR/ValueRange.h" +#include "mlir/Interfaces/CallInterfaces.h" #include "mlir/Interfaces/FunctionImplementation.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Support/LogicalResult.h" #include "mlir/Transforms/InliningUtils.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Casting.h" +#include +#include +#include +#include using namespace mlir; using namespace mlir::toy; diff --git a/mlir/examples/toy/Ch4/mlir/MLIRGen.cpp b/mlir/examples/toy/Ch4/mlir/MLIRGen.cpp index 0b200f105fbf5..6c5474a9646bc 100644 --- a/mlir/examples/toy/Ch4/mlir/MLIRGen.cpp +++ b/mlir/examples/toy/Ch4/mlir/MLIRGen.cpp @@ -12,20 +12,31 @@ //===----------------------------------------------------------------------===// #include "toy/MLIRGen.h" +#include "mlir/IR/Block.h" +#include "mlir/IR/Diagnostics.h" +#include "mlir/IR/Value.h" +#include "mlir/Support/LogicalResult.h" #include "toy/AST.h" #include "toy/Dialect.h" -#include "mlir/IR/Attributes.h" #include "mlir/IR/Builders.h" #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/MLIRContext.h" #include "mlir/IR/Verifier.h" +#include "toy/Lexer.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopedHashTable.h" -#include 
"llvm/Support/raw_ostream.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include +#include +#include #include +#include +#include using namespace mlir::toy; using namespace toy; diff --git a/mlir/examples/toy/Ch4/mlir/ShapeInferencePass.cpp b/mlir/examples/toy/Ch4/mlir/ShapeInferencePass.cpp index d45baa14ab3e8..a9e995ed91bff 100644 --- a/mlir/examples/toy/Ch4/mlir/ShapeInferencePass.cpp +++ b/mlir/examples/toy/Ch4/mlir/ShapeInferencePass.cpp @@ -11,13 +11,21 @@ // //===----------------------------------------------------------------------===// +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/Operation.h" +#include "mlir/IR/Types.h" #include "mlir/Pass/Pass.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Support/TypeID.h" #include "toy/Dialect.h" #include "toy/Passes.h" #include "toy/ShapeInferenceInterface.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include #define DEBUG_TYPE "shape-inference" diff --git a/mlir/examples/toy/Ch4/mlir/ToyCombine.cpp b/mlir/examples/toy/Ch4/mlir/ToyCombine.cpp index 01bd10c2fd173..3ce35c86eb880 100644 --- a/mlir/examples/toy/Ch4/mlir/ToyCombine.cpp +++ b/mlir/examples/toy/Ch4/mlir/ToyCombine.cpp @@ -11,10 +11,11 @@ // //===----------------------------------------------------------------------===// -#include "mlir/IR/Matchers.h" +#include "mlir/IR/MLIRContext.h" #include "mlir/IR/PatternMatch.h" +#include "mlir/IR/Value.h" +#include "mlir/Support/LogicalResult.h" #include "toy/Dialect.h" -#include using namespace mlir; using namespace toy; diff --git a/mlir/examples/toy/Ch4/parser/AST.cpp b/mlir/examples/toy/Ch4/parser/AST.cpp index 2eaabb1b529e1..2546f2a9725d6 100644 --- a/mlir/examples/toy/Ch4/parser/AST.cpp +++ b/mlir/examples/toy/Ch4/parser/AST.cpp @@ -12,9 +12,12 @@ #include "toy/AST.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Twine.h" #include "llvm/ADT/TypeSwitch.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/raw_ostream.h" +#include using namespace toy; diff --git a/mlir/examples/toy/Ch4/toyc.cpp b/mlir/examples/toy/Ch4/toyc.cpp index 5e46b41ed1294..d35e07cf3f20b 100644 --- a/mlir/examples/toy/Ch4/toyc.cpp +++ b/mlir/examples/toy/Ch4/toyc.cpp @@ -10,7 +10,11 @@ // //===----------------------------------------------------------------------===// +#include "mlir/IR/Diagnostics.h" +#include "mlir/Support/LogicalResult.h" +#include "toy/AST.h" #include "toy/Dialect.h" +#include "toy/Lexer.h" #include "toy/MLIRGen.h" #include "toy/Parser.h" #include "toy/Passes.h" @@ -20,7 +24,6 @@ #include "mlir/IR/MLIRContext.h" #include "mlir/IR/Verifier.h" #include "mlir/Parser/Parser.h" -#include "mlir/Pass/Pass.h" #include "mlir/Pass/PassManager.h" #include "mlir/Transforms/Passes.h" @@ -30,6 +33,10 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include using namespace toy; namespace cl = llvm::cl; diff --git a/mlir/examples/toy/Ch5/mlir/Dialect.cpp b/mlir/examples/toy/Ch5/mlir/Dialect.cpp index 2d474af1300af..74adfeb64cce5 100644 --- a/mlir/examples/toy/Ch5/mlir/Dialect.cpp +++ b/mlir/examples/toy/Ch5/mlir/Dialect.cpp @@ -13,11 +13,26 @@ #include "toy/Dialect.h" +#include "mlir/IR/Attributes.h" #include "mlir/IR/Builders.h" #include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/Location.h" #include "mlir/IR/OpImplementation.h" +#include 
"mlir/IR/OperationSupport.h" +#include "mlir/IR/ValueRange.h" +#include "mlir/Interfaces/CallInterfaces.h" #include "mlir/Interfaces/FunctionImplementation.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Support/LogicalResult.h" #include "mlir/Transforms/InliningUtils.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Casting.h" +#include +#include +#include +#include using namespace mlir; using namespace mlir::toy; diff --git a/mlir/examples/toy/Ch5/mlir/LowerToAffineLoops.cpp b/mlir/examples/toy/Ch5/mlir/LowerToAffineLoops.cpp index fd589ddf84541..240b9f9338665 100644 --- a/mlir/examples/toy/Ch5/mlir/LowerToAffineLoops.cpp +++ b/mlir/examples/toy/Ch5/mlir/LowerToAffineLoops.cpp @@ -12,7 +12,17 @@ // //===----------------------------------------------------------------------===// +#include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/BuiltinDialect.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/Diagnostics.h" +#include "mlir/IR/DialectRegistry.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/IR/ValueRange.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Support/LogicalResult.h" +#include "mlir/Support/TypeID.h" #include "toy/Dialect.h" #include "toy/Passes.h" @@ -22,7 +32,15 @@ #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/DialectConversion.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Sequence.h" +#include "llvm/Support/Casting.h" +#include +#include +#include +#include +#include using namespace mlir; diff --git a/mlir/examples/toy/Ch5/mlir/MLIRGen.cpp b/mlir/examples/toy/Ch5/mlir/MLIRGen.cpp index 0b200f105fbf5..6c5474a9646bc 100644 --- a/mlir/examples/toy/Ch5/mlir/MLIRGen.cpp +++ b/mlir/examples/toy/Ch5/mlir/MLIRGen.cpp @@ -12,20 +12,31 @@ //===----------------------------------------------------------------------===// #include "toy/MLIRGen.h" +#include "mlir/IR/Block.h" +#include "mlir/IR/Diagnostics.h" +#include "mlir/IR/Value.h" +#include "mlir/Support/LogicalResult.h" #include "toy/AST.h" #include "toy/Dialect.h" -#include "mlir/IR/Attributes.h" #include "mlir/IR/Builders.h" #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/MLIRContext.h" #include "mlir/IR/Verifier.h" +#include "toy/Lexer.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopedHashTable.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include +#include +#include #include +#include +#include using namespace mlir::toy; using namespace toy; diff --git a/mlir/examples/toy/Ch5/mlir/ShapeInferencePass.cpp b/mlir/examples/toy/Ch5/mlir/ShapeInferencePass.cpp index d45baa14ab3e8..a9e995ed91bff 100644 --- a/mlir/examples/toy/Ch5/mlir/ShapeInferencePass.cpp +++ b/mlir/examples/toy/Ch5/mlir/ShapeInferencePass.cpp @@ -11,13 +11,21 @@ // //===----------------------------------------------------------------------===// +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/Operation.h" +#include "mlir/IR/Types.h" #include "mlir/Pass/Pass.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Support/TypeID.h" #include "toy/Dialect.h" #include "toy/Passes.h" #include "toy/ShapeInferenceInterface.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include #define 
DEBUG_TYPE "shape-inference" diff --git a/mlir/examples/toy/Ch5/mlir/ToyCombine.cpp b/mlir/examples/toy/Ch5/mlir/ToyCombine.cpp index 01bd10c2fd173..3ce35c86eb880 100644 --- a/mlir/examples/toy/Ch5/mlir/ToyCombine.cpp +++ b/mlir/examples/toy/Ch5/mlir/ToyCombine.cpp @@ -11,10 +11,11 @@ // //===----------------------------------------------------------------------===// -#include "mlir/IR/Matchers.h" +#include "mlir/IR/MLIRContext.h" #include "mlir/IR/PatternMatch.h" +#include "mlir/IR/Value.h" +#include "mlir/Support/LogicalResult.h" #include "toy/Dialect.h" -#include using namespace mlir; using namespace toy; diff --git a/mlir/examples/toy/Ch5/parser/AST.cpp b/mlir/examples/toy/Ch5/parser/AST.cpp index 2eaabb1b529e1..2546f2a9725d6 100644 --- a/mlir/examples/toy/Ch5/parser/AST.cpp +++ b/mlir/examples/toy/Ch5/parser/AST.cpp @@ -12,9 +12,12 @@ #include "toy/AST.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Twine.h" #include "llvm/ADT/TypeSwitch.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/raw_ostream.h" +#include using namespace toy; diff --git a/mlir/examples/toy/Ch5/toyc.cpp b/mlir/examples/toy/Ch5/toyc.cpp index db09315159168..e0742b8e992b3 100644 --- a/mlir/examples/toy/Ch5/toyc.cpp +++ b/mlir/examples/toy/Ch5/toyc.cpp @@ -11,7 +11,11 @@ //===----------------------------------------------------------------------===// #include "mlir/Dialect/Func/Extensions/AllExtensions.h" +#include "mlir/IR/Diagnostics.h" +#include "mlir/Support/LogicalResult.h" +#include "toy/AST.h" #include "toy/Dialect.h" +#include "toy/Lexer.h" #include "toy/MLIRGen.h" #include "toy/Parser.h" #include "toy/Passes.h" @@ -23,7 +27,6 @@ #include "mlir/IR/Verifier.h" #include "mlir/InitAllDialects.h" #include "mlir/Parser/Parser.h" -#include "mlir/Pass/Pass.h" #include "mlir/Pass/PassManager.h" #include "mlir/Transforms/Passes.h" @@ -33,6 +36,10 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include using namespace toy; namespace cl = llvm::cl; diff --git a/mlir/examples/toy/Ch6/mlir/Dialect.cpp b/mlir/examples/toy/Ch6/mlir/Dialect.cpp index 2d474af1300af..74adfeb64cce5 100644 --- a/mlir/examples/toy/Ch6/mlir/Dialect.cpp +++ b/mlir/examples/toy/Ch6/mlir/Dialect.cpp @@ -13,11 +13,26 @@ #include "toy/Dialect.h" +#include "mlir/IR/Attributes.h" #include "mlir/IR/Builders.h" #include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/Location.h" #include "mlir/IR/OpImplementation.h" +#include "mlir/IR/OperationSupport.h" +#include "mlir/IR/ValueRange.h" +#include "mlir/Interfaces/CallInterfaces.h" #include "mlir/Interfaces/FunctionImplementation.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Support/LogicalResult.h" #include "mlir/Transforms/InliningUtils.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Casting.h" +#include +#include +#include +#include using namespace mlir; using namespace mlir::toy; diff --git a/mlir/examples/toy/Ch6/mlir/LowerToAffineLoops.cpp b/mlir/examples/toy/Ch6/mlir/LowerToAffineLoops.cpp index fd589ddf84541..240b9f9338665 100644 --- a/mlir/examples/toy/Ch6/mlir/LowerToAffineLoops.cpp +++ b/mlir/examples/toy/Ch6/mlir/LowerToAffineLoops.cpp @@ -12,7 +12,17 @@ // //===----------------------------------------------------------------------===// +#include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/BuiltinDialect.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/BuiltinTypes.h" +#include 
"mlir/IR/Diagnostics.h" +#include "mlir/IR/DialectRegistry.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/IR/ValueRange.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Support/LogicalResult.h" +#include "mlir/Support/TypeID.h" #include "toy/Dialect.h" #include "toy/Passes.h" @@ -22,7 +32,15 @@ #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/DialectConversion.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Sequence.h" +#include "llvm/Support/Casting.h" +#include +#include +#include +#include +#include using namespace mlir; diff --git a/mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp b/mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp index 67b82f91b1dbd..f91d880413c9c 100644 --- a/mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp +++ b/mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp @@ -22,6 +22,14 @@ // //===----------------------------------------------------------------------===// +#include "mlir/Dialect/LLVMIR/LLVMAttrs.h" +#include "mlir/Dialect/LLVMIR/LLVMTypes.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Support/LogicalResult.h" +#include "mlir/Support/TypeID.h" #include "toy/Dialect.h" #include "toy/Passes.h" @@ -34,7 +42,6 @@ #include "mlir/Conversion/LLVMCommon/TypeConverter.h" #include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h" #include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h" -#include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" @@ -42,7 +49,9 @@ #include "mlir/Dialect/SCF/IR/SCF.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/DialectConversion.h" -#include "llvm/ADT/Sequence.h" +#include "llvm/Support/Casting.h" +#include +#include using namespace mlir; diff --git a/mlir/examples/toy/Ch6/mlir/MLIRGen.cpp b/mlir/examples/toy/Ch6/mlir/MLIRGen.cpp index 0b200f105fbf5..6c5474a9646bc 100644 --- a/mlir/examples/toy/Ch6/mlir/MLIRGen.cpp +++ b/mlir/examples/toy/Ch6/mlir/MLIRGen.cpp @@ -12,20 +12,31 @@ //===----------------------------------------------------------------------===// #include "toy/MLIRGen.h" +#include "mlir/IR/Block.h" +#include "mlir/IR/Diagnostics.h" +#include "mlir/IR/Value.h" +#include "mlir/Support/LogicalResult.h" #include "toy/AST.h" #include "toy/Dialect.h" -#include "mlir/IR/Attributes.h" #include "mlir/IR/Builders.h" #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/MLIRContext.h" #include "mlir/IR/Verifier.h" +#include "toy/Lexer.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopedHashTable.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include +#include +#include #include +#include +#include using namespace mlir::toy; using namespace toy; diff --git a/mlir/examples/toy/Ch6/mlir/ShapeInferencePass.cpp b/mlir/examples/toy/Ch6/mlir/ShapeInferencePass.cpp index d45baa14ab3e8..a9e995ed91bff 100644 --- a/mlir/examples/toy/Ch6/mlir/ShapeInferencePass.cpp +++ b/mlir/examples/toy/Ch6/mlir/ShapeInferencePass.cpp @@ -11,13 +11,21 @@ // //===----------------------------------------------------------------------===// +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/Operation.h" +#include "mlir/IR/Types.h" #include "mlir/Pass/Pass.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Support/TypeID.h" #include 
"toy/Dialect.h" #include "toy/Passes.h" #include "toy/ShapeInferenceInterface.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include #define DEBUG_TYPE "shape-inference" diff --git a/mlir/examples/toy/Ch6/mlir/ToyCombine.cpp b/mlir/examples/toy/Ch6/mlir/ToyCombine.cpp index 01bd10c2fd173..3ce35c86eb880 100644 --- a/mlir/examples/toy/Ch6/mlir/ToyCombine.cpp +++ b/mlir/examples/toy/Ch6/mlir/ToyCombine.cpp @@ -11,10 +11,11 @@ // //===----------------------------------------------------------------------===// -#include "mlir/IR/Matchers.h" +#include "mlir/IR/MLIRContext.h" #include "mlir/IR/PatternMatch.h" +#include "mlir/IR/Value.h" +#include "mlir/Support/LogicalResult.h" #include "toy/Dialect.h" -#include using namespace mlir; using namespace toy; diff --git a/mlir/examples/toy/Ch6/parser/AST.cpp b/mlir/examples/toy/Ch6/parser/AST.cpp index 2eaabb1b529e1..2546f2a9725d6 100644 --- a/mlir/examples/toy/Ch6/parser/AST.cpp +++ b/mlir/examples/toy/Ch6/parser/AST.cpp @@ -12,9 +12,12 @@ #include "toy/AST.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Twine.h" #include "llvm/ADT/TypeSwitch.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/raw_ostream.h" +#include using namespace toy; diff --git a/mlir/examples/toy/Ch6/toyc.cpp b/mlir/examples/toy/Ch6/toyc.cpp index 30522aa46107c..fe2137cfdfbfc 100644 --- a/mlir/examples/toy/Ch6/toyc.cpp +++ b/mlir/examples/toy/Ch6/toyc.cpp @@ -11,7 +11,11 @@ //===----------------------------------------------------------------------===// #include "mlir/Dialect/Func/Extensions/AllExtensions.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/Support/LogicalResult.h" +#include "toy/AST.h" #include "toy/Dialect.h" +#include "toy/Lexer.h" #include "toy/MLIRGen.h" #include "toy/Parser.h" #include "toy/Passes.h" @@ -26,7 +30,6 @@ #include "mlir/IR/Verifier.h" #include "mlir/InitAllDialects.h" #include "mlir/Parser/Parser.h" -#include "mlir/Pass/Pass.h" #include "mlir/Pass/PassManager.h" #include "mlir/Target/LLVMIR/Dialect/Builtin/BuiltinToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" @@ -34,6 +37,7 @@ #include "mlir/Transforms/Passes.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h" #include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorOr.h" @@ -41,6 +45,11 @@ #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetSelect.h" #include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include +#include using namespace toy; namespace cl = llvm::cl; diff --git a/mlir/examples/toy/Ch7/mlir/Dialect.cpp b/mlir/examples/toy/Ch7/mlir/Dialect.cpp index 8db5d735e1db0..f17173f007645 100644 --- a/mlir/examples/toy/Ch7/mlir/Dialect.cpp +++ b/mlir/examples/toy/Ch7/mlir/Dialect.cpp @@ -13,12 +13,32 @@ #include "toy/Dialect.h" +#include "mlir/IR/Attributes.h" #include "mlir/IR/Builders.h" +#include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/DialectImplementation.h" +#include "mlir/IR/Location.h" +#include "mlir/IR/MLIRContext.h" #include "mlir/IR/OpImplementation.h" +#include "mlir/IR/OperationSupport.h" +#include "mlir/IR/TypeSupport.h" +#include "mlir/IR/ValueRange.h" +#include "mlir/Interfaces/CallInterfaces.h" #include "mlir/Interfaces/FunctionImplementation.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Support/LogicalResult.h" 
#include "mlir/Transforms/InliningUtils.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Casting.h" +#include +#include +#include +#include +#include using namespace mlir; using namespace mlir::toy; diff --git a/mlir/examples/toy/Ch7/mlir/LowerToAffineLoops.cpp b/mlir/examples/toy/Ch7/mlir/LowerToAffineLoops.cpp index fd589ddf84541..240b9f9338665 100644 --- a/mlir/examples/toy/Ch7/mlir/LowerToAffineLoops.cpp +++ b/mlir/examples/toy/Ch7/mlir/LowerToAffineLoops.cpp @@ -12,7 +12,17 @@ // //===----------------------------------------------------------------------===// +#include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/BuiltinDialect.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/Diagnostics.h" +#include "mlir/IR/DialectRegistry.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/IR/ValueRange.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Support/LogicalResult.h" +#include "mlir/Support/TypeID.h" #include "toy/Dialect.h" #include "toy/Passes.h" @@ -22,7 +32,15 @@ #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/DialectConversion.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Sequence.h" +#include "llvm/Support/Casting.h" +#include +#include +#include +#include +#include using namespace mlir; diff --git a/mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp b/mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp index 67b82f91b1dbd..f91d880413c9c 100644 --- a/mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp +++ b/mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp @@ -22,6 +22,14 @@ // //===----------------------------------------------------------------------===// +#include "mlir/Dialect/LLVMIR/LLVMAttrs.h" +#include "mlir/Dialect/LLVMIR/LLVMTypes.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Support/LogicalResult.h" +#include "mlir/Support/TypeID.h" #include "toy/Dialect.h" #include "toy/Passes.h" @@ -34,7 +42,6 @@ #include "mlir/Conversion/LLVMCommon/TypeConverter.h" #include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h" #include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h" -#include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" @@ -42,7 +49,9 @@ #include "mlir/Dialect/SCF/IR/SCF.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/DialectConversion.h" -#include "llvm/ADT/Sequence.h" +#include "llvm/Support/Casting.h" +#include +#include using namespace mlir; diff --git a/mlir/examples/toy/Ch7/mlir/MLIRGen.cpp b/mlir/examples/toy/Ch7/mlir/MLIRGen.cpp index 7f2a4c29ca6b5..0f8e8df38525f 100644 --- a/mlir/examples/toy/Ch7/mlir/MLIRGen.cpp +++ b/mlir/examples/toy/Ch7/mlir/MLIRGen.cpp @@ -12,6 +12,10 @@ //===----------------------------------------------------------------------===// #include "toy/MLIRGen.h" +#include "mlir/IR/Block.h" +#include "mlir/IR/Diagnostics.h" +#include "mlir/IR/Value.h" +#include "mlir/Support/LogicalResult.h" #include "toy/AST.h" #include "toy/Dialect.h" @@ -21,12 +25,24 @@ #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/MLIRContext.h" #include "mlir/IR/Verifier.h" +#include "toy/Lexer.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopedHashTable.h" -#include "llvm/Support/raw_ostream.h" +#include 
"llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/ErrorHandling.h" +#include +#include +#include +#include #include #include +#include +#include +#include using namespace mlir::toy; using namespace toy; diff --git a/mlir/examples/toy/Ch7/mlir/ShapeInferencePass.cpp b/mlir/examples/toy/Ch7/mlir/ShapeInferencePass.cpp index d45baa14ab3e8..a9e995ed91bff 100644 --- a/mlir/examples/toy/Ch7/mlir/ShapeInferencePass.cpp +++ b/mlir/examples/toy/Ch7/mlir/ShapeInferencePass.cpp @@ -11,13 +11,21 @@ // //===----------------------------------------------------------------------===// +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/Operation.h" +#include "mlir/IR/Types.h" #include "mlir/Pass/Pass.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Support/TypeID.h" #include "toy/Dialect.h" #include "toy/Passes.h" #include "toy/ShapeInferenceInterface.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include #define DEBUG_TYPE "shape-inference" diff --git a/mlir/examples/toy/Ch7/mlir/ToyCombine.cpp b/mlir/examples/toy/Ch7/mlir/ToyCombine.cpp index 09be97ea3c015..72f5e4b5c847b 100644 --- a/mlir/examples/toy/Ch7/mlir/ToyCombine.cpp +++ b/mlir/examples/toy/Ch7/mlir/ToyCombine.cpp @@ -11,10 +11,15 @@ // //===----------------------------------------------------------------------===// -#include "mlir/IR/Matchers.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/MLIRContext.h" +#include "mlir/IR/OpDefinition.h" #include "mlir/IR/PatternMatch.h" +#include "mlir/IR/Value.h" +#include "mlir/Support/LogicalResult.h" #include "toy/Dialect.h" -#include +#include "llvm/Support/Casting.h" +#include using namespace mlir; using namespace toy; diff --git a/mlir/examples/toy/Ch7/parser/AST.cpp b/mlir/examples/toy/Ch7/parser/AST.cpp index 3542f8f9e11cf..e38a743a769c6 100644 --- a/mlir/examples/toy/Ch7/parser/AST.cpp +++ b/mlir/examples/toy/Ch7/parser/AST.cpp @@ -12,9 +12,12 @@ #include "toy/AST.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Twine.h" #include "llvm/ADT/TypeSwitch.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/raw_ostream.h" +#include using namespace toy; diff --git a/mlir/examples/toy/Ch7/toyc.cpp b/mlir/examples/toy/Ch7/toyc.cpp index a83292ecac5fe..d4cc8e7279d3b 100644 --- a/mlir/examples/toy/Ch7/toyc.cpp +++ b/mlir/examples/toy/Ch7/toyc.cpp @@ -11,7 +11,11 @@ //===----------------------------------------------------------------------===// #include "mlir/Dialect/Func/Extensions/AllExtensions.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/Support/LogicalResult.h" +#include "toy/AST.h" #include "toy/Dialect.h" +#include "toy/Lexer.h" #include "toy/MLIRGen.h" #include "toy/Parser.h" #include "toy/Passes.h" @@ -26,7 +30,6 @@ #include "mlir/IR/Verifier.h" #include "mlir/InitAllDialects.h" #include "mlir/Parser/Parser.h" -#include "mlir/Pass/Pass.h" #include "mlir/Pass/PassManager.h" #include "mlir/Target/LLVMIR/Dialect/Builtin/BuiltinToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" @@ -34,6 +37,7 @@ #include "mlir/Transforms/Passes.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h" #include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorOr.h" @@ -41,6 +45,11 @@ #include "llvm/Support/SourceMgr.h" #include 
"llvm/Support/TargetSelect.h" #include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include +#include using namespace toy; namespace cl = llvm::cl; diff --git a/mlir/examples/transform/Ch2/lib/MyExtension.cpp b/mlir/examples/transform/Ch2/lib/MyExtension.cpp index af2890d3b5239..031c52c307382 100644 --- a/mlir/examples/transform/Ch2/lib/MyExtension.cpp +++ b/mlir/examples/transform/Ch2/lib/MyExtension.cpp @@ -15,6 +15,14 @@ #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/SCF/IR/SCF.h" #include "mlir/Dialect/Transform/IR/TransformDialect.h" +#include "mlir/Dialect/Transform/IR/TransformInterfaces.h" +#include "mlir/Dialect/Transform/IR/TransformTypes.h" +#include "mlir/IR/DialectRegistry.h" +#include "mlir/IR/Operation.h" +#include "mlir/Interfaces/SideEffectInterfaces.h" +#include "mlir/Support/LLVM.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" // Define a new transform dialect extension. This uses the CRTP idiom to // identify extensions. From f5063bf7edbb6368deb7354b4340eb62b8329d2e Mon Sep 17 00:00:00 2001 From: Congcong Cai Date: Thu, 26 Oct 2023 13:37:50 +0800 Subject: [PATCH 008/877] [clang-tidy][NFC]refactor PreferMemberInitializerCheck for readability --- .../PreferMemberInitializerCheck.cpp | 252 +++++++++--------- 1 file changed, 126 insertions(+), 126 deletions(-) diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.cpp index aac824f15fa57..0ef13ae298033 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.cpp @@ -178,140 +178,140 @@ void PreferMemberInitializerCheck::check( const FieldDecl *Field = nullptr; const Expr *InitValue = nullptr; std::tie(Field, InitValue) = isAssignmentToMemberOf(Class, S, Ctor); - if (Field) { - if (IsUseDefaultMemberInitEnabled && getLangOpts().CPlusPlus11 && - Ctor->isDefaultConstructor() && - (getLangOpts().CPlusPlus20 || !Field->isBitField()) && - !Field->hasInClassInitializer() && - (!isa(Class->getDeclContext()) || - !cast(Class->getDeclContext())->isUnion()) && - shouldBeDefaultMemberInitializer(InitValue)) { - - bool InvalidFix = false; - SourceLocation FieldEnd = - Lexer::getLocForEndOfToken(Field->getSourceRange().getEnd(), 0, - *Result.SourceManager, getLangOpts()); - InvalidFix |= FieldEnd.isInvalid() || FieldEnd.isMacroID(); - SourceLocation SemiColonEnd; - if (auto NextToken = Lexer::findNextToken( - S->getEndLoc(), *Result.SourceManager, getLangOpts())) - SemiColonEnd = NextToken->getEndLoc(); - else - InvalidFix = true; - auto Diag = - diag(S->getBeginLoc(), "%0 should be initialized in an in-class" - " default member initializer") - << Field; - if (InvalidFix) + if (!Field) + continue; + const bool IsInDefaultMemberInitializer = + IsUseDefaultMemberInitEnabled && getLangOpts().CPlusPlus11 && + Ctor->isDefaultConstructor() && + (getLangOpts().CPlusPlus20 || !Field->isBitField()) && + !Field->hasInClassInitializer() && + (!isa(Class->getDeclContext()) || + !cast(Class->getDeclContext())->isUnion()) && + shouldBeDefaultMemberInitializer(InitValue); + if (IsInDefaultMemberInitializer) { + bool InvalidFix = false; + SourceLocation FieldEnd = + Lexer::getLocForEndOfToken(Field->getSourceRange().getEnd(), 0, + *Result.SourceManager, getLangOpts()); + InvalidFix |= FieldEnd.isInvalid() || FieldEnd.isMacroID(); + SourceLocation SemiColonEnd; + if 
(auto NextToken = Lexer::findNextToken( + S->getEndLoc(), *Result.SourceManager, getLangOpts())) + SemiColonEnd = NextToken->getEndLoc(); + else + InvalidFix = true; + auto Diag = + diag(S->getBeginLoc(), "%0 should be initialized in an in-class" + " default member initializer") + << Field; + if (InvalidFix) + continue; + CharSourceRange StmtRange = + CharSourceRange::getCharRange(S->getBeginLoc(), SemiColonEnd); + + SmallString<128> Insertion( + {UseAssignment ? " = " : "{", + Lexer::getSourceText( + CharSourceRange(InitValue->getSourceRange(), true), + *Result.SourceManager, getLangOpts()), + UseAssignment ? "" : "}"}); + + Diag << FixItHint::CreateInsertion(FieldEnd, Insertion) + << FixItHint::CreateRemoval(StmtRange); + + } else { + StringRef InsertPrefix = ""; + bool HasInitAlready = false; + SourceLocation InsertPos; + SourceRange ReplaceRange; + bool AddComma = false; + bool InvalidFix = false; + unsigned Index = Field->getFieldIndex(); + const CXXCtorInitializer *LastInListInit = nullptr; + for (const CXXCtorInitializer *Init : Ctor->inits()) { + if (!Init->isWritten() || Init->isInClassMemberInitializer()) continue; - CharSourceRange StmtRange = - CharSourceRange::getCharRange(S->getBeginLoc(), SemiColonEnd); - - SmallString<128> Insertion( - {UseAssignment ? " = " : "{", - Lexer::getSourceText( - CharSourceRange(InitValue->getSourceRange(), true), - *Result.SourceManager, getLangOpts()), - UseAssignment ? "" : "}"}); - - Diag << FixItHint::CreateInsertion(FieldEnd, Insertion) - << FixItHint::CreateRemoval(StmtRange); - - } else { - StringRef InsertPrefix = ""; - bool HasInitAlready = false; - SourceLocation InsertPos; - SourceRange ReplaceRange; - bool AddComma = false; - bool InvalidFix = false; - unsigned Index = Field->getFieldIndex(); - const CXXCtorInitializer *LastInListInit = nullptr; - for (const CXXCtorInitializer *Init : Ctor->inits()) { - if (!Init->isWritten() || Init->isInClassMemberInitializer()) - continue; - if (Init->getMember() == Field) { - HasInitAlready = true; - if (isa(Init->getInit())) - InsertPos = Init->getRParenLoc(); - else { - ReplaceRange = Init->getInit()->getSourceRange(); - } - break; - } - if (Init->isMemberInitializer() && - Index < Init->getMember()->getFieldIndex()) { - InsertPos = Init->getSourceLocation(); - // There are initializers after the one we are inserting, so add a - // comma after this insertion in order to not break anything. - AddComma = true; - break; + if (Init->getMember() == Field) { + HasInitAlready = true; + if (isa(Init->getInit())) + InsertPos = Init->getRParenLoc(); + else { + ReplaceRange = Init->getInit()->getSourceRange(); } - LastInListInit = Init; + break; } - if (HasInitAlready) { - if (InsertPos.isValid()) - InvalidFix |= InsertPos.isMacroID(); - else - InvalidFix |= ReplaceRange.getBegin().isMacroID() || - ReplaceRange.getEnd().isMacroID(); - } else { - if (InsertPos.isInvalid()) { - if (LastInListInit) { - InsertPos = Lexer::getLocForEndOfToken( - LastInListInit->getRParenLoc(), 0, *Result.SourceManager, - getLangOpts()); - // Inserting after the last constructor initializer, so we need a - // comma. - InsertPrefix = ", "; - } else { - InsertPos = Lexer::getLocForEndOfToken( - Ctor->getTypeSourceInfo() - ->getTypeLoc() - .getAs() - .getLocalRangeEnd(), - 0, *Result.SourceManager, getLangOpts()); - - // If this is first time in the loop, there are no initializers so - // `:` declares member initialization list. If this is a - // subsequent pass then we have already inserted a `:` so continue - // with a comma. 
- InsertPrefix = FirstToCtorInits ? " : " : ", "; - } - } + if (Init->isMemberInitializer() && + Index < Init->getMember()->getFieldIndex()) { + InsertPos = Init->getSourceLocation(); + // There are initializers after the one we are inserting, so add a + // comma after this insertion in order to not break anything. + AddComma = true; + break; + } + LastInListInit = Init; + } + if (HasInitAlready) { + if (InsertPos.isValid()) InvalidFix |= InsertPos.isMacroID(); + else + InvalidFix |= ReplaceRange.getBegin().isMacroID() || + ReplaceRange.getEnd().isMacroID(); + } else { + if (InsertPos.isInvalid()) { + if (LastInListInit) { + InsertPos = Lexer::getLocForEndOfToken( + LastInListInit->getRParenLoc(), 0, *Result.SourceManager, + getLangOpts()); + // Inserting after the last constructor initializer, so we need a + // comma. + InsertPrefix = ", "; + } else { + InsertPos = Lexer::getLocForEndOfToken( + Ctor->getTypeSourceInfo() + ->getTypeLoc() + .getAs() + .getLocalRangeEnd(), + 0, *Result.SourceManager, getLangOpts()); + + // If this is first time in the loop, there are no initializers so + // `:` declares member initialization list. If this is a + // subsequent pass then we have already inserted a `:` so continue + // with a comma. + InsertPrefix = FirstToCtorInits ? " : " : ", "; + } } + InvalidFix |= InsertPos.isMacroID(); + } - SourceLocation SemiColonEnd; - if (auto NextToken = Lexer::findNextToken( - S->getEndLoc(), *Result.SourceManager, getLangOpts())) - SemiColonEnd = NextToken->getEndLoc(); + SourceLocation SemiColonEnd; + if (auto NextToken = Lexer::findNextToken( + S->getEndLoc(), *Result.SourceManager, getLangOpts())) + SemiColonEnd = NextToken->getEndLoc(); + else + InvalidFix = true; + + auto Diag = diag(S->getBeginLoc(), "%0 should be initialized in a member" + " initializer of the constructor") + << Field; + if (InvalidFix) + continue; + StringRef NewInit = Lexer::getSourceText( + CharSourceRange(InitValue->getSourceRange(), true), + *Result.SourceManager, getLangOpts()); + if (HasInitAlready) { + if (InsertPos.isValid()) + Diag << FixItHint::CreateInsertion(InsertPos, NewInit); else - InvalidFix = true; - - auto Diag = - diag(S->getBeginLoc(), "%0 should be initialized in a member" - " initializer of the constructor") - << Field; - if (InvalidFix) - continue; - StringRef NewInit = Lexer::getSourceText( - CharSourceRange(InitValue->getSourceRange(), true), - *Result.SourceManager, getLangOpts()); - if (HasInitAlready) { - if (InsertPos.isValid()) - Diag << FixItHint::CreateInsertion(InsertPos, NewInit); - else - Diag << FixItHint::CreateReplacement(ReplaceRange, NewInit); - } else { - SmallString<128> Insertion({InsertPrefix, Field->getName(), "(", - NewInit, AddComma ? "), " : ")"}); - Diag << FixItHint::CreateInsertion(InsertPos, Insertion, - FirstToCtorInits); - FirstToCtorInits = areDiagsSelfContained(); - } - Diag << FixItHint::CreateRemoval( - CharSourceRange::getCharRange(S->getBeginLoc(), SemiColonEnd)); + Diag << FixItHint::CreateReplacement(ReplaceRange, NewInit); + } else { + SmallString<128> Insertion({InsertPrefix, Field->getName(), "(", + NewInit, AddComma ? 
"), " : ")"}); + Diag << FixItHint::CreateInsertion(InsertPos, Insertion, + FirstToCtorInits); + FirstToCtorInits = areDiagsSelfContained(); } + Diag << FixItHint::CreateRemoval( + CharSourceRange::getCharRange(S->getBeginLoc(), SemiColonEnd)); } } } From 926173c614784149889b2c975adccf52bcece75b Mon Sep 17 00:00:00 2001 From: KAWASHIMA Takahiro Date: Thu, 26 Oct 2023 14:51:20 +0900 Subject: [PATCH 009/877] [AArch64] Prevent argument promotion of vector with size > 128 bits (#70034) This patch prevents argument promotion from promoting pointers to fixed-length vector types larger than 128 bits like `<8 x float>` into the values of the pointees. Such vector types are used for SVE VLS but there is no ABI for SVE VLS arguments and the backend cannot lower such value arguments. Fixes #69147 --- .../AArch64/AArch64TargetTransformInfo.cpp | 24 +++ .../AArch64/AArch64TargetTransformInfo.h | 3 + llvm/test/CodeGen/AArch64/arg_promotion.ll | 190 ++++++++++++++++++ 3 files changed, 217 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/arg_promotion.ll diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index d8a0e68d71237..f121dc40e9fe6 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -212,6 +212,30 @@ bool AArch64TTIImpl::areInlineCompatible(const Function *Caller, return (CallerBits & CalleeBits) == CalleeBits; } +bool AArch64TTIImpl::areTypesABICompatible( + const Function *Caller, const Function *Callee, + const ArrayRef &Types) const { + if (!BaseT::areTypesABICompatible(Caller, Callee, Types)) + return false; + + // We need to ensure that argument promotion does not attempt to promote + // pointers to fixed-length vector types larger than 128 bits like + // <8 x float> (and pointers to aggregate types which have such fixed-length + // vector type members) into the values of the pointees. Such vector types + // are used for SVE VLS but there is no ABI for SVE VLS arguments and the + // backend cannot lower such value arguments. The 128-bit fixed-length SVE + // types can be safely treated as 128-bit NEON types and they cannot be + // distinguished in IR. 
+  if (ST->useSVEForFixedLengthVectors() && llvm::any_of(Types, [](Type *Ty) {
+        auto FVTy = dyn_cast<FixedVectorType>(Ty);
+        return FVTy &&
+               FVTy->getScalarSizeInBits() * FVTy->getNumElements() > 128;
+      }))
+    return false;
+
+  return true;
+}
+
 bool AArch64TTIImpl::shouldMaximizeVectorBandwidth(
     TargetTransformInfo::RegisterKind K) const {
   assert(K != TargetTransformInfo::RGK_Scalar);
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 0501c42285232..c08004ad299fd 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -77,6 +77,9 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
   bool areInlineCompatible(const Function *Caller,
                            const Function *Callee) const;
 
+  bool areTypesABICompatible(const Function *Caller, const Function *Callee,
+                             const ArrayRef<Type *> &Types) const;
+
   /// \name Scalar TTI Implementations
   /// @{
 
diff --git a/llvm/test/CodeGen/AArch64/arg_promotion.ll b/llvm/test/CodeGen/AArch64/arg_promotion.ll
new file mode 100644
index 0000000000000..cc37d230c6cbe
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arg_promotion.ll
@@ -0,0 +1,190 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+
+; RUN: opt -S -passes=argpromotion -mtriple=aarch64-unknwon-linux-gnu < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; Don't promote a vector pointer argument when the pointee type size is greater
+; than 128 bits.
+
+define dso_local void @caller_8xi32(ptr noalias %src, ptr noalias %dst) #0 {
+; CHECK-LABEL: define dso_local void @caller_8xi32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    call fastcc void @callee_8xi32(ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  call fastcc void @callee_8xi32(ptr noalias %src, ptr noalias %dst)
+  ret void
+}
+
+define internal fastcc void @callee_8xi32(ptr noalias %src, ptr noalias %dst) #0 {
+; CHECK-LABEL: define internal fastcc void @callee_8xi32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i32>, ptr [[SRC:%.*]], align 16
+; CHECK-NEXT:    store <8 x i32> [[TMP0]], ptr [[DST:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = load <8 x i32>, ptr %src, align 16
+  store <8 x i32> %0, ptr %dst, align 16
+  ret void
+}
+
+; Promote a vector pointer argument when the pointee type size is 128 bits or
+; less.
+
+define dso_local void @caller_4xi32(ptr noalias %src, ptr noalias %dst) #1 {
+; CHECK-LABEL: define dso_local void @caller_4xi32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SRC_VAL:%.*]] = load <4 x i32>, ptr [[SRC:%.*]], align 16
+; CHECK-NEXT:    call fastcc void @callee_4xi32(<4 x i32> [[SRC_VAL]], ptr noalias [[DST:%.*]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  call fastcc void @callee_4xi32(ptr noalias %src, ptr noalias %dst)
+  ret void
+}
+
+define internal fastcc void @callee_4xi32(ptr noalias %src, ptr noalias %dst) #1 {
+; CHECK-LABEL: define internal fastcc void @callee_4xi32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store <4 x i32> [[SRC_0_VAL:%.*]], ptr [[DST:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = load <4 x i32>, ptr %src, align 16
+  store <4 x i32> %0, ptr %dst, align 16
+  ret void
+}
+
+; A scalar pointer argument is promoted even when the pointee type size is
+; greater than 128 bits.
+
+define dso_local void @caller_i256(ptr noalias %src, ptr noalias %dst) #0 {
+; CHECK-LABEL: define dso_local void @caller_i256(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SRC_VAL:%.*]] = load i256, ptr [[SRC:%.*]], align 16
+; CHECK-NEXT:    call fastcc void @callee_i256(i256 [[SRC_VAL]], ptr noalias [[DST:%.*]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  call fastcc void @callee_i256(ptr noalias %src, ptr noalias %dst)
+  ret void
+}
+
+define internal fastcc void @callee_i256(ptr noalias %src, ptr noalias %dst) #0 {
+; CHECK-LABEL: define internal fastcc void @callee_i256(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i256 [[SRC_0_VAL:%.*]], ptr [[DST:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = load i256, ptr %src, align 16
+  store i256 %0, ptr %dst, align 16
+  ret void
+}
+
+; A scalable vector pointer argument is not a target of ArgumentPromotionPass.
+
+define dso_local void @caller_nx4xi32(ptr noalias %src, ptr noalias %dst) #2 {
+; CHECK-LABEL: define dso_local void @caller_nx4xi32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    call fastcc void @callee_nx4xi32(ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  call fastcc void @callee_nx4xi32(ptr noalias %src, ptr noalias %dst)
+  ret void
+}
+
+define internal fastcc void @callee_nx4xi32(ptr noalias %src, ptr noalias %dst) #2 {
+; CHECK-LABEL: define internal fastcc void @callee_nx4xi32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 4 x i32>, ptr [[SRC:%.*]], align 16
+; CHECK-NEXT:    store <vscale x 4 x i32> [[TMP0]], ptr [[DST:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = load <vscale x 4 x i32>, ptr %src, align 16
+  store <vscale x 4 x i32> %0, ptr %dst, align 16
+  ret void
+}
+
+; Don't promote a structure pointer argument when the pointee vector member
+; type size is greater than 128 bits.
+
+%struct_8xi32 = type { <8 x i32>, <8 x i32> }
+
+define dso_local void @caller_struct8xi32(ptr noalias %src, ptr noalias %dst) #0 {
+; CHECK-LABEL: define dso_local void @caller_struct8xi32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    call fastcc void @callee_struct8xi32(ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  call fastcc void @callee_struct8xi32(ptr noalias %src, ptr noalias %dst)
+  ret void
+}
+
+define internal fastcc void @callee_struct8xi32(ptr noalias %src, ptr noalias %dst) #0 {
+; CHECK-LABEL: define internal fastcc void @callee_struct8xi32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i32>, ptr [[SRC:%.*]], align 16
+; CHECK-NEXT:    store <8 x i32> [[TMP0]], ptr [[DST:%.*]], align 16
+; CHECK-NEXT:    [[SRC2:%.*]] = getelementptr inbounds [[STRUCT_8XI32:%.*]], ptr [[SRC]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr [[SRC2]], align 16
+; CHECK-NEXT:    [[DST2:%.*]] = getelementptr inbounds [[STRUCT_8XI32]], ptr [[DST]], i64 0, i32 1
+; CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[DST2]], align 16
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = load <8 x i32>, ptr %src, align 16
+  store <8 x i32> %0, ptr %dst, align 16
+  %src2 = getelementptr inbounds %struct_8xi32, ptr %src, i64 0, i32 1
+  %1 = load <8 x i32>, ptr %src2, align 16
+  %dst2 = getelementptr inbounds %struct_8xi32, ptr %dst, i64 0, i32 1
+  store <8 x i32> %1, ptr %dst2, align 16
+  ret void
+}
+
+; Promote a structure pointer argument when the pointee vector member type size
+; is 128 bits or less.
+
+%struct_4xi32 = type { <4 x i32>, <4 x i32> }
+
+define dso_local void @caller_struct4xi32(ptr noalias %src, ptr noalias %dst) #1 {
+; CHECK-LABEL: define dso_local void @caller_struct4xi32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SRC_VAL:%.*]] = load <4 x i32>, ptr [[SRC:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[SRC]], i64 16
+; CHECK-NEXT:    [[SRC_VAL1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 16
+; CHECK-NEXT:    call fastcc void @callee_struct4xi32(<4 x i32> [[SRC_VAL]], <4 x i32> [[SRC_VAL1]], ptr noalias [[DST:%.*]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  call fastcc void @callee_struct4xi32(ptr noalias %src, ptr noalias %dst)
+  ret void
+}
+
+define internal fastcc void @callee_struct4xi32(ptr noalias %src, ptr noalias %dst) #1 {
+; CHECK-LABEL: define internal fastcc void @callee_struct4xi32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store <4 x i32> [[SRC_0_VAL:%.*]], ptr [[DST:%.*]], align 16
+; CHECK-NEXT:    [[DST2:%.*]] = getelementptr inbounds [[STRUCT_4XI32:%.*]], ptr [[DST]], i64 0, i32 1
+; CHECK-NEXT:    store <4 x i32> [[SRC_16_VAL:%.*]], ptr [[DST2]], align 16
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = load <4 x i32>, ptr %src, align 16
+  store <4 x i32> %0, ptr %dst, align 16
+  %src2 = getelementptr inbounds %struct_4xi32, ptr %src, i64 0, i32 1
+  %1 = load <4 x i32>, ptr %src2, align 16
+  %dst2 = getelementptr inbounds %struct_4xi32, ptr %dst, i64 0, i32 1
+  store <4 x i32> %1, ptr %dst2, align 16
+  ret void
+}
+
+attributes #0 = { noinline vscale_range(2,2) "target-features"="+v8.2a,+neon,+sve" }
+attributes #1 = { noinline vscale_range(1,1) "target-features"="+v8.2a,+neon,+sve" }
+attributes #2 = { noinline "target-features"="+v8.2a,+neon,+sve" }

From a6d509fadbf7565baf336c2e25d1798fd40e59c9 Mon Sep 17 00:00:00 2001
From: Tobias Hieta
Date: Thu, 26 Oct 2023 08:31:33 +0200
Subject: [PATCH 010/877] [Support] Better error msg when cache dir can't be created. (#69575)

On Windows, if you pass /lldltocache:D:\tmp to lld and you don't have
D: mounted, it fails to create the cache dir D:\tmp, but the error
message is pretty hard to understand:

```
c:\code\llvm\llvm-project\out\debug>bin\lld-link.exe /lldltocache:D:\tmp hello.obj
LLVM ERROR: no such file or directory
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Exception Code: 0xC000001D
```

which led one of our users to report this as a crash. I have added a
slightly better message, so it now says:

```
c:\code\llvm\llvm-project\out\debug>bin\lld-link.exe /lldltocache:D:\tmp hello.obj
LLVM ERROR: Can't create cache directory: D:\tmp
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
```

I am not sure this should be a fatal error, since it is not really
something that should be reported as a bug to LLVM, but at least this
gives a bit more visibility into what to change.
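For illustration, the error-reporting pattern the diff below adopts looks roughly like this as a standalone helper. This is a hand-written sketch, not the exact lld/llvm code: `createCacheDir` is a made-up name, while `sys::fs::create_directories` and `createStringError` are the llvm/Support APIs actually used in the change.

```
// Sketch: wrap the raw std::error_code in an Error whose message names
// the path that failed, so the fatal-error banner becomes actionable.
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"

static llvm::Error createCacheDir(llvm::StringRef CacheDirectoryPath) {
  if (std::error_code EC = llvm::sys::fs::create_directories(
          CacheDirectoryPath, /*IgnoreExisting=*/true))
    // createStringError keeps EC's error category but replaces the bare
    // "no such file or directory" text with one that includes the path.
    return llvm::createStringError(
        EC, llvm::Twine("can't create cache directory ") +
                CacheDirectoryPath + ": " + EC.message());
  return llvm::Error::success();
}
```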
---
 lld/test/COFF/lto-cache-errors.ll | 20 ++++++++++++++++++++
 llvm/lib/Support/Caching.cpp      |  4 +++-
 2 files changed, 23 insertions(+), 1 deletion(-)
 create mode 100644 lld/test/COFF/lto-cache-errors.ll

diff --git a/lld/test/COFF/lto-cache-errors.ll b/lld/test/COFF/lto-cache-errors.ll
new file mode 100644
index 0000000000000..55244e5690dc3
--- /dev/null
+++ b/lld/test/COFF/lto-cache-errors.ll
@@ -0,0 +1,20 @@
+; REQUIRES: x86
+;; Not supported on windows since we use permissions to deny the creation
+; UNSUPPORTED: system-windows
+
+; RUN: opt -module-hash -module-summary %s -o %t.o
+; RUN: opt -module-hash -module-summary %p/Inputs/lto-cache.ll -o %t2.o
+; RUN: rm -Rf %t.cache && mkdir %t.cache
+; RUN: chmod 444 %t.cache
+
+;; Check emit warnings when we can't create the cache dir
+; RUN: not --crash lld-link /lldltocache:%t.cache/nonexistant/ /out:%t3 /entry:main %t2.o %t.o 2>&1 | FileCheck %s
+; CHECK: LLVM ERROR: can't create cache directory {{.*}}/nonexistant/: Permission denied
+
+target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+define void @globalfunc() #0 {
+entry:
+  ret void
+}
diff --git a/llvm/lib/Support/Caching.cpp b/llvm/lib/Support/Caching.cpp
index f20f08a865c76..628e23e1cb3d1 100644
--- a/llvm/lib/Support/Caching.cpp
+++ b/llvm/lib/Support/Caching.cpp
@@ -145,7 +145,9 @@ Expected<FileCache> llvm::localCache(const Twine &CacheNameRef,
   // ensures the filesystem isn't mutated until the cache is.
   if (std::error_code EC = sys::fs::create_directories(
           CacheDirectoryPath, /*IgnoreExisting=*/true))
-    return errorCodeToError(EC);
+    return createStringError(EC, Twine("can't create cache directory ") +
+                                     CacheDirectoryPath + ": " +
+                                     EC.message());
 
   // Write to a temporary to avoid race condition
   SmallString<64> TempFilenameModel;

From c285b7f5139d0870d5ccbdc3f73b254004211030 Mon Sep 17 00:00:00 2001
From: Luke Lau
Date: Thu, 26 Oct 2023 07:59:25 +0100
Subject: [PATCH 011/877] [RISCV] Add tests for vmadd for VP intrinsics. NFC (#70042)

We have VP tests for vmacc but not vmadd. This copies the vmacc tests
but swaps the false operand of vp.merge to be the multiplicand instead
of the addend. This shows how we could fold the vmerge into the vmadd's
mask if we commuted %a and %b.
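Before the (very large, autogenerated) test diff below, here is one of the new cases written out by hand with the vector types spelled in full, since the pattern is easier to see at this size. This is an illustrative sketch, not text copied from the patch: `@vmadd_vv_sketch` is a made-up name, and the real tests build `%allones` with insertelement/shufflevector rather than taking it as an argument.

```
declare <vscale x 1 x i8> @llvm.vp.mul.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i1>, i32)
declare <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i1>, i32)
declare <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1>, <vscale x 1 x i8>, <vscale x 1 x i8>, i32)

define <vscale x 1 x i8> @vmadd_vv_sketch(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, <vscale x 1 x i8> %c, <vscale x 1 x i1> %m, <vscale x 1 x i1> %allones, i32 zeroext %evl) {
  ; x = a * b, then y = x + c: together a multiply-add with %a as multiplicand.
  %x = call <vscale x 1 x i8> @llvm.vp.mul.nxv1i8(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, <vscale x 1 x i1> %allones, i32 %evl)
  %y = call <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8> %x, <vscale x 1 x i8> %c, <vscale x 1 x i1> %allones, i32 %evl)
  ; The vmacc tests pass %c as the false operand here; the vmadd tests pass
  ; %a, so a backend that commuted %a and %b could fold this merge into the
  ; mask of a masked vmadd.vv instead of emitting a separate vmerge.
  %u = call <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1> %m, <vscale x 1 x i8> %y, <vscale x 1 x i8> %a, i32 %evl)
  ret <vscale x 1 x i8> %u
}
```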
--- llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll | 2427 +++++++++++++++++++++++ 1 file changed, 2427 insertions(+) create mode 100644 llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll diff --git a/llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll new file mode 100644 index 0000000000000..6a6d7d2a41424 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll @@ -0,0 +1,2427 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 + +declare @llvm.vp.mul.nxv1i8(, , , i32) +declare @llvm.vp.add.nxv1i8(, , , i32) +declare @llvm.vp.merge.nxv1i8(, , , i32) +declare @llvm.vp.select.nxv1i8(, , , i32) + +define @vmadd_vv_nxv1i8( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-NEXT: vmadd.vv v9, v8, v10 +; CHECK-NEXT: vsetvli zero, zero, e8, mf8, tu, ma +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv1i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1i8( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vv_nxv1i8_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv1i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-NEXT: vmadd.vv v9, v8, v10 +; CHECK-NEXT: vsetvli zero, zero, e8, mf8, tu, ma +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv1i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1i8( %allones, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv1i8( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vx_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, mu +; CHECK-NEXT: vmadd.vx v8, a0, v9, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv1i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1i8( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv1i8_unmasked( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vx_nxv1i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, ma +; CHECK-NEXT: vmadd.vx v8, a0, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv1i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1i8( %allones, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vv_nxv1i8_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: 
vmadd_vv_nxv1i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-NEXT: vmadd.vv v9, v8, v10 +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv1i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1i8( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv1i8_ta( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vx_nxv1i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmacc.vx v9, a0, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv1i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1i8( %m, %y, %a, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv2i8(, , , i32) +declare @llvm.vp.add.nxv2i8(, , , i32) +declare @llvm.vp.merge.nxv2i8(, , , i32) +declare @llvm.vp.select.nxv2i8(, , , i32) + +define @vmadd_vv_nxv2i8( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; CHECK-NEXT: vmadd.vv v9, v8, v10 +; CHECK-NEXT: vsetvli zero, zero, e8, mf4, tu, ma +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv2i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2i8( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vv_nxv2i8_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; CHECK-NEXT: vmadd.vv v9, v8, v10 +; CHECK-NEXT: vsetvli zero, zero, e8, mf4, tu, ma +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv2i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2i8( %allones, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv2i8( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vx_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, mu +; CHECK-NEXT: vmadd.vx v8, a0, v9, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv2i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2i8( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv2i8_unmasked( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vx_nxv2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, ma +; CHECK-NEXT: vmadd.vx v8, a0, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement 
poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv2i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2i8( %allones, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vv_nxv2i8_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv2i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; CHECK-NEXT: vmadd.vv v9, v8, v10 +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv2i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2i8( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv2i8_ta( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vx_nxv2i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vmacc.vx v9, a0, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv2i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2i8( %m, %y, %a, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv4i8(, , , i32) +declare @llvm.vp.add.nxv4i8(, , , i32) +declare @llvm.vp.merge.nxv4i8(, , , i32) +declare @llvm.vp.select.nxv4i8(, , , i32) + +define @vmadd_vv_nxv4i8( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vmadd.vv v9, v8, v10 +; CHECK-NEXT: vsetvli zero, zero, e8, mf2, tu, ma +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv4i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4i8( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vv_nxv4i8_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vmadd.vv v9, v8, v10 +; CHECK-NEXT: vsetvli zero, zero, e8, mf2, tu, ma +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv4i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4i8( %allones, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv4i8( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vx_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, mu +; CHECK-NEXT: vmadd.vx v8, a0, v9, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv4i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4i8( %m, %y, %a, i32 %evl) + ret %u +} + +define 
@vmadd_vx_nxv4i8_unmasked( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vx_nxv4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma +; CHECK-NEXT: vmadd.vx v8, a0, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv4i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4i8( %allones, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vv_nxv4i8_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv4i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vmadd.vv v9, v8, v10 +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv4i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4i8( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv4i8_ta( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vx_nxv4i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vmacc.vx v9, a0, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv4i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4i8( %m, %y, %a, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv8i8(, , , i32) +declare @llvm.vp.add.nxv8i8(, , , i32) +declare @llvm.vp.merge.nxv8i8(, , , i32) +declare @llvm.vp.select.nxv8i8(, , , i32) + +define @vmadd_vv_nxv8i8( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vmadd.vv v9, v8, v10 +; CHECK-NEXT: vsetvli zero, zero, e8, m1, tu, ma +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv8i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8i8( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vv_nxv8i8_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vmadd.vv v9, v8, v10 +; CHECK-NEXT: vsetvli zero, zero, e8, m1, tu, ma +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv8i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8i8( %allones, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv8i8( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vx_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, tu, mu +; CHECK-NEXT: vmadd.vx v8, a0, v9, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = 
shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv8i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8i8( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv8i8_unmasked( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vx_nxv8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, tu, ma +; CHECK-NEXT: vmadd.vx v8, a0, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv8i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8i8( %allones, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vv_nxv8i8_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv8i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vmadd.vv v9, v8, v10 +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv8i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8i8( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv8i8_ta( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vx_nxv8i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vmacc.vx v9, a0, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv8i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8i8( %m, %y, %a, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv16i8(, , , i32) +declare @llvm.vp.add.nxv16i8(, , , i32) +declare @llvm.vp.merge.nxv16i8(, , , i32) +declare @llvm.vp.select.nxv16i8(, , , i32) + +define @vmadd_vv_nxv16i8( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-NEXT: vmadd.vv v10, v8, v12 +; CHECK-NEXT: vsetvli zero, zero, e8, m2, tu, ma +; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv16i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16i8( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vv_nxv16i8_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-NEXT: vmadd.vv v10, v8, v12 +; CHECK-NEXT: vsetvli zero, zero, e8, m2, tu, ma +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv16i8( %x, %c, %allones, 
i32 %evl) + %u = call @llvm.vp.merge.nxv16i8( %allones, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv16i8( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vx_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m2, tu, mu +; CHECK-NEXT: vmadd.vx v8, a0, v10, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv16i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16i8( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv16i8_unmasked( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vx_nxv16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m2, tu, ma +; CHECK-NEXT: vmadd.vx v8, a0, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv16i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16i8( %allones, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vv_nxv16i8_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv16i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-NEXT: vmadd.vv v10, v8, v12 +; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv16i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv16i8( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv16i8_ta( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vx_nxv16i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vmacc.vx v10, a0, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv16i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv16i8( %m, %y, %a, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv32i8(, , , i32) +declare @llvm.vp.add.nxv32i8(, , , i32) +declare @llvm.vp.merge.nxv32i8(, , , i32) +declare @llvm.vp.select.nxv32i8(, , , i32) + +define @vmadd_vv_nxv32i8( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-NEXT: vmadd.vv v12, v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e8, m4, tu, ma +; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv32i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv32i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32i8( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vv_nxv32i8_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv32i8_unmasked: +; CHECK: # %bb.0: +; 
CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-NEXT: vmadd.vv v12, v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e8, m4, tu, ma +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv32i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv32i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32i8( %allones, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv32i8( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vx_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m4, tu, mu +; CHECK-NEXT: vmadd.vx v8, a0, v12, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv32i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv32i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32i8( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv32i8_unmasked( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vx_nxv32i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m4, tu, ma +; CHECK-NEXT: vmadd.vx v8, a0, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv32i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv32i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32i8( %allones, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vv_nxv32i8_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv32i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-NEXT: vmadd.vv v12, v8, v16 +; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv32i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv32i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv32i8( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv32i8_ta( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vx_nxv32i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT: vmacc.vx v12, a0, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv32i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv32i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv32i8( %m, %y, %a, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv64i8(, , , i32) +declare @llvm.vp.add.nxv64i8(, , , i32) +declare @llvm.vp.merge.nxv64i8(, , , i32) +declare @llvm.vp.select.nxv64i8(, , , i32) + +define @vmadd_vv_nxv64i8( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8r.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vmacc.vv v24, v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e8, m8, tu, ma +; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0 +; CHECK-NEXT: ret + %splat = 
+  %allones = shufflevector <vscale x 64 x i1> %splat, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
+  %x = call <vscale x 64 x i8> @llvm.vp.mul.nxv64i8(<vscale x 64 x i8> %a, <vscale x 64 x i8> %b, <vscale x 64 x i1> %allones, i32 %evl)
+  %y = call <vscale x 64 x i8> @llvm.vp.add.nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i8> %c, <vscale x 64 x i1> %allones, i32 %evl)
+  %u = call <vscale x 64 x i8> @llvm.vp.merge.nxv64i8(<vscale x 64 x i1> %m, <vscale x 64 x i8> %y, <vscale x 64 x i8> %a, i32 %evl)
+  ret <vscale x 64 x i8> %u
+}
+
+define <vscale x 64 x i8> @vmadd_vv_nxv64i8_unmasked(<vscale x 64 x i8> %a, <vscale x 64 x i8> %b, <vscale x 64 x i8> %c, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmadd_vv_nxv64i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vl8r.v v24, (a0)
+; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT:    vmacc.vv v24, v8, v16
+; CHECK-NEXT:    vsetvli zero, zero, e8, m8, tu, ma
+; CHECK-NEXT:    vmv.v.v v8, v24
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 64 x i1> poison, i1 -1, i32 0
+  %allones = shufflevector <vscale x 64 x i1> %splat, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
+  %x = call <vscale x 64 x i8> @llvm.vp.mul.nxv64i8(<vscale x 64 x i8> %a, <vscale x 64 x i8> %b, <vscale x 64 x i1> %allones, i32 %evl)
+  %y = call <vscale x 64 x i8> @llvm.vp.add.nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i8> %c, <vscale x 64 x i1> %allones, i32 %evl)
+  %u = call <vscale x 64 x i8> @llvm.vp.merge.nxv64i8(<vscale x 64 x i1> %allones, <vscale x 64 x i8> %y, <vscale x 64 x i8> %a, i32 %evl)
+  ret <vscale x 64 x i8> %u
+}
+
+define <vscale x 64 x i8> @vmadd_vx_nxv64i8(<vscale x 64 x i8> %a, i8 %b, <vscale x 64 x i8> %c, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmadd_vx_nxv64i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m8, tu, mu
+; CHECK-NEXT:    vmadd.vx v8, a0, v16, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
+  %splat = insertelement <vscale x 64 x i1> poison, i1 -1, i32 0
+  %allones = shufflevector <vscale x 64 x i1> %splat, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
+  %x = call <vscale x 64 x i8> @llvm.vp.mul.nxv64i8(<vscale x 64 x i8> %a, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %allones, i32 %evl)
+  %y = call <vscale x 64 x i8> @llvm.vp.add.nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i8> %c, <vscale x 64 x i1> %allones, i32 %evl)
+  %u = call <vscale x 64 x i8> @llvm.vp.merge.nxv64i8(<vscale x 64 x i1> %m, <vscale x 64 x i8> %y, <vscale x 64 x i8> %a, i32 %evl)
+  ret <vscale x 64 x i8> %u
+}
+
+define <vscale x 64 x i8> @vmadd_vx_nxv64i8_unmasked(<vscale x 64 x i8> %a, i8 %b, <vscale x 64 x i8> %c, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmadd_vx_nxv64i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m8, tu, ma
+; CHECK-NEXT:    vmadd.vx v8, a0, v16
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
+  %splat = insertelement <vscale x 64 x i1> poison, i1 -1, i32 0
+  %allones = shufflevector <vscale x 64 x i1> %splat, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
+  %x = call <vscale x 64 x i8> @llvm.vp.mul.nxv64i8(<vscale x 64 x i8> %a, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %allones, i32 %evl)
+  %y = call <vscale x 64 x i8> @llvm.vp.add.nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i8> %c, <vscale x 64 x i1> %allones, i32 %evl)
+  %u = call <vscale x 64 x i8> @llvm.vp.merge.nxv64i8(<vscale x 64 x i1> %allones, <vscale x 64 x i8> %y, <vscale x 64 x i8> %a, i32 %evl)
+  ret <vscale x 64 x i8> %u
+}
+
+define <vscale x 64 x i8> @vmadd_vv_nxv64i8_ta(<vscale x 64 x i8> %a, <vscale x 64 x i8> %b, <vscale x 64 x i8> %c, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmadd_vv_nxv64i8_ta:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vl8r.v v24, (a0)
+; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT:    vmacc.vv v24, v8, v16
+; CHECK-NEXT:    vmerge.vvm v8, v8, v24, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 64 x i1> poison, i1 -1, i32 0
+  %allones = shufflevector <vscale x 64 x i1> %splat, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
+  %x = call <vscale x 64 x i8> @llvm.vp.mul.nxv64i8(<vscale x 64 x i8> %a, <vscale x 64 x i8> %b, <vscale x 64 x i1> %allones, i32 %evl)
+  %y = call <vscale x 64 x i8> @llvm.vp.add.nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i8> %c, <vscale x 64 x i1> %allones, i32 %evl)
+  %u = call <vscale x 64 x i8> @llvm.vp.select.nxv64i8(<vscale x 64 x i1> %m, <vscale x 64 x i8> %y, <vscale x 64 x i8> %a, i32 %evl)
+  ret <vscale x 64 x i8> %u
+}
+
+define <vscale x 64 x i8> @vmadd_vx_nxv64i8_ta(<vscale x 64 x i8> %a, i8 %b, <vscale x 64 x i8> %c, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmadd_vx_nxv64i8_ta:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT:    vmacc.vx v16, a0, v8
+; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
+  %splat = insertelement <vscale x 64 x i1> poison, i1 -1, i32 0
+  %allones = shufflevector <vscale x 64 x i1> %splat, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
+  %x = call <vscale x 64 x i8> @llvm.vp.mul.nxv64i8(<vscale x 64 x i8> %a, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %allones, i32 %evl)
+  %y = call <vscale x 64 x i8> @llvm.vp.add.nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i8> %c, <vscale x 64 x i1> %allones, i32 %evl)
+  %u = call <vscale x 64 x i8> @llvm.vp.select.nxv64i8(<vscale x 64 x i1> %m, <vscale x 64 x i8> %y, <vscale x 64 x i8> %a, i32 %evl)
+  ret <vscale x 64 x i8> %u
+}
+
+declare <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i1>, i32)
+declare <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i1>, i32)
+declare <vscale x 1 x i16> @llvm.vp.merge.nxv1i16(<vscale x 1 x i1>, <vscale x 1 x i16>, <vscale x 1 x i16>, i32)
+declare <vscale x 1 x i16> @llvm.vp.select.nxv1i16(<vscale x 1 x i1>, <vscale x 1 x i16>, <vscale x 1 x i16>, i32)
+
+define <vscale x 1 x i16> @vmadd_vv_nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i16> %b, <vscale x 1 x i16> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmadd_vv_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vmadd.vv v9, v8, v10
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, tu, ma
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
+  %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %x = call <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i16> %b, <vscale x 1 x i1> %allones, i32 %evl)
+  %y = call <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16> %x, <vscale x 1 x i16> %c, <vscale x 1 x i1> %allones, i32 %evl)
+  %u = call <vscale x 1 x i16> @llvm.vp.merge.nxv1i16(<vscale x 1 x i1> %m, <vscale x 1 x i16> %y, <vscale x 1 x i16> %a, i32 %evl)
+  ret <vscale x 1 x i16> %u
+}
+
+define <vscale x 1 x i16> @vmadd_vv_nxv1i16_unmasked(<vscale x 1 x i16> %a, <vscale x 1 x i16> %b, <vscale x 1 x i16> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmadd_vv_nxv1i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vmadd.vv v9, v8, v10
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, tu, ma
+; CHECK-NEXT:    vmv.v.v v8, v9
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
+  %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %x = call <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i16> %b, <vscale x 1 x i1> %allones, i32 %evl)
+  %y = call <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16> %x, <vscale x 1 x i16> %c, <vscale x 1 x i1> %allones, i32 %evl)
+  %u = call <vscale x 1 x i16> @llvm.vp.merge.nxv1i16(<vscale x 1 x i1> %allones, <vscale x 1 x i16> %y, <vscale x 1 x i16> %a, i32 %evl)
+  ret <vscale x 1 x i16> %u
+}
+
+define <vscale x 1 x i16> @vmadd_vx_nxv1i16(<vscale x 1 x i16> %a, i16 %b, <vscale x 1 x i16> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmadd_vx_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
+; CHECK-NEXT:    vmadd.vx v8, a0, v9, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
+  %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
+  %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %x = call <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %allones, i32 %evl)
+  %y = call <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16> %x, <vscale x 1 x i16> %c, <vscale x 1 x i1> %allones, i32 %evl)
+  %u = call <vscale x 1 x i16> @llvm.vp.merge.nxv1i16(<vscale x 1 x i1> %m, <vscale x 1 x i16> %y, <vscale x 1 x i16> %a, i32 %evl)
+  ret <vscale x 1 x i16> %u
+}
+
+define <vscale x 1 x i16> @vmadd_vx_nxv1i16_unmasked(<vscale x 1 x i16> %a, i16 %b, <vscale x 1 x i16> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmadd_vx_nxv1i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, ma
+; CHECK-NEXT:    vmadd.vx v8, a0, v9
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
+  %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
+  %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %x = call <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %allones, i32 %evl)
+  %y = call <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16> %x, <vscale x 1 x i16> %c, <vscale x 1 x i1> %allones, i32 %evl)
+  %u = call <vscale x 1 x i16> @llvm.vp.merge.nxv1i16(<vscale x 1 x i1> %allones, <vscale x 1 x i16> %y, <vscale x 1 x i16> %a, i32 %evl)
+  ret <vscale x 1 x i16> %u
+}
+
+define <vscale x 1 x i16> @vmadd_vv_nxv1i16_ta(<vscale x 1 x i16> %a, <vscale x 1 x i16> %b, <vscale x 1 x i16> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmadd_vv_nxv1i16_ta:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vmadd.vv v9, v8, v10
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
+  %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %x = call <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i16> %b, <vscale x 1 x i1> %allones, i32 %evl)
+  %y = call <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16> %x, <vscale x 1 x i16> %c, <vscale x 1 x i1> %allones, i32 %evl)
+  %u = call <vscale x 1 x i16> @llvm.vp.select.nxv1i16(<vscale x 1 x i1> %m, <vscale x 1 x i16> %y, <vscale x 1 x i16> %a, i32 %evl)
+  ret <vscale x 1 x i16> %u
+}
+
+define <vscale x 1 x i16> @vmadd_vx_nxv1i16_ta(<vscale x 1 x i16> %a, i16 %b, <vscale x 1 x i16> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmadd_vx_nxv1i16_ta:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT:    vmacc.vx v9, a0, v8
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
+  %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
+  %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %x = call <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %allones, i32 %evl)
+  %y = call <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16> %x, <vscale x 1 x i16> %c, <vscale x 1 x i1> %allones, i32 %evl)
+  %u = call <vscale x 1 x i16> @llvm.vp.select.nxv1i16(<vscale x 1 x i1> %m, <vscale x 1 x i16> %y, <vscale x 1 x i16> %a, i32 %evl)
+  ret <vscale x 1 x i16> %u
+}
+
+declare <vscale x 2 x i16> @llvm.vp.mul.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>, <vscale x 2 x i1>, i32)
+declare <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>, <vscale x 2 x i1>, i32)
+declare <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1>, <vscale x 2 x i16>, <vscale x 2 x i16>, i32)
+declare <vscale x 2 x i16> @llvm.vp.select.nxv2i16(<vscale x 2 x i1>, <vscale x 2 x i16>, <vscale x 2 x i16>, i32)
+
+define <vscale x 2 x i16> @vmadd_vv_nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, <vscale x 2 x i16> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmadd_vv_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vmadd.vv v9, v8, v10
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, tu, ma
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
+  %allones = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %x = call <vscale x 2 x i16> @llvm.vp.mul.nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, <vscale x 2 x i1> %allones, i32 %evl)
+  %y = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %c, <vscale x 2 x i1> %allones, i32 %evl)
+  %u = call <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %y, <vscale x 2 x i16> %a, i32 %evl)
+  ret <vscale x 2 x i16> %u
+}
+
+define <vscale x 2 x i16> @vmadd_vv_nxv2i16_unmasked(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, <vscale x 2 x i16> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmadd_vv_nxv2i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vmadd.vv v9, v8, v10
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, tu, ma
+; CHECK-NEXT:    vmv.v.v v8, v9
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
+  %allones = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %x = call <vscale x 2 x i16> @llvm.vp.mul.nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, <vscale x 2 x i1> %allones, i32 %evl)
+  %y = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %c, <vscale x 2 x i1> %allones, i32 %evl)
+  %u = call <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1> %allones, <vscale x 2 x i16> %y, <vscale x 2 x i16> %a, i32 %evl)
+  ret <vscale x 2 x i16> %u
+}
+
+define <vscale x 2 x i16> @vmadd_vx_nxv2i16(<vscale x 2 x i16> %a, i16 %b, <vscale x 2 x i16> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmadd_vx_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
+; CHECK-NEXT:    vmadd.vx v8, a0, v9, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
+  %allones = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %x = call <vscale x 2 x i16> @llvm.vp.mul.nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %allones, i32 %evl)
+  %y = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %c, <vscale x 2 x i1> %allones, i32 %evl)
+  %u = call <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %y, <vscale x 2 x i16> %a, i32 %evl)
+  ret <vscale x 2 x i16> %u
+}
+
+define <vscale x 2 x i16> @vmadd_vx_nxv2i16_unmasked(<vscale x 2 x i16> %a, i16 %b, <vscale x 2 x i16> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmadd_vx_nxv2i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, ma
+; CHECK-NEXT:    vmadd.vx v8, a0, v9
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
+  %allones = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %x = call <vscale x 2 x i16> @llvm.vp.mul.nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %allones, i32 %evl)
+  %y = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %c, <vscale x 2 x i1> %allones, i32 %evl)
+  %u = call <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1> %allones, <vscale x 2 x i16> %y, <vscale x 2 x i16> %a, i32 %evl)
+  ret <vscale x 2 x i16> %u
+}
+
+define <vscale x 2 x i16> @vmadd_vv_nxv2i16_ta(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, <vscale x 2 x i16> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmadd_vv_nxv2i16_ta:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vmadd.vv v9, v8, v10
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
+  %allones = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %x = call <vscale x 2 x i16> @llvm.vp.mul.nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, <vscale x 2 x i1> %allones, i32 %evl)
+  %y = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %c, <vscale x 2 x i1> %allones, i32 %evl)
+  %u = call <vscale x 2 x i16> @llvm.vp.select.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %y, <vscale x 2 x i16> %a, i32 %evl)
+  ret <vscale x 2 x i16> %u
+}
+
+define <vscale x 2 x i16> @vmadd_vx_nxv2i16_ta(<vscale x 2 x i16> %a, i16 %b, <vscale x 2 x i16> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmadd_vx_nxv2i16_ta:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT:    vmacc.vx v9, a0, v8
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
+  %allones = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %x = call <vscale x 2 x i16> @llvm.vp.mul.nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %allones, i32 %evl)
+  %y = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %c, <vscale x 2 x i1> %allones, i32 %evl)
+  %u = call <vscale x 2 x i16> @llvm.vp.select.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %y, <vscale x 2 x i16> %a, i32 %evl)
+  ret <vscale x 2 x i16> %u
+}
+
+declare <vscale x 4 x i16> @llvm.vp.mul.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i1>, i32)
+declare <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i1>, i32)
+declare <vscale x 4 x i16> @llvm.vp.merge.nxv4i16(<vscale x 4 x i1>, <vscale x 4 x i16>, <vscale x 4 x i16>, i32)
+declare <vscale x 4 x i16> @llvm.vp.select.nxv4i16(<vscale x 4 x i1>, <vscale x 4 x i16>, <vscale x 4 x i16>, i32)
+
+define <vscale x 4 x i16> @vmadd_vv_nxv4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmadd_vv_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vmadd.vv v9, v8, v10
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, tu, ma
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 4 x i1> poison, i1 -1, i32 0
+  %allones = shufflevector <vscale x 4 x i1> %splat, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %x = call <vscale x 4 x i16> @llvm.vp.mul.nxv4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i1> %allones, i32 %evl)
+  %y = call <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i16> %c, <vscale x 4 x i1> %allones, i32 %evl)
+  %u = call <vscale x 4 x i16> @llvm.vp.merge.nxv4i16(<vscale x 4 x i1> %m, <vscale x 4 x i16> %y, <vscale x 4 x i16> %a, i32 %evl)
+  ret <vscale x 4 x i16> %u
+}
+
+define <vscale x 4 x i16> @vmadd_vv_nxv4i16_unmasked(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmadd_vv_nxv4i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vmadd.vv v9, v8, v10
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, tu, ma
+; CHECK-NEXT:    vmv.v.v v8, v9
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 4 x i1> poison, i1 -1, i32 0
+  %allones = shufflevector <vscale x 4 x i1> %splat, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %x = call <vscale x 4 x i16> @llvm.vp.mul.nxv4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i1> %allones, i32 %evl)
+  %y = call <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i16> %c, <vscale x 4 x i1> %allones, i32 %evl)
+  %u = call <vscale x 4 x i16> @llvm.vp.merge.nxv4i16(<vscale x 4 x i1> %allones, <vscale x 4 x i16> %y, <vscale x 4 x i16> %a, i32 %evl)
+  ret <vscale x 4 x i16> %u
+}
+
+define <vscale x 4 x i16> @vmadd_vx_nxv4i16(<vscale x 4 x i16> %a, i16 %b, <vscale x 4 x i16> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmadd_vx_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
+; CHECK-NEXT:    vmadd.vx v8, a0, v9, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+  %splat = insertelement <vscale x 4 x i1> poison, i1 -1, i32 0
+  %allones = shufflevector <vscale x 4 x i1> %splat, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %x = call <vscale x 4 x i16> @llvm.vp.mul.nxv4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %allones, i32 %evl)
+  %y = call <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i16> %c, <vscale x 4 x i1> %allones, i32 %evl)
+  %u = call <vscale x 4 x i16> @llvm.vp.merge.nxv4i16(<vscale x 4 x i1> %m, <vscale x 4 x i16> %y, <vscale x 4 x i16> %a, i32 %evl)
+  ret <vscale x 4 x i16> %u
+}
+
+define <vscale x 4 x i16> @vmadd_vx_nxv4i16_unmasked(<vscale x 4 x i16> %a, i16 %b, <vscale x 4 x i16> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmadd_vx_nxv4i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, ma
+; CHECK-NEXT:    vmadd.vx v8, a0, v9
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+  %splat = insertelement <vscale x 4 x i1> poison, i1 -1, i32 0
+  %allones = shufflevector <vscale x 4 x i1> %splat, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %x = call <vscale x 4 x i16> @llvm.vp.mul.nxv4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %allones, i32 %evl)
+  %y = call <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i16> %c, <vscale x 4 x i1> %allones, i32 %evl)
+  %u = call <vscale x 4 x i16> @llvm.vp.merge.nxv4i16(<vscale x 4 x i1> %allones, <vscale x 4 x i16> %y, <vscale x 4 x i16> %a, i32 %evl)
+  ret <vscale x 4 x i16> %u
+}
+
+define <vscale x 4 x i16> @vmadd_vv_nxv4i16_ta(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmadd_vv_nxv4i16_ta:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vmadd.vv v9, v8, v10
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 4 x i1> poison, i1 -1, i32 0
+  %allones = shufflevector <vscale x 4 x i1> %splat, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %x = call <vscale x 4 x i16> @llvm.vp.mul.nxv4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i1> %allones, i32 %evl)
+  %y = call <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i16> %c, <vscale x 4 x i1> %allones, i32 %evl)
+  %u = call <vscale x 4 x i16> @llvm.vp.select.nxv4i16(<vscale x 4 x i1> %m, <vscale x 4 x i16> %y, <vscale x 4 x i16> %a, i32 %evl)
+  ret <vscale x 4 x i16> %u
+}
+
+define <vscale x 4 x i16> @vmadd_vx_nxv4i16_ta(<vscale x 4 x i16> %a, i16 %b, <vscale x 4 x i16> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmadd_vx_nxv4i16_ta:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vmacc.vx v9, a0, v8
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+  %splat = insertelement <vscale x 4 x i1> poison, i1 -1, i32 0
+  %allones = shufflevector <vscale x 4 x i1> %splat, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %x = call <vscale x 4 x i16> @llvm.vp.mul.nxv4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %allones, i32 %evl)
+  %y = call <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i16> %c, <vscale x 4 x i1> %allones, i32 %evl)
+  %u = call <vscale x 4 x i16> @llvm.vp.select.nxv4i16(<vscale x 4 x i1> %m, <vscale x 4 x i16> %y, <vscale x 4 x i16> %a, i32 %evl)
+  ret <vscale x 4 x i16> %u
+}
+
+declare <vscale x 8 x i16> @llvm.vp.mul.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i32)
+declare <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i32)
+declare <vscale x 8 x i16> @llvm.vp.merge.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)
+declare <vscale x 8 x i16> @llvm.vp.select.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)
+
+define <vscale x 8 x i16> @vmadd_vv_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmadd_vv_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vmadd.vv v10, v8, v12
+; CHECK-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
+; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 8 x i1> poison, i1 -1, i32 0
+  %allones = shufflevector <vscale x 8 x i1> %splat, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %x = call <vscale x 8 x i16> @llvm.vp.mul.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i1> %allones, i32 %evl)
+  %y = call <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %c, <vscale x 8 x i1> %allones, i32 %evl)
+  %u = call <vscale x 8 x i16> @llvm.vp.merge.nxv8i16(<vscale x 8 x i1> %m, <vscale x 8 x i16> %y, <vscale x 8 x i16> %a, i32 %evl)
+  ret <vscale x 8 x i16> %u
+}
+
+define <vscale x 8 x i16> @vmadd_vv_nxv8i16_unmasked(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmadd_vv_nxv8i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vmadd.vv v10, v8, v12
+; CHECK-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
+; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 8 x i1> poison, i1 -1, i32 0
+  %allones = shufflevector <vscale x 8 x i1> %splat, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %x = call <vscale x 8 x i16> @llvm.vp.mul.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i1> %allones, i32 %evl)
+  %y = call <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %c, <vscale x 8 x i1> %allones, i32 %evl)
+  %u = call <vscale x 8 x i16> @llvm.vp.merge.nxv8i16(<vscale x 8 x i1> %allones, <vscale x 8 x i16> %y, <vscale x 8 x i16> %a, i32 %evl)
+  ret <vscale x 8 x i16> %u
+}
+
+define <vscale x 8 x i16> @vmadd_vx_nxv8i16(<vscale x 8 x i16> %a, i16 %b, <vscale x 8 x i16> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmadd_vx_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
+; CHECK-NEXT:    vmadd.vx v8, a0, v10, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %splat = insertelement <vscale x 8 x i1> poison, i1 -1, i32 0
+  %allones = shufflevector <vscale x 8 x i1> %splat, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %x = call <vscale x 8 x i16> @llvm.vp.mul.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %allones, i32 %evl)
+  %y = call <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %c, <vscale x 8 x i1> %allones, i32 %evl)
+  %u = call <vscale x 8 x i16> @llvm.vp.merge.nxv8i16(<vscale x 8 x i1> %m, <vscale x 8 x i16> %y, <vscale x 8 x i16> %a, i32 %evl)
+  ret <vscale x 8 x i16> %u
+}
+
+define <vscale x 8 x i16> @vmadd_vx_nxv8i16_unmasked(<vscale x 8 x i16> %a, i16 %b, <vscale x 8 x i16> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmadd_vx_nxv8i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, ma
+; CHECK-NEXT:    vmadd.vx v8, a0, v10
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 8 x i16>
%elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i16( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv8i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8i16( %allones, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vv_nxv8i16_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv8i16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vmadd.vv v10, v8, v12 +; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i16( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv8i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8i16( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv8i16_ta( %a, i16 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vx_nxv8i16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vmacc.vx v10, a0, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i16( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv8i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8i16( %m, %y, %a, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv16i16(, , , i32) +declare @llvm.vp.add.nxv16i16(, , , i32) +declare @llvm.vp.merge.nxv16i16(, , , i32) +declare @llvm.vp.select.nxv16i16(, , , i32) + +define @vmadd_vv_nxv16i16( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vmadd.vv v12, v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, tu, ma +; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i16( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv16i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16i16( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vv_nxv16i16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vmadd.vv v12, v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, tu, ma +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i16( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv16i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16i16( %allones, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv16i16( %a, i16 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vx_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu +; CHECK-NEXT: vmadd.vx v8, a0, v12, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i16( %a, %vb, %allones, i32 %evl) + %y = call 
@llvm.vp.add.nxv16i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16i16( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv16i16_unmasked( %a, i16 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vx_nxv16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, ma +; CHECK-NEXT: vmadd.vx v8, a0, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i16( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv16i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16i16( %allones, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vv_nxv16i16_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv16i16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vmadd.vv v12, v8, v16 +; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i16( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv16i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv16i16( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv16i16_ta( %a, i16 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vx_nxv16i16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vmacc.vx v12, a0, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i16( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv16i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv16i16( %m, %y, %a, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv32i16(, , , i32) +declare @llvm.vp.add.nxv32i16(, , , i32) +declare @llvm.vp.merge.nxv32i16(, , , i32) +declare @llvm.vp.select.nxv32i16(, , , i32) + +define @vmadd_vv_nxv32i16( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re16.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vmacc.vv v24, v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma +; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv32i16( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv32i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32i16( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vv_nxv32i16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv32i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re16.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vmacc.vv v24, v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma +; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv32i16( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv32i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32i16( %allones, %y, %a, i32 %evl) + ret %u +} + +define 
@vmadd_vx_nxv32i16( %a, i16 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vx_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, mu +; CHECK-NEXT: vmadd.vx v8, a0, v16, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv32i16( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv32i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32i16( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv32i16_unmasked( %a, i16 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vx_nxv32i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma +; CHECK-NEXT: vmadd.vx v8, a0, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv32i16( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv32i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32i16( %allones, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vv_nxv32i16_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv32i16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re16.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vmacc.vv v24, v8, v16 +; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv32i16( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv32i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv32i16( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv32i16_ta( %a, i16 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vx_nxv32i16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vmacc.vx v16, a0, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv32i16( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv32i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv32i16( %m, %y, %a, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv1i32(, , , i32) +declare @llvm.vp.add.nxv1i32(, , , i32) +declare @llvm.vp.merge.nxv1i32(, , , i32) +declare @llvm.vp.select.nxv1i32(, , , i32) + +define @vmadd_vv_nxv1i32( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmadd.vv v9, v8, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, tu, ma +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i32( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv1i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1i32( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vv_nxv1i32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv1i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, 
mf2, ta, ma +; CHECK-NEXT: vmadd.vv v9, v8, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, tu, ma +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i32( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv1i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1i32( %allones, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv1i32( %a, i32 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vx_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu +; CHECK-NEXT: vmadd.vx v8, a0, v9, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i32( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv1i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1i32( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv1i32_unmasked( %a, i32 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vx_nxv1i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, ma +; CHECK-NEXT: vmadd.vx v8, a0, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i32( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv1i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1i32( %allones, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vv_nxv1i32_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv1i32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmadd.vv v9, v8, v10 +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i32( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv1i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1i32( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv1i32_ta( %a, i32 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vx_nxv1i32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vmacc.vx v9, a0, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i32( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv1i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1i32( %m, %y, %a, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv2i32(, , , i32) +declare @llvm.vp.add.nxv2i32(, , , i32) +declare @llvm.vp.merge.nxv2i32(, , , i32) +declare @llvm.vp.select.nxv2i32(, , , i32) + +define @vmadd_vv_nxv2i32( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vmadd.vv v9, v8, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = 
shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i32( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv2i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2i32( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vv_nxv2i32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vmadd.vv v9, v8, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i32( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv2i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2i32( %allones, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv2i32( %a, i32 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vx_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu +; CHECK-NEXT: vmadd.vx v8, a0, v9, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i32( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv2i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2i32( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv2i32_unmasked( %a, i32 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vx_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma +; CHECK-NEXT: vmadd.vx v8, a0, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i32( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv2i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2i32( %allones, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vv_nxv2i32_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv2i32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vmadd.vv v9, v8, v10 +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i32( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv2i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2i32( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv2i32_ta( %a, i32 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vx_nxv2i32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vmacc.vx v9, a0, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i32( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv2i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2i32( %m, %y, %a, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv4i32(, , , i32) +declare @llvm.vp.add.nxv4i32(, , , i32) +declare @llvm.vp.merge.nxv4i32(, , , i32) +declare 
@llvm.vp.select.nxv4i32(, , , i32) + +define @vmadd_vv_nxv4i32( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vmadd.vv v10, v8, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, tu, ma +; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i32( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv4i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4i32( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vv_nxv4i32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vmadd.vv v10, v8, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, tu, ma +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i32( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv4i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4i32( %allones, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv4i32( %a, i32 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vx_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu +; CHECK-NEXT: vmadd.vx v8, a0, v10, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i32( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv4i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4i32( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv4i32_unmasked( %a, i32 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vx_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, ma +; CHECK-NEXT: vmadd.vx v8, a0, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i32( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv4i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4i32( %allones, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vv_nxv4i32_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv4i32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vmadd.vv v10, v8, v12 +; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i32( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv4i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4i32( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv4i32_ta( %a, i32 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vx_nxv4i32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vmacc.vx v10, a0, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement 
poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i32( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv4i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4i32( %m, %y, %a, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv8i32(, , , i32) +declare @llvm.vp.add.nxv8i32(, , , i32) +declare @llvm.vp.merge.nxv8i32(, , , i32) +declare @llvm.vp.select.nxv8i32(, , , i32) + +define @vmadd_vv_nxv8i32( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmadd.vv v12, v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, tu, ma +; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i32( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv8i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8i32( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vv_nxv8i32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmadd.vv v12, v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, tu, ma +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i32( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv8i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8i32( %allones, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv8i32( %a, i32 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vx_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu +; CHECK-NEXT: vmadd.vx v8, a0, v12, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i32( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv8i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8i32( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv8i32_unmasked( %a, i32 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vx_nxv8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, ma +; CHECK-NEXT: vmadd.vx v8, a0, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i32( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv8i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8i32( %allones, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vv_nxv8i32_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv8i32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmadd.vv v12, v8, v16 +; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i32( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv8i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8i32( %m, %y, %a, i32 %evl) + ret %u 
+} + +define @vmadd_vx_nxv8i32_ta( %a, i32 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vx_nxv8i32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vmacc.vx v12, a0, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i32( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv8i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8i32( %m, %y, %a, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv16i32(, , , i32) +declare @llvm.vp.add.nxv16i32(, , , i32) +declare @llvm.vp.merge.nxv16i32(, , , i32) +declare @llvm.vp.select.nxv16i32(, , , i32) + +define @vmadd_vv_nxv16i32( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re32.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vmacc.vv v24, v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma +; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i32( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv16i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16i32( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vv_nxv16i32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re32.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vmacc.vv v24, v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma +; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i32( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv16i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16i32( %allones, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv16i32( %a, i32 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vx_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, mu +; CHECK-NEXT: vmadd.vx v8, a0, v16, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i32( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv16i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16i32( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv16i32_unmasked( %a, i32 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vx_nxv16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma +; CHECK-NEXT: vmadd.vx v8, a0, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i32( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv16i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16i32( %allones, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vv_nxv16i32_ta( %a, %b, %c, %m, i32 zeroext 
%evl) { +; CHECK-LABEL: vmadd_vv_nxv16i32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re32.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vmacc.vv v24, v8, v16 +; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i32( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv16i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv16i32( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv16i32_ta( %a, i32 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vx_nxv16i32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vmacc.vx v16, a0, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i32( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv16i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv16i32( %m, %y, %a, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv1i64(, , , i32) +declare @llvm.vp.add.nxv1i64(, , , i32) +declare @llvm.vp.merge.nxv1i64(, , , i32) +declare @llvm.vp.select.nxv1i64(, , , i32) + +define @vmadd_vv_nxv1i64( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vmadd.vv v9, v8, v10 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, tu, ma +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i64( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv1i64( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1i64( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vv_nxv1i64_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv1i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vmadd.vv v9, v8, v10 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, tu, ma +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i64( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv1i64( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1i64( %allones, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv1i64( %a, i64 %b, %c, %m, i32 zeroext %evl) { +; RV32-LABEL: vmadd_vx_nxv1i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma +; RV32-NEXT: vmadd.vv v10, v8, v9 +; RV32-NEXT: vsetvli zero, zero, e64, m1, tu, ma +; RV32-NEXT: vmerge.vvm v8, v8, v10, v0 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmadd_vx_nxv1i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, mu +; RV64-NEXT: vmadd.vx v8, a0, v9, v0.t +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = 
shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i64( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv1i64( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1i64( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv1i64_unmasked( %a, i64 %b, %c, %m, i32 zeroext %evl) { +; RV32-LABEL: vmadd_vx_nxv1i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma +; RV32-NEXT: vmadd.vv v10, v8, v9 +; RV32-NEXT: vsetvli zero, zero, e64, m1, tu, ma +; RV32-NEXT: vmv.v.v v8, v10 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmadd_vx_nxv1i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, ma +; RV64-NEXT: vmadd.vx v8, a0, v9 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i64( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv1i64( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1i64( %allones, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vv_nxv1i64_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv1i64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vmadd.vv v9, v8, v10 +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i64( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv1i64( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1i64( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv1i64_ta( %a, i64 %b, %c, %m, i32 zeroext %evl) { +; RV32-LABEL: vmadd_vx_nxv1i64_ta: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma +; RV32-NEXT: vmadd.vv v10, v8, v9 +; RV32-NEXT: vmerge.vvm v8, v8, v10, v0 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmadd_vx_nxv1i64_ta: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vmacc.vx v9, a0, v8 +; RV64-NEXT: vmerge.vvm v8, v8, v9, v0 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i64( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv1i64( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1i64( %m, %y, %a, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv2i64(, , , i32) +declare @llvm.vp.add.nxv2i64(, , , i32) +declare @llvm.vp.merge.nxv2i64(, , , i32) +declare @llvm.vp.select.nxv2i64(, , , i32) + +define @vmadd_vv_nxv2i64( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vmadd.vv v10, v8, v12 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, tu, ma +; 
CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i64( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv2i64( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2i64( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vv_nxv2i64_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv2i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vmadd.vv v10, v8, v12 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, tu, ma +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i64( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv2i64( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2i64( %allones, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv2i64( %a, i64 %b, %c, %m, i32 zeroext %evl) { +; RV32-LABEL: vmadd_vx_nxv2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vmadd.vv v12, v8, v10 +; RV32-NEXT: vsetvli zero, zero, e64, m2, tu, ma +; RV32-NEXT: vmerge.vvm v8, v8, v12, v0 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmadd_vx_nxv2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, mu +; RV64-NEXT: vmadd.vx v8, a0, v10, v0.t +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i64( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv2i64( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2i64( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv2i64_unmasked( %a, i64 %b, %c, %m, i32 zeroext %evl) { +; RV32-LABEL: vmadd_vx_nxv2i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vmadd.vv v12, v8, v10 +; RV32-NEXT: vsetvli zero, zero, e64, m2, tu, ma +; RV32-NEXT: vmv.v.v v8, v12 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmadd_vx_nxv2i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, ma +; RV64-NEXT: vmadd.vx v8, a0, v10 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i64( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv2i64( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2i64( %allones, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vv_nxv2i64_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv2i64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vmadd.vv v10, v8, v12 +; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 
+; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i64( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv2i64( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2i64( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv2i64_ta( %a, i64 %b, %c, %m, i32 zeroext %evl) { +; RV32-LABEL: vmadd_vx_nxv2i64_ta: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vmadd.vv v12, v8, v10 +; RV32-NEXT: vmerge.vvm v8, v8, v12, v0 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmadd_vx_nxv2i64_ta: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vmacc.vx v10, a0, v8 +; RV64-NEXT: vmerge.vvm v8, v8, v10, v0 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i64( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv2i64( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2i64( %m, %y, %a, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv4i64(, , , i32) +declare @llvm.vp.add.nxv4i64(, , , i32) +declare @llvm.vp.merge.nxv4i64(, , , i32) +declare @llvm.vp.select.nxv4i64(, , , i32) + +define @vmadd_vv_nxv4i64( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vmadd.vv v12, v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, tu, ma +; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i64( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv4i64( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4i64( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vv_nxv4i64_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv4i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vmadd.vv v12, v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, tu, ma +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i64( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv4i64( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4i64( %allones, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv4i64( %a, i64 %b, %c, %m, i32 zeroext %evl) { +; RV32-LABEL: vmadd_vx_nxv4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma +; RV32-NEXT: vmadd.vv v16, v8, v12 +; RV32-NEXT: vsetvli zero, zero, e64, m4, tu, ma +; RV32-NEXT: vmerge.vvm v8, v8, v16, v0 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmadd_vx_nxv4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, 
tu, mu +; RV64-NEXT: vmadd.vx v8, a0, v12, v0.t +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i64( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv4i64( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4i64( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv4i64_unmasked( %a, i64 %b, %c, %m, i32 zeroext %evl) { +; RV32-LABEL: vmadd_vx_nxv4i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma +; RV32-NEXT: vmadd.vv v16, v8, v12 +; RV32-NEXT: vsetvli zero, zero, e64, m4, tu, ma +; RV32-NEXT: vmv.v.v v8, v16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmadd_vx_nxv4i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, tu, ma +; RV64-NEXT: vmadd.vx v8, a0, v12 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i64( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv4i64( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4i64( %allones, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vv_nxv4i64_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv4i64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vmadd.vv v12, v8, v16 +; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i64( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv4i64( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4i64( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv4i64_ta( %a, i64 %b, %c, %m, i32 zeroext %evl) { +; RV32-LABEL: vmadd_vx_nxv4i64_ta: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma +; RV32-NEXT: vmadd.vv v16, v8, v12 +; RV32-NEXT: vmerge.vvm v8, v8, v16, v0 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmadd_vx_nxv4i64_ta: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vmacc.vx v12, a0, v8 +; RV64-NEXT: vmerge.vvm v8, v8, v12, v0 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i64( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv4i64( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4i64( %m, %y, %a, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv8i64(, , , i32) +declare @llvm.vp.add.nxv8i64(, , , i32) +declare @llvm.vp.merge.nxv8i64(, , , i32) +declare @llvm.vp.select.nxv8i64(, , , i32) + +define @vmadd_vv_nxv8i64( 
%a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re64.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vmacc.vv v24, v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, tu, ma +; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i64( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv8i64( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8i64( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vv_nxv8i64_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv8i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re64.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vmacc.vv v24, v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, tu, ma +; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i64( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv8i64( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8i64( %allones, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv8i64( %a, i64 %b, %c, %m, i32 zeroext %evl) { +; RV32-LABEL: vmadd_vx_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vmadd.vv v24, v8, v16 +; RV32-NEXT: vsetvli zero, zero, e64, m8, tu, ma +; RV32-NEXT: vmerge.vvm v8, v8, v24, v0 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmadd_vx_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m8, tu, mu +; RV64-NEXT: vmadd.vx v8, a0, v16, v0.t +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i64( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv8i64( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8i64( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv8i64_unmasked( %a, i64 %b, %c, %m, i32 zeroext %evl) { +; RV32-LABEL: vmadd_vx_nxv8i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vmadd.vv v24, v8, v16 +; RV32-NEXT: vsetvli zero, zero, e64, m8, tu, ma +; RV32-NEXT: vmv.v.v v8, v24 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmadd_vx_nxv8i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m8, tu, ma +; RV64-NEXT: vmadd.vx v8, a0, v16 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i64( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv8i64( %x, %c, %allones, i32 %evl) + %u = call 
@llvm.vp.merge.nxv8i64( %allones, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vv_nxv8i64_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmadd_vv_nxv8i64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re64.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vmacc.vv v24, v8, v16 +; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i64( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv8i64( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8i64( %m, %y, %a, i32 %evl) + ret %u +} + +define @vmadd_vx_nxv8i64_ta( %a, i64 %b, %c, %m, i32 zeroext %evl) { +; RV32-LABEL: vmadd_vx_nxv8i64_ta: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vmadd.vv v24, v8, v16 +; RV32-NEXT: vmerge.vvm v8, v8, v24, v0 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmadd_vx_nxv8i64_ta: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vmacc.vx v16, a0, v8 +; RV64-NEXT: vmerge.vvm v8, v8, v16, v0 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i64( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv8i64( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8i64( %m, %y, %a, i32 %evl) + ret %u +} From 897cc8a7d7c0e47686322dbd4b95ecad30bb2298 Mon Sep 17 00:00:00 2001 From: Shivam Gupta Date: Thu, 26 Oct 2023 12:39:48 +0530 Subject: [PATCH 012/877] [RecursiveASTVisitor] Fix RecursiveASTVisitor (RAV) fails to visit the initializer of a bitfield (#69557) The problem was introduced in the commit https://github.com/llvm/llvm-project/commit/6b8e3c02ca44fb6c3738bb0c75859c11a03e30ed when the possibility of initialized bitfields was added, but the logic in RecursiveASTVisitor was not updated. This PR fixed that. This fixes https://github.com/llvm/llvm-project/issues/64916. Patch by Scott McPeak --------- Co-authored-by: cor3ntin --- clang/docs/ReleaseNotes.rst | 3 ++ clang/include/clang/AST/RecursiveASTVisitor.h | 2 +- clang/unittests/Tooling/CMakeLists.txt | 1 + .../BitfieldInitializer.cpp | 34 +++++++++++++++++++ 4 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 clang/unittests/Tooling/RecursiveASTVisitorTests/BitfieldInitializer.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 42f20b9a9bb04..074116d2edf9f 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -650,6 +650,9 @@ Bug Fixes to AST Handling `Issue 64170 `_ - Fixed ``hasAnyBase`` not binding nodes in its submatcher. (`#65421 `_) +- Fixed a bug where RecursiveASTVisitor fails to visit the + initializer of a bitfield. 
+  `Issue 64916 <https://github.com/llvm/llvm-project/issues/64916>`_

 Miscellaneous Bug Fixes
 ^^^^^^^^^^^^^^^^^^^^^^^

diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h
index 3dd23eb38eeab..53bc15e1b19f6 100644
--- a/clang/include/clang/AST/RecursiveASTVisitor.h
+++ b/clang/include/clang/AST/RecursiveASTVisitor.h
@@ -2103,7 +2103,7 @@ DEF_TRAVERSE_DECL(FieldDecl, {
   TRY_TO(TraverseDeclaratorHelper(D));
   if (D->isBitField())
     TRY_TO(TraverseStmt(D->getBitWidth()));
-  else if (D->hasInClassInitializer())
+  if (D->hasInClassInitializer())
     TRY_TO(TraverseStmt(D->getInClassInitializer()));
 })

diff --git a/clang/unittests/Tooling/CMakeLists.txt b/clang/unittests/Tooling/CMakeLists.txt
index 2fbe78e3fab75..5a10a6b285390 100644
--- a/clang/unittests/Tooling/CMakeLists.txt
+++ b/clang/unittests/Tooling/CMakeLists.txt
@@ -25,6 +25,7 @@ add_clang_unittest(ToolingTests
   QualTypeNamesTest.cpp
   RangeSelectorTest.cpp
   RecursiveASTVisitorTests/Attr.cpp
+  RecursiveASTVisitorTests/BitfieldInitializer.cpp
   RecursiveASTVisitorTests/CallbacksLeaf.cpp
   RecursiveASTVisitorTests/CallbacksUnaryOperator.cpp
   RecursiveASTVisitorTests/CallbacksBinaryOperator.cpp
diff --git a/clang/unittests/Tooling/RecursiveASTVisitorTests/BitfieldInitializer.cpp b/clang/unittests/Tooling/RecursiveASTVisitorTests/BitfieldInitializer.cpp
new file mode 100644
index 0000000000000..c11e726fe8552
--- /dev/null
+++ b/clang/unittests/Tooling/RecursiveASTVisitorTests/BitfieldInitializer.cpp
@@ -0,0 +1,34 @@
+//===- unittest/Tooling/RecursiveASTVisitorTests/BitfieldInitializer.cpp -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "TestVisitor.h"
+#include <string>
+
+using namespace clang;
+
+namespace {
+
+// Check to ensure that bitfield initializers are visited.
+class BitfieldInitializerVisitor
+    : public ExpectedLocationVisitor<BitfieldInitializerVisitor> {
+public:
+  bool VisitIntegerLiteral(IntegerLiteral *IL) {
+    Match(std::to_string(IL->getValue().getSExtValue()), IL->getLocation());
+    return true;
+  }
+};
+
+TEST(RecursiveASTVisitor, BitfieldInitializerIsVisited) {
+  BitfieldInitializerVisitor Visitor;
+  Visitor.ExpectMatch("123", 2, 15);
+  EXPECT_TRUE(Visitor.runOver("struct S {\n"
+                              "  int x : 8 = 123;\n"
+                              "};\n"));
+}
+
+} // end anonymous namespace

From 6e2d67e7d66f46fbe4f4c35c7c59d3a8706ea8b0 Mon Sep 17 00:00:00 2001
From: Yeting Kuo <46629943+yetingk@users.noreply.github.com>
Date: Thu, 26 Oct 2023 15:10:57 +0800
Subject: [PATCH 013/877] [RISCV] Support predefined macro
 __riscv_misaligned_[fast,avoid]. (#65756)

The RISC-V C API introduced predefined macros to convey hints about
unaligned accesses ([pr]). This patch defines __riscv_misaligned_fast
when using -mno-strict-align and defines __riscv_misaligned_avoid
otherwise.

Note: This ignores __riscv_misaligned_slow, which is also defined by
the spec.

[pr]: https://github.com/riscv-non-isa/riscv-c-api-doc/pull/40
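For illustration only (this snippet is not part of the patch; the function and the copy strategies are hypothetical, only the macro names come from the change), code can dispatch on the new hints like so:
```c
#include <string.h>

/* Hypothetical example: pick a copy strategy based on the hint macros. */
static void copy_bytes(void *dst, const void *src, unsigned n) {
#if defined(__riscv_misaligned_fast)
  /* Misaligned scalar accesses are cheap; a plain memcpy is fine. */
  memcpy(dst, src, n);
#elif defined(__riscv_misaligned_avoid)
  /* Misaligned accesses may trap or be emulated; copy byte by byte. */
  unsigned char *d = dst;
  const unsigned char *s = src;
  while (n--)
    *d++ = *s++;
#endif
}
```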
---
 clang/lib/Basic/Targets/RISCV.cpp               |  7 +++++++
 clang/lib/Basic/Targets/RISCV.h                 |  3 +++
 clang/test/Preprocessor/riscv-target-features.c | 12 ++++++++++++
 3 files changed, 22 insertions(+)

diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp
index 5f75619b74554..0b9ebeaf5e75b 100644
--- a/clang/lib/Basic/Targets/RISCV.cpp
+++ b/clang/lib/Basic/Targets/RISCV.cpp
@@ -210,6 +210,11 @@ void RISCVTargetInfo::getTargetDefines(const LangOptions &Opts,
   if (VScale && VScale->first && VScale->first == VScale->second)
     Builder.defineMacro("__riscv_v_fixed_vlen",
                         Twine(VScale->first * llvm::RISCV::RVVBitsPerBlock));
+
+  if (FastUnalignedAccess)
+    Builder.defineMacro("__riscv_misaligned_fast");
+  else
+    Builder.defineMacro("__riscv_misaligned_avoid");
 }

 static constexpr Builtin::Info BuiltinInfo[] = {
@@ -328,6 +333,8 @@ bool RISCVTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
   if (ISAInfo->hasExtension("zfh") || ISAInfo->hasExtension("zhinx"))
     HasLegalHalfType = true;

+  FastUnalignedAccess = llvm::is_contained(Features, "+unaligned-scalar-mem");
+
   return true;
 }

diff --git a/clang/lib/Basic/Targets/RISCV.h b/clang/lib/Basic/Targets/RISCV.h
index 6be0e49ca2f55..e5424d318401f 100644
--- a/clang/lib/Basic/Targets/RISCV.h
+++ b/clang/lib/Basic/Targets/RISCV.h
@@ -29,6 +29,9 @@ class RISCVTargetInfo : public TargetInfo {
   std::string ABI, CPU;
   std::unique_ptr<llvm::RISCVISAInfo> ISAInfo;

+private:
+  bool FastUnalignedAccess;
+
 public:
   RISCVTargetInfo(const llvm::Triple &Triple, const TargetOptions &)
       : TargetInfo(Triple) {
diff --git a/clang/test/Preprocessor/riscv-target-features.c b/clang/test/Preprocessor/riscv-target-features.c
index ffdec34ca615f..3a0435f9c9790 100644
--- a/clang/test/Preprocessor/riscv-target-features.c
+++ b/clang/test/Preprocessor/riscv-target-features.c
@@ -1246,3 +1246,15 @@
 // RUN:   -march=rv64i_zve32x_zvkt1p0 -x c -E -dM %s \
 // RUN:   -o - | FileCheck --check-prefix=CHECK-ZVKT-EXT %s
 // CHECK-ZVKT-EXT: __riscv_zvkt 1000000{{$}}
+
+// RUN: %clang --target=riscv32-unknown-linux-gnu -march=rv32i -x c -E -dM %s \
+// RUN:   -o - | FileCheck %s --check-prefix=CHECK-MISALIGNED-AVOID
+// RUN: %clang --target=riscv64-unknown-linux-gnu -march=rv64i -x c -E -dM %s \
+// RUN:   -o - | FileCheck %s --check-prefix=CHECK-MISALIGNED-AVOID
+// CHECK-MISALIGNED-AVOID: __riscv_misaligned_avoid 1
+
+// RUN: %clang --target=riscv32-unknown-linux-gnu -march=rv32i -E -dM %s \
+// RUN:   -munaligned-access -o - | FileCheck %s --check-prefix=CHECK-MISALIGNED-FAST
+// RUN: %clang --target=riscv64-unknown-linux-gnu -march=rv64i -E -dM %s \
+// RUN:   -munaligned-access -o - | FileCheck %s --check-prefix=CHECK-MISALIGNED-FAST
+// CHECK-MISALIGNED-FAST: __riscv_misaligned_fast 1

From 3b59b3ef449d8296ed5ac09631ca383e139ec700 Mon Sep 17 00:00:00 2001
From: LLVM GN Syncbot
Date: Thu, 26 Oct 2023 07:18:32 +0000
Subject: [PATCH 014/877] [gn build] Port 897cc8a7d7c0

---
 llvm/utils/gn/secondary/clang/unittests/Tooling/BUILD.gn | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/gn/secondary/clang/unittests/Tooling/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/Tooling/BUILD.gn
index e784b39cd0f18..43f8b886f9b97 100644
--- a/llvm/utils/gn/secondary/clang/unittests/Tooling/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/unittests/Tooling/BUILD.gn
@@ -44,6 +44,7 @@ unittest("ToolingTests") {
     "RecursiveASTVisitorTestPostOrderVisitor.cpp",
     "RecursiveASTVisitorTestTypeLocVisitor.cpp",
     "RecursiveASTVisitorTests/Attr.cpp",
+    "RecursiveASTVisitorTests/BitfieldInitializer.cpp",
"RecursiveASTVisitorTests/BitfieldInitializer.cpp", "RecursiveASTVisitorTests/CXXBoolLiteralExpr.cpp", "RecursiveASTVisitorTests/CXXMemberCall.cpp", "RecursiveASTVisitorTests/CXXMethodDecl.cpp", From 46028022407d003b8af4ddf27a4679de4891f10d Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Thu, 26 Oct 2023 00:26:54 -0700 Subject: [PATCH 015/877] [AMDGPU] Shrink to SOPK with 32-bit signed literals (#70263) A literal like 0xffff8000 is valid to be used as KIMM in a SOPK instruction, but at the moment our checks expect it to be fully sign extended to a 64-bit signed integer. This is not required since all cases which are being shrunk only accept 32-bit operands. We need to sign extend the operand to 64-bit though so it passes the verifier and properly printed. --- .../Target/AMDGPU/SIShrinkInstructions.cpp | 15 +++-- llvm/test/CodeGen/AMDGPU/shrink-i32-kimm.mir | 57 +++++++++++++++++++ 2 files changed, 67 insertions(+), 5 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/shrink-i32-kimm.mir diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp index 88c75a0f86a6c..856121be78031 100644 --- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -159,7 +159,7 @@ bool SIShrinkInstructions::shouldShrinkTrue16(MachineInstr &MI) const { } bool SIShrinkInstructions::isKImmOperand(const MachineOperand &Src) const { - return isInt<16>(Src.getImm()) && + return isInt<16>(SignExtend64(Src.getImm(), 32)) && !TII->isInlineConstant(*Src.getParent(), Src.getOperandNo()); } @@ -170,7 +170,7 @@ bool SIShrinkInstructions::isKUImmOperand(const MachineOperand &Src) const { bool SIShrinkInstructions::isKImmOrKUImmOperand(const MachineOperand &Src, bool &IsUnsigned) const { - if (isInt<16>(Src.getImm())) { + if (isInt<16>(SignExtend64(Src.getImm(), 32))) { IsUnsigned = false; return !TII->isInlineConstant(Src); } @@ -221,7 +221,7 @@ void SIShrinkInstructions::shrinkScalarCompare(MachineInstr &MI) const { if (!Src0.isReg()) return; - const MachineOperand &Src1 = MI.getOperand(1); + MachineOperand &Src1 = MI.getOperand(1); if (!Src1.isImm()) return; @@ -237,6 +237,7 @@ void SIShrinkInstructions::shrinkScalarCompare(MachineInstr &MI) const { if (!HasUImm) { SOPKOpc = (SOPKOpc == AMDGPU::S_CMPK_EQ_U32) ? AMDGPU::S_CMPK_EQ_I32 : AMDGPU::S_CMPK_LG_I32; + Src1.setImm(SignExtend32(Src1.getImm(), 32)); } MI.setDesc(TII->get(SOPKOpc)); @@ -249,6 +250,8 @@ void SIShrinkInstructions::shrinkScalarCompare(MachineInstr &MI) const { if ((TII->sopkIsZext(SOPKOpc) && isKUImmOperand(Src1)) || (!TII->sopkIsZext(SOPKOpc) && isKImmOperand(Src1))) { + if (!TII->sopkIsZext(SOPKOpc)) + Src1.setImm(SignExtend64(Src1.getImm(), 32)); MI.setDesc(NewDesc); } } @@ -838,6 +841,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { unsigned Opc = (MI.getOpcode() == AMDGPU::S_ADD_I32) ? 
            AMDGPU::S_ADDK_I32 : AMDGPU::S_MULK_I32;
+          Src1->setImm(SignExtend64(Src1->getImm(), 32));
           MI.setDesc(TII->get(Opc));
           MI.tieOperands(0, 1);
         }
@@ -857,9 +861,10 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
         if (Src.isImm() && Dst.getReg().isPhysical()) {
           int32_t ReverseImm;
-          if (isKImmOperand(Src))
+          if (isKImmOperand(Src)) {
             MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
-          else if (isReverseInlineImm(Src, ReverseImm)) {
+            Src.setImm(SignExtend64(Src.getImm(), 32));
+          } else if (isReverseInlineImm(Src, ReverseImm)) {
             MI.setDesc(TII->get(AMDGPU::S_BREV_B32));
             Src.setImm(ReverseImm);
           }
diff --git a/llvm/test/CodeGen/AMDGPU/shrink-i32-kimm.mir b/llvm/test/CodeGen/AMDGPU/shrink-i32-kimm.mir
new file mode 100644
index 0000000000000..e2198faf13f71
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/shrink-i32-kimm.mir
@@ -0,0 +1,57 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
+# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=si-shrink-instructions -o - %s | FileCheck -check-prefix=GCN %s
+
+---
+name: shrink_kimm32_mov_b32
+tracksRegLiveness: true
+body: |
+  bb.0:
+
+    ; GCN-LABEL: name: shrink_kimm32_mov_b32
+    ; GCN: $sgpr0 = S_MOVK_I32 -2048
+    $sgpr0 = S_MOV_B32 4294965248
+...
+
+---
+name: shrink_kimm32_cmp_eq_u32
+tracksRegLiveness: true
+body: |
+  bb.0:
+
+    ; GCN-LABEL: name: shrink_kimm32_cmp_eq_u32
+    ; GCN: S_CMPK_EQ_I32 undef $sgpr0, -2048, implicit-def $scc
+    S_CMP_EQ_U32 undef $sgpr0, 4294965248, implicit-def $scc
+...
+
+---
+name: shrink_kimm32_cmp_gt_i32
+tracksRegLiveness: true
+body: |
+  bb.0:
+
+    ; GCN-LABEL: name: shrink_kimm32_cmp_gt_i32
+    ; GCN: S_CMPK_GT_I32 undef $sgpr0, -2048, implicit-def $scc
+    S_CMP_GT_I32 undef $sgpr0, 4294965248, implicit-def $scc
+...
+
+---
+name: shrink_kimm32_add_i32
+tracksRegLiveness: true
+body: |
+  bb.0:
+
+    ; GCN-LABEL: name: shrink_kimm32_add_i32
+    ; GCN: $sgpr0 = S_ADDK_I32 undef $sgpr0, -2048, implicit-def $scc
+    $sgpr0 = S_ADD_I32 undef $sgpr0, 4294965248, implicit-def $scc
+...
+
+---
+name: shrink_kimm32_mul_i32
+tracksRegLiveness: true
+body: |
+  bb.0:
+
+    ; GCN-LABEL: name: shrink_kimm32_mul_i32
+    ; GCN: $sgpr0 = S_MULK_I32 undef $sgpr0, -2048, implicit-def $scc
+    $sgpr0 = S_MUL_I32 undef $sgpr0, 4294965248, implicit-def $scc
+...

From d1556e5efbf0cb671c0f6e403fc1eaf9153f8713 Mon Sep 17 00:00:00 2001
From: David Spickett
Date: Thu, 26 Oct 2023 08:33:30 +0100
Subject: [PATCH 016/877] [lldb][lldb-server] Enable sending RegisterFlags as
 XML (#69951)

This adds ToXML methods to encode RegisterFlags and its fields into XML
according to GDB's target XML format:
https://sourceware.org/gdb/onlinedocs/gdb/Target-Description-Format.html#Target-Description-Format

lldb-server does not use libXML to build XML, so this follows the
existing code that uses strings. Indentation is used so the result is
still human readable.
```
<flags id="Foo" size="4">
  <field name="abc" start="0" end="0"/>
</flags>
```
This is used by lldb-server when building target XML, though no one
sets any fields yet. That'll come in a later commit.
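As a sketch of how the pieces fit together (the register name and field layout below are invented; only the constructors and ToXML come from this patch), a consumer could build and serialize a set of flags like this:
```cpp
#include "lldb/Target/RegisterFlags.h"
#include "lldb/Utility/StreamString.h"

using namespace lldb_private;

// Hypothetical 4-byte status register with a single-bit field and a
// multi-bit field. ToXML emits the <flags>/<field> elements shown above.
static std::string DumpExampleFlags() {
  RegisterFlags flags("example_flags", /*size=*/4,
                      {RegisterFlags::Field("N", 31),      // bit 31 only
                       RegisterFlags::Field("mode", 0, 3)}); // bits 0-3
  StreamString strm;
  flags.ToXML(strm);
  return strm.GetString().str();
}
```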
--- lldb/include/lldb/Target/RegisterFlags.h | 19 +++++-- .../GDBRemoteCommunicationServerLLGS.cpp | 9 ++++ lldb/source/Target/RegisterFlags.cpp | 46 +++++++++++++++++ lldb/unittests/Target/RegisterFlagsTest.cpp | 50 +++++++++++++++++++ 4 files changed, 119 insertions(+), 5 deletions(-) diff --git a/lldb/include/lldb/Target/RegisterFlags.h b/lldb/include/lldb/Target/RegisterFlags.h index d98bc0263e35e..7c5b97c2265fd 100644 --- a/lldb/include/lldb/Target/RegisterFlags.h +++ b/lldb/include/lldb/Target/RegisterFlags.h @@ -9,20 +9,21 @@ #ifndef LLDB_TARGET_REGISTERFLAGS_H #define LLDB_TARGET_REGISTERFLAGS_H -#include "lldb/Utility/Log.h" +#include +#include namespace lldb_private { +class StreamString; +class Log; + class RegisterFlags { public: class Field { public: /// Where start is the least significant bit and end is the most /// significant bit. The start bit must be <= the end bit. - Field(std::string name, unsigned start, unsigned end) - : m_name(std::move(name)), m_start(start), m_end(end) { - assert(m_start <= m_end && "Start bit must be <= end bit."); - } + Field(std::string name, unsigned start, unsigned end); /// Construct a field that occupies a single bit. Field(std::string name, unsigned bit_position) @@ -51,6 +52,11 @@ class RegisterFlags { /// covered by either field. unsigned PaddingDistance(const Field &other) const; + /// Output XML that describes this field, to be inserted into a target XML + /// file. Reserved characters in field names like "<" are replaced with + /// their XML safe equivalents like ">". + void ToXML(StreamString &strm) const; + bool operator<(const Field &rhs) const { return GetStart() < rhs.GetStart(); } @@ -106,6 +112,9 @@ class RegisterFlags { /// be split into many tables as needed. std::string AsTable(uint32_t max_width) const; + // Output XML that describes this set of flags. 
+ void ToXML(StreamString &strm) const; + private: const std::string m_id; /// Size in bytes diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp index 23c2f18cd388a..187c23a206094 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp @@ -3094,6 +3094,12 @@ GDBRemoteCommunicationServerLLGS::BuildTargetXml() { continue; } + if (reg_info->flags_type) { + response.IndentMore(); + reg_info->flags_type->ToXML(response); + response.IndentLess(); + } + response.Indent(); response.Printf("flags_type) + response << "type=\"" << reg_info->flags_type->GetID() << "\" "; + const char *const register_set_name = reg_context.GetRegisterSetNameForRegisterAtIndex(reg_index); if (register_set_name) diff --git a/lldb/source/Target/RegisterFlags.cpp b/lldb/source/Target/RegisterFlags.cpp index 06fb45d777ec3..49974718ccb51 100644 --- a/lldb/source/Target/RegisterFlags.cpp +++ b/lldb/source/Target/RegisterFlags.cpp @@ -7,13 +7,21 @@ //===----------------------------------------------------------------------===// #include "lldb/Target/RegisterFlags.h" +#include "lldb/Utility/Log.h" #include "lldb/Utility/StreamString.h" +#include "llvm/ADT/StringExtras.h" + #include #include using namespace lldb_private; +RegisterFlags::Field::Field(std::string name, unsigned start, unsigned end) + : m_name(std::move(name)), m_start(start), m_end(end) { + assert(m_start <= m_end && "Start bit must be <= end bit."); +} + void RegisterFlags::Field::log(Log *log) const { LLDB_LOG(log, " Name: \"{0}\" Start: {1} End: {2}", m_name.c_str(), m_start, m_end); @@ -175,3 +183,41 @@ std::string RegisterFlags::AsTable(uint32_t max_width) const { return table; } + +void RegisterFlags::ToXML(StreamString &strm) const { + // Example XML: + // + // + // + strm.Indent(); + strm << ""; + for (const Field &field : m_fields) { + // Skip padding fields. + if (field.GetName().empty()) + continue; + + strm << "\n"; + strm.IndentMore(); + field.ToXML(strm); + strm.IndentLess(); + } + strm.PutChar('\n'); + strm.Indent("\n"); +} + +void RegisterFlags::Field::ToXML(StreamString &strm) const { + // Example XML: + // + strm.Indent(); + strm << ""; +} diff --git a/lldb/unittests/Target/RegisterFlagsTest.cpp b/lldb/unittests/Target/RegisterFlagsTest.cpp index 167e28d0cecb3..c7a4192031655 100644 --- a/lldb/unittests/Target/RegisterFlagsTest.cpp +++ b/lldb/unittests/Target/RegisterFlagsTest.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "lldb/Target/RegisterFlags.h" +#include "lldb/Utility/StreamString.h" #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -258,3 +259,52 @@ TEST(RegisterFlagsTest, AsTable) { "| really long name |", max_many_columns.AsTable(23)); } + +TEST(RegisterFieldsTest, ToXML) { + StreamString strm; + + // RegisterFlags requires that some fields be given, so no testing of empty + // input. + + // Unnamed fields are padding that are ignored. This applies to fields passed + // in, and those generated to fill the other bits (31-1 here). 
+ RegisterFlags("Foo", 4, {RegisterFlags::Field("", 0, 0)}).ToXML(strm); + ASSERT_EQ(strm.GetString(), "\n" + "\n"); + + strm.Clear(); + RegisterFlags("Foo", 4, {RegisterFlags::Field("abc", 0, 0)}).ToXML(strm); + ASSERT_EQ(strm.GetString(), "\n" + " \n" + "\n"); + + strm.Clear(); + // Should use the current indentation level as a starting point. + strm.IndentMore(); + RegisterFlags( + "Bar", 5, + {RegisterFlags::Field("f1", 25, 32), RegisterFlags::Field("f2", 10, 24)}) + .ToXML(strm); + ASSERT_EQ(strm.GetString(), + " \n" + " \n" + " \n" + " \n"); + + strm.Clear(); + strm.IndentLess(); + // Should replace any XML unsafe characters in field names. + RegisterFlags("Safe", 8, + {RegisterFlags::Field("A<", 4), RegisterFlags::Field("B>", 3), + RegisterFlags::Field("C'", 2), RegisterFlags::Field("D\"", 1), + RegisterFlags::Field("E&", 0)}) + .ToXML(strm); + ASSERT_EQ(strm.GetString(), + "\n" + " \n" + " \n" + " \n" + " \n" + " \n" + "\n"); +} From 18775a49416cf767b34eaaf925df5143e40582f4 Mon Sep 17 00:00:00 2001 From: Matthew Devereau Date: Thu, 26 Oct 2023 08:42:25 +0100 Subject: [PATCH 017/877] [AArch64][SVE2] Use rshrnb for masked stores (#70026) This patch is a follow up on https://reviews.llvm.org/D155299. This patch combines add+lsr to rshrnb when 'B' in: C = A + B D = C >> Shift is equal to (1 << (Shift-1), and the bits in the top half of each vector element are zeroed or ignored, such as in a truncating masked store. --- .../Target/AArch64/AArch64ISelLowering.cpp | 33 +++++++++++++++---- .../AArch64/sve2-intrinsics-combine-rshrnb.ll | 19 +++++++++++ 2 files changed, 45 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 7211607fee528..038c23b5e8d50 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -21002,6 +21002,12 @@ static SDValue combineBoolVectorAndTruncateStore(SelectionDAG &DAG, Store->getMemOperand()); } +bool isHalvingTruncateOfLegalScalableType(EVT SrcVT, EVT DstVT) { + return (SrcVT == MVT::nxv8i16 && DstVT == MVT::nxv8i8) || + (SrcVT == MVT::nxv4i32 && DstVT == MVT::nxv4i16) || + (SrcVT == MVT::nxv2i64 && DstVT == MVT::nxv2i32); +} + static SDValue performSTORECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG, @@ -21043,16 +21049,16 @@ static SDValue performSTORECombine(SDNode *N, if (SDValue Store = combineBoolVectorAndTruncateStore(DAG, ST)) return Store; - if (ST->isTruncatingStore()) + if (ST->isTruncatingStore()) { + EVT StoreVT = ST->getMemoryVT(); + if (!isHalvingTruncateOfLegalScalableType(ValueVT, StoreVT)) + return SDValue(); if (SDValue Rshrnb = trySimplifySrlAddToRshrnb(ST->getOperand(1), DAG, Subtarget)) { - EVT StoreVT = ST->getMemoryVT(); - if ((ValueVT == MVT::nxv8i16 && StoreVT == MVT::nxv8i8) || - (ValueVT == MVT::nxv4i32 && StoreVT == MVT::nxv4i16) || - (ValueVT == MVT::nxv2i64 && StoreVT == MVT::nxv2i32)) - return DAG.getTruncStore(ST->getChain(), ST, Rshrnb, ST->getBasePtr(), - StoreVT, ST->getMemOperand()); + return DAG.getTruncStore(ST->getChain(), ST, Rshrnb, ST->getBasePtr(), + StoreVT, ST->getMemOperand()); } + } return SDValue(); } @@ -21098,6 +21104,19 @@ static SDValue performMSTORECombine(SDNode *N, } } + if (MST->isTruncatingStore()) { + EVT ValueVT = Value->getValueType(0); + EVT MemVT = MST->getMemoryVT(); + if (!isHalvingTruncateOfLegalScalableType(ValueVT, MemVT)) + return SDValue(); + if (SDValue Rshrnb = trySimplifySrlAddToRshrnb(Value, DAG, Subtarget)) { 
+ return DAG.getMaskedStore(MST->getChain(), DL, Rshrnb, MST->getBasePtr(), + MST->getOffset(), MST->getMask(), + MST->getMemoryVT(), MST->getMemOperand(), + MST->getAddressingMode(), true); + } + } + return SDValue(); } diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-combine-rshrnb.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-combine-rshrnb.ll index a913177623df9..0afd11d098a00 100644 --- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-combine-rshrnb.ll +++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-combine-rshrnb.ll @@ -298,3 +298,22 @@ define void @neg_add_lshr_rshrnb_s(ptr %ptr, ptr %dst, i64 %index){ store %3, ptr %4, align 1 ret void } + +define void @masked_store_rshrnb(ptr %ptr, ptr %dst, i64 %index, %mask) { ; preds = %vector.body, %vector.ph +; CHECK-LABEL: masked_store_rshrnb: +; CHECK: // %bb.0: +; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] +; CHECK-NEXT: rshrnb z0.b, z0.h, #6 +; CHECK-NEXT: st1b { z0.h }, p0, [x1, x2] +; CHECK-NEXT: ret + %wide.masked.load = tail call @llvm.masked.load.nxv8i16.p0(ptr %ptr, i32 2, %mask, poison) + %1 = add %wide.masked.load, trunc ( shufflevector ( insertelement ( poison, i32 32, i64 0), poison, zeroinitializer) to ) + %2 = lshr %1, trunc ( shufflevector ( insertelement ( poison, i32 6, i64 0), poison, zeroinitializer) to ) + %3 = trunc %2 to + %4 = getelementptr inbounds i8, ptr %dst, i64 %index + tail call void @llvm.masked.store.nxv8i8.p0( %3, ptr %4, i32 1, %mask) + ret void +} + +declare void @llvm.masked.store.nxv8i8.p0(, ptr, i32, ) +declare @llvm.masked.load.nxv8i16.p0(ptr, i32, , ) From 05dcfa44c0f4c68c3a7b831d6ac2dd0572741ece Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Thu, 26 Oct 2023 10:55:53 +0300 Subject: [PATCH 018/877] [clang] [Gnu] Improve GCCVersion parsing to match versions such as "10-win32" (#69079) In earlier GCC versions, the Debian/Ubuntu provided mingw toolchains were packaged in /usr/lib/gcc/ with version strings such as "5.3-win32", which were matched and found since 6afcd64eb65fca233a7b173f88cffb2c2c9c114c. However in recent versions, they have stopped including the minor version number and only have version strings such as "10-win32" and "10-posix". Generalize the parsing code to tolerate the patch suffix to be present on a version number with only a major number. Refactor the string parsing code to highlight the overall structure of the parsing. This implementation should yield the same result as before, except for when there's only one segment and it has trailing, non-number contents. This allows Clang to find the GCC libraries and headers in Debian/Ubuntu provided MinGW cross compilers. 
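To make the accepted shapes concrete, here are a few parses in the style of the GCCVersionTest table that follows, as {Text, Major, Minor, Patch, MajorStr, MinorStr, PatchSuffix}. The first two rows are hypothetical additions written from the new parsing rules; the last mirrors the entry the patch adds:
```
{"5", 5, -1, -1, "5", "", ""},                     // major only, no suffix
{"10-posix", 10, -1, -1, "10", "", "-posix"},      // suffix on a lone major
{"10-win32", 10, -1, -1, "10", "", "-win32"},
```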
--- clang/lib/Driver/ToolChains/Gnu.cpp | 83 +++++++++++++++-------- clang/unittests/Driver/GCCVersionTest.cpp | 1 + 2 files changed, 56 insertions(+), 28 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index cdd911af9a733..a98dfa02fc717 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -2007,45 +2007,72 @@ Generic_GCC::GCCVersion Generic_GCC::GCCVersion::Parse(StringRef VersionText) { std::pair First = VersionText.split('.'); std::pair Second = First.second.split('.'); - GCCVersion GoodVersion = {VersionText.str(), -1, -1, -1, "", "", ""}; - if (First.first.getAsInteger(10, GoodVersion.Major) || GoodVersion.Major < 0) - return BadVersion; - GoodVersion.MajorStr = First.first.str(); - if (First.second.empty()) - return GoodVersion; + StringRef MajorStr = First.first; StringRef MinorStr = Second.first; - if (Second.second.empty()) { - if (size_t EndNumber = MinorStr.find_first_not_of("0123456789")) { - GoodVersion.PatchSuffix = std::string(MinorStr.substr(EndNumber)); - MinorStr = MinorStr.slice(0, EndNumber); - } - } - if (MinorStr.getAsInteger(10, GoodVersion.Minor) || GoodVersion.Minor < 0) - return BadVersion; - GoodVersion.MinorStr = MinorStr.str(); + StringRef PatchStr = Second.second; - // First look for a number prefix and parse that if present. Otherwise just - // stash the entire patch string in the suffix, and leave the number - // unspecified. This covers versions strings such as: - // 5 (handled above) + GCCVersion GoodVersion = {VersionText.str(), -1, -1, -1, "", "", ""}; + + // Parse version number strings such as: + // 5 // 4.4 // 4.4-patched // 4.4.0 // 4.4.x // 4.4.2-rc4 // 4.4.x-patched - // And retains any patch number it finds. - StringRef PatchText = Second.second; - if (!PatchText.empty()) { - if (size_t EndNumber = PatchText.find_first_not_of("0123456789")) { - // Try to parse the number and any suffix. - if (PatchText.slice(0, EndNumber).getAsInteger(10, GoodVersion.Patch) || - GoodVersion.Patch < 0) - return BadVersion; - GoodVersion.PatchSuffix = std::string(PatchText.substr(EndNumber)); + // 10-win32 + // Split on '.', handle 1, 2 or 3 such segments. Each segment must contain + // purely a number, except for the last one, where a non-number suffix + // is stored in PatchSuffix. The third segment is allowed to not contain + // a number at all. + + auto TryParseLastNumber = [&](StringRef Segment, int &Number, + std::string &OutStr) -> bool { + // Look for a number prefix and parse that, and split out any trailing + // string into GoodVersion.PatchSuffix. 
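+    // For example, a Segment of "2-rc4" yields Number = 2, OutStr = "2"
+    // and a PatchSuffix of "-rc4"; "10-win32" yields 10 and "-win32".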
+ + if (size_t EndNumber = Segment.find_first_not_of("0123456789")) { + StringRef NumberStr = Segment.slice(0, EndNumber); + if (NumberStr.getAsInteger(10, Number) || Number < 0) + return false; + OutStr = NumberStr; + GoodVersion.PatchSuffix = Segment.substr(EndNumber); + return true; } + return false; + }; + auto TryParseNumber = [](StringRef Segment, int &Number) -> bool { + if (Segment.getAsInteger(10, Number) || Number < 0) + return false; + return true; + }; + + if (MinorStr.empty()) { + // If no minor string, major is the last segment + if (!TryParseLastNumber(MajorStr, GoodVersion.Major, GoodVersion.MajorStr)) + return BadVersion; + return GoodVersion; } + if (!TryParseNumber(MajorStr, GoodVersion.Major)) + return BadVersion; + GoodVersion.MajorStr = MajorStr; + + if (PatchStr.empty()) { + // If no patch string, minor is the last segment + if (!TryParseLastNumber(MinorStr, GoodVersion.Minor, GoodVersion.MinorStr)) + return BadVersion; + return GoodVersion; + } + + if (!TryParseNumber(MinorStr, GoodVersion.Minor)) + return BadVersion; + GoodVersion.MinorStr = MinorStr; + + // For the last segment, tolerate a missing number. + std::string DummyStr; + TryParseLastNumber(PatchStr, GoodVersion.Patch, DummyStr); return GoodVersion; } diff --git a/clang/unittests/Driver/GCCVersionTest.cpp b/clang/unittests/Driver/GCCVersionTest.cpp index 88c26dfe814e3..3158911fe5db9 100644 --- a/clang/unittests/Driver/GCCVersionTest.cpp +++ b/clang/unittests/Driver/GCCVersionTest.cpp @@ -39,6 +39,7 @@ const VersionParseTest TestCases[] = { {"4.4.2-rc4", 4, 4, 2, "4", "4", "-rc4"}, {"4.4.x-patched", 4, 4, -1, "4", "4", ""}, {"not-a-version", -1, -1, -1, "", "", ""}, + {"10-win32", 10, -1, -1, "10", "", "-win32"}, }; TEST(GCCVersionTest, Parse) { From de7c0068329d78027df7b7184d72646c1ca9f2bd Mon Sep 17 00:00:00 2001 From: Qiu Chaofan Date: Thu, 26 Oct 2023 15:56:32 +0800 Subject: [PATCH 019/877] [PowerPC] Fix use of FPSCR builtins in smmintrin.h (#67299) smmintrin.h uses __builtin_mffs, __builtin_mffsl, __builtin_mtfsf and __builtin_set_fpscr_rn. This patch replaces the uses with ppc prefix and implement the missing ones. 
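For orientation, a minimal use of the ppc-prefixed spellings (the function and variable names here are illustrative only; the builtins themselves are the ones this patch wires up):
```c
/* Read the FPSCR, then set the rounding-mode (RN) bits to round-to-nearest. */
double read_fpscr_then_round_to_nearest(void) {
  double fpscr = __builtin_ppc_mffs(); /* FPSCR contents as a double */
  __builtin_ppc_set_fpscr_rn(0);       /* RN = 0b00: round to nearest */
  return fpscr;
}
```
With the compatibility macros added to PPC.cpp, the unprefixed __builtin_mffs, __builtin_mffsl, __builtin_mtfsf and __builtin_set_fpscr_rn names continue to work and expand to these ppc-prefixed builtins.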
--- clang/include/clang/Basic/BuiltinsPPC.def | 2 + clang/lib/Basic/Targets/PPC.cpp | 4 ++ clang/lib/CodeGen/CGBuiltin.cpp | 5 ++ clang/lib/Headers/ppc_wrappers/smmintrin.h | 50 +++++++++++++------ clang/test/CodeGen/PowerPC/builtins-ppc.c | 13 ++++- clang/test/CodeGen/PowerPC/ppc-emmintrin.c | 5 ++ clang/test/CodeGen/PowerPC/ppc-mmintrin.c | 5 ++ clang/test/CodeGen/PowerPC/ppc-pmmintrin.c | 3 ++ clang/test/CodeGen/PowerPC/ppc-smmintrin.c | 37 ++++++++------ clang/test/CodeGen/PowerPC/ppc-tmmintrin.c | 3 ++ clang/test/CodeGen/PowerPC/ppc-x86gprintrin.c | 3 ++ 11 files changed, 99 insertions(+), 31 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def index 18a1186053481..a35488ed3dfa5 100644 --- a/clang/include/clang/Basic/BuiltinsPPC.def +++ b/clang/include/clang/Basic/BuiltinsPPC.def @@ -151,9 +151,11 @@ TARGET_BUILTIN(__builtin_ppc_extract_exp, "Uid", "", "power9-vector") TARGET_BUILTIN(__builtin_ppc_extract_sig, "ULLid", "", "power9-vector") BUILTIN(__builtin_ppc_mtfsb0, "vUIi", "") BUILTIN(__builtin_ppc_mtfsb1, "vUIi", "") +BUILTIN(__builtin_ppc_mffs, "d", "") TARGET_BUILTIN(__builtin_ppc_mffsl, "d", "", "isa-v30-instructions") BUILTIN(__builtin_ppc_mtfsf, "vUIiUi", "") BUILTIN(__builtin_ppc_mtfsfi, "vUIiUIi", "") +BUILTIN(__builtin_ppc_set_fpscr_rn, "di", "") TARGET_BUILTIN(__builtin_ppc_insert_exp, "ddULLi", "", "power9-vector") BUILTIN(__builtin_ppc_fmsub, "dddd", "") BUILTIN(__builtin_ppc_fmsubs, "ffff", "") diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp index 0d87a3a4e8c20..5ce276e1af9ef 100644 --- a/clang/lib/Basic/Targets/PPC.cpp +++ b/clang/lib/Basic/Targets/PPC.cpp @@ -264,6 +264,10 @@ static void defineXLCompatMacros(MacroBuilder &Builder) { Builder.defineMacro("__builtin_minfe", "__builtin_ppc_minfe"); Builder.defineMacro("__builtin_minfl", "__builtin_ppc_minfl"); Builder.defineMacro("__builtin_minfs", "__builtin_ppc_minfs"); + Builder.defineMacro("__builtin_mffs", "__builtin_ppc_mffs"); + Builder.defineMacro("__builtin_mffsl", "__builtin_ppc_mffsl"); + Builder.defineMacro("__builtin_mtfsf", "__builtin_ppc_mtfsf"); + Builder.defineMacro("__builtin_set_fpscr_rn", "__builtin_ppc_set_fpscr_rn"); } /// PPCTargetInfo::getTargetDefines - Return a set of the PowerPC-specific diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 85be8bdd00516..dce5ee5888c45 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -17397,6 +17397,11 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, Value *Op1 = EmitScalarExpr(E->getArg(1)); return Builder.CreateFDiv(Op0, Op1, "swdiv"); } + case PPC::BI__builtin_ppc_set_fpscr_rn: + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_setrnd), + {EmitScalarExpr(E->getArg(0))}); + case PPC::BI__builtin_ppc_mffs: + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_readflm)); } } diff --git a/clang/lib/Headers/ppc_wrappers/smmintrin.h b/clang/lib/Headers/ppc_wrappers/smmintrin.h index 349b395c4f00b..19cdecb18d2b8 100644 --- a/clang/lib/Headers/ppc_wrappers/smmintrin.h +++ b/clang/lib/Headers/ppc_wrappers/smmintrin.h @@ -14,7 +14,7 @@ #ifndef NO_WARN_X86_INTRINSICS /* This header is distributed to simplify porting x86_64 code that - makes explicit use of Intel intrinsics to powerp64/powerpc64le. + makes explicit use of Intel intrinsics to powerpc64/powerpc64le. It is the user's responsibility to determine if the results are acceptable and make additional changes as necessary. 
@@ -68,10 +68,10 @@ extern __inline __m128d __asm__("mffsce %0" : "=f"(__fpscr_save.__fr)); __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; #else - __fpscr_save.__fr = __builtin_mffs(); + __fpscr_save.__fr = __builtin_ppc_mffs(); __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; __fpscr_save.__fpscr &= ~0xf8; - __builtin_mtfsf(0b00000011, __fpscr_save.__fr); + __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr); #endif /* Insert an artificial "read/write" reference to the variable read below, to ensure the compiler does not schedule @@ -83,10 +83,15 @@ extern __inline __m128d switch (__rounding) { case _MM_FROUND_TO_NEAREST_INT: - __fpscr_save.__fr = __builtin_mffsl(); +#ifdef _ARCH_PWR9 + __fpscr_save.__fr = __builtin_ppc_mffsl(); +#else + __fpscr_save.__fr = __builtin_ppc_mffs(); + __fpscr_save.__fpscr &= 0x70007f0ffL; +#endif __attribute__((fallthrough)); case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC: - __builtin_set_fpscr_rn(0b00); + __builtin_ppc_set_fpscr_rn(0b00); /* Insert an artificial "read/write" reference to the variable read below, to ensure the compiler does not schedule a read/use of the variable before the FPSCR is modified, above. @@ -102,7 +107,7 @@ extern __inline __m128d This can be removed if and when GCC PR102783 is fixed. */ __asm__("" : : "wa"(__r)); - __builtin_set_fpscr_rn(__fpscr_save.__fpscr); + __builtin_ppc_set_fpscr_rn(__fpscr_save.__fpscr); break; case _MM_FROUND_TO_NEG_INF: case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC: @@ -128,9 +133,14 @@ extern __inline __m128d */ __asm__("" : : "wa"(__r)); /* Restore enabled exceptions. */ - __fpscr_save.__fr = __builtin_mffsl(); +#ifdef _ARCH_PWR9 + __fpscr_save.__fr = __builtin_ppc_mffsl(); +#else + __fpscr_save.__fr = __builtin_ppc_mffs(); + __fpscr_save.__fpscr &= 0x70007f0ffL; +#endif __fpscr_save.__fpscr |= __enables_save.__fpscr; - __builtin_mtfsf(0b00000011, __fpscr_save.__fr); + __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr); } return (__m128d)__r; } @@ -159,10 +169,10 @@ extern __inline __m128 __asm__("mffsce %0" : "=f"(__fpscr_save.__fr)); __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; #else - __fpscr_save.__fr = __builtin_mffs(); + __fpscr_save.__fr = __builtin_ppc_mffs(); __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; __fpscr_save.__fpscr &= ~0xf8; - __builtin_mtfsf(0b00000011, __fpscr_save.__fr); + __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr); #endif /* Insert an artificial "read/write" reference to the variable read below, to ensure the compiler does not schedule @@ -174,10 +184,15 @@ extern __inline __m128 switch (__rounding) { case _MM_FROUND_TO_NEAREST_INT: - __fpscr_save.__fr = __builtin_mffsl(); +#ifdef _ARCH_PWR9 + __fpscr_save.__fr = __builtin_ppc_mffsl(); +#else + __fpscr_save.__fr = __builtin_ppc_mffs(); + __fpscr_save.__fpscr &= 0x70007f0ffL; +#endif __attribute__((fallthrough)); case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC: - __builtin_set_fpscr_rn(0b00); + __builtin_ppc_set_fpscr_rn(0b00); /* Insert an artificial "read/write" reference to the variable read below, to ensure the compiler does not schedule a read/use of the variable before the FPSCR is modified, above. @@ -193,7 +208,7 @@ extern __inline __m128 This can be removed if and when GCC PR102783 is fixed. 
*/ __asm__("" : : "wa"(__r)); - __builtin_set_fpscr_rn(__fpscr_save.__fpscr); + __builtin_ppc_set_fpscr_rn(__fpscr_save.__fpscr); break; case _MM_FROUND_TO_NEG_INF: case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC: @@ -219,9 +234,14 @@ extern __inline __m128 */ __asm__("" : : "wa"(__r)); /* Restore enabled exceptions. */ - __fpscr_save.__fr = __builtin_mffsl(); +#ifdef _ARCH_PWR9 + __fpscr_save.__fr = __builtin_ppc_mffsl(); +#else + __fpscr_save.__fr = __builtin_ppc_mffs(); + __fpscr_save.__fpscr &= 0x70007f0ffL; +#endif __fpscr_save.__fpscr |= __enables_save.__fpscr; - __builtin_mtfsf(0b00000011, __fpscr_save.__fr); + __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr); } return (__m128)__r; } diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc.c b/clang/test/CodeGen/PowerPC/builtins-ppc.c index ccc91b6560845..c13edf44cdcbd 100644 --- a/clang/test/CodeGen/PowerPC/builtins-ppc.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc.c @@ -1,5 +1,8 @@ // REQUIRES: powerpc-registered-target -// RUN: %clang_cc1 -triple powerpc-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple powerpc-unknown-unknown -emit-llvm %s -o - \ +// RUN: | FileCheck %s +// RUN: %clang_cc1 -triple powerpc-unknown-unknown -emit-llvm %s -o - \ +// RUN: -target-cpu pwr9 | FileCheck %s --check-prefixes=P9,CHECK void test_eh_return_data_regno() { @@ -26,6 +29,9 @@ void test_builtin_ppc_setrnd() { // CHECK: call double @llvm.ppc.setrnd(i32 %2) res = __builtin_setrnd(x); + + // CHECK: call double @llvm.ppc.setrnd(i32 %4) + res = __builtin_ppc_set_fpscr_rn(x); } void test_builtin_ppc_flm() { @@ -33,7 +39,10 @@ void test_builtin_ppc_flm() { // CHECK: call double @llvm.ppc.readflm() res = __builtin_readflm(); - // CHECK: call double @llvm.ppc.setflm(double %1) + // CHECK: call double @llvm.ppc.readflm() + res = __builtin_ppc_mffs(); + + // CHECK: call double @llvm.ppc.setflm(double %2) res = __builtin_setflm(res); #ifdef _ARCH_PWR9 diff --git a/clang/test/CodeGen/PowerPC/ppc-emmintrin.c b/clang/test/CodeGen/PowerPC/ppc-emmintrin.c index e2d26e611ac81..15d291496c20a 100644 --- a/clang/test/CodeGen/PowerPC/ppc-emmintrin.c +++ b/clang/test/CodeGen/PowerPC/ppc-emmintrin.c @@ -8,6 +8,11 @@ // RUN: %clang -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr10 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ // RUN: -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK-P10 +// RUN: %clang -x c++ -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ +// RUN: -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -fsyntax-only +// RUN: %clang -x c++ -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr10 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ +// RUN: -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -fsyntax-only + // RUN: %clang -S -emit-llvm -target powerpc64-ibm-aix -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ // RUN: -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-BE // RUN: %clang -S -emit-llvm -target powerpc64-ibm-aix -mcpu=pwr10 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ diff --git a/clang/test/CodeGen/PowerPC/ppc-mmintrin.c b/clang/test/CodeGen/PowerPC/ppc-mmintrin.c index 4cb5b8540092f..1dc6292ae3244 100644 --- a/clang/test/CodeGen/PowerPC/ppc-mmintrin.c +++ b/clang/test/CodeGen/PowerPC/ppc-mmintrin.c @@ -9,6 +9,11 @@ // 
RUN: %clang -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr9 -DNO_WARN_X86_INTRINSICS %s \ // RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n| FileCheck %s --check-prefixes=CHECK-P9,CHECK,CHECK-LE +// RUN: %clang -x c++ -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr8 -DNO_WARN_X86_INTRINSICS %s \ +// RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -fsyntax-only +// RUN: %clang -x c++ -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr9 -DNO_WARN_X86_INTRINSICS %s \ +// RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -fsyntax-only + // RUN: %clang -S -emit-llvm -target powerpc64-unknown-freebsd13.0 -mcpu=pwr8 -DNO_WARN_X86_INTRINSICS %s \ // RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK-P8,CHECK,CHECK-BE // RUN: %clang -S -emit-llvm -target powerpc64le-unknown-freebsd13.0 -mcpu=pwr8 -DNO_WARN_X86_INTRINSICS %s \ diff --git a/clang/test/CodeGen/PowerPC/ppc-pmmintrin.c b/clang/test/CodeGen/PowerPC/ppc-pmmintrin.c index 39194427978ad..6e152c549498d 100644 --- a/clang/test/CodeGen/PowerPC/ppc-pmmintrin.c +++ b/clang/test/CodeGen/PowerPC/ppc-pmmintrin.c @@ -13,6 +13,9 @@ // RUN: %clang -S -emit-llvm -target powerpc64-ibm-aix -mcpu=pwr8 -DNO_MM_MALLOC -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ // RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s +// RUN: %clang -x c++ -S -emit-llvm -target powerpc64le-gnu-linux -mcpu=pwr8 -DNO_MM_MALLOC -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ +// RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -fsyntax-only + #include __m128d resd, md1, md2; diff --git a/clang/test/CodeGen/PowerPC/ppc-smmintrin.c b/clang/test/CodeGen/PowerPC/ppc-smmintrin.c index 220b65c1ce164..7daef71a61c32 100644 --- a/clang/test/CodeGen/PowerPC/ppc-smmintrin.c +++ b/clang/test/CodeGen/PowerPC/ppc-smmintrin.c @@ -15,6 +15,11 @@ // RUN: %clang -S -emit-llvm -target powerpc64-unknown-linux-gnu -mcpu=pwr10 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ // RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefix=P10 +// RUN: %clang -x c++ -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ +// RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -fsyntax-only +// RUN: %clang -x c++ -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr10 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ +// RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -fsyntax-only + // RUN: %clang -S -emit-llvm -target powerpc64le-unknown-freebsd13.0 -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ // RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s // RUN: %clang -S -emit-llvm -target powerpc64-unknown-freebsd13.0 -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ @@ -239,44 +244,48 @@ test_round() { // CHECK-LABEL: @test_round // CHECK-LABEL: define available_externally <4 x float> @_mm_round_ps(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef signext %{{[0-9a-zA-Z_.]+}}) -// CHECK: call signext i32 @__builtin_mffs() -// CHECK: call signext i32 @__builtin_mtfsf(i32 noundef signext 3, double noundef %{{[0-9a-zA-Z_.]+}}) +// CHECK: call double @llvm.ppc.readflm() +// CHECK: call void @llvm.ppc.mtfsf(i32 3, double %{{[0-9a-zA-Z_.]+}}) // CHECK: %{{[0-9a-zA-Z_.]+}} = call <4 x float> asm "", "=^wa,0" -// CHECK: call 
signext i32 @__builtin_mffsl() -// CHECK: call signext i32 @__builtin_set_fpscr_rn(i32 noundef signext 0) +// CHECK: call double @llvm.ppc.readflm() +// P10: call double @llvm.ppc.mffsl() +// CHECK: call double @llvm.ppc.setrnd(i32 0) // CHECK: %{{[0-9a-zA-Z_.]+}} = call <4 x float> asm "", "=^wa,0" // CHECK: call <4 x float> @vec_rint(float vector[4]) // CHECK: call void asm sideeffect "", "^wa" -// CHECK: call signext i32 @__builtin_set_fpscr_rn(i64 noundef %{{[0-9a-zA-Z_.]+}}) +// CHECK: call double @llvm.ppc.setrnd(i32 %{{[0-9a-zA-Z_.]+}}) // CHECK: call <4 x float> @vec_floor(float vector[4]) // CHECK: call <4 x float> @vec_ceil(float vector[4]) // CHECK: call <4 x float> @vec_trunc(float vector[4]) // CHECK: call <4 x float> @vec_rint(float vector[4]) // CHECK: call void asm sideeffect "", "^wa" -// CHECK: call signext i32 @__builtin_mffsl() -// CHECK: call signext i32 @__builtin_mtfsf(i32 noundef signext 3, double noundef %{{[0-9a-zA-Z_.]+}}) +// CHECK: call double @llvm.ppc.readflm() +// P10: call double @llvm.ppc.mffsl() +// CHECK: call void @llvm.ppc.mtfsf(i32 3, double %{{[0-9a-zA-Z_.]+}}) // CHECK-LABEL: define available_externally <4 x float> @_mm_round_ss(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef signext %{{[0-9a-zA-Z_.]+}}) // CHECK: call <4 x float> @_mm_round_ps(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef signext %{{[0-9a-zA-Z_.]+}}) // CHECK: extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0 // CHECK-LABEL: define available_externally <2 x double> @_mm_round_pd(<2 x double> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef signext %{{[0-9a-zA-Z_.]+}}) -// CHECK: call signext i32 @__builtin_mffs() -// CHECK: call signext i32 @__builtin_mtfsf(i32 noundef signext 3, double noundef %{{[0-9a-zA-Z_.]+}}) +// CHECK: call double @llvm.ppc.readflm() +// CHECK: call void @llvm.ppc.mtfsf(i32 3, double %{{[0-9a-zA-Z_.]+}}) // CHECK: %{{[0-9a-zA-Z_.]+}} = call <2 x double> asm "", "=^wa,0" -// CHECK: call signext i32 @__builtin_mffsl() -// CHECK: call signext i32 @__builtin_set_fpscr_rn(i32 noundef signext 0) +// CHECK: call double @llvm.ppc.readflm() +// P10: call double @llvm.ppc.mffsl() +// CHECK: call double @llvm.ppc.setrnd(i32 0) // CHECK: %{{[0-9a-zA-Z_.]+}} = call <2 x double> asm "", "=^wa,0" // CHECK: call <2 x double> @vec_rint(double vector[2]) // CHECK: call void asm sideeffect "", "^wa" -// CHECK: call signext i32 @__builtin_set_fpscr_rn(i64 noundef %{{[0-9a-zA-Z_.]+}}) +// CHECK: call double @llvm.ppc.setrnd(i32 %{{[0-9a-zA-Z_.]+}}) // CHECK: call <2 x double> @vec_floor(double vector[2]) // CHECK: call <2 x double> @vec_ceil(double vector[2]) // CHECK: call <2 x double> @vec_trunc(double vector[2]) // CHECK: call <2 x double> @vec_rint(double vector[2]) // CHECK: call void asm sideeffect "", "^wa" -// CHECK: call signext i32 @__builtin_mffsl() -// CHECK: call signext i32 @__builtin_mtfsf(i32 noundef signext 3, double noundef %{{[0-9a-zA-Z_.]+}}) +// CHECK: call double @llvm.ppc.readflm() +// P10: call double @llvm.ppc.mffsl() +// CHECK: call void @llvm.ppc.mtfsf(i32 3, double %{{[0-9a-zA-Z_.]+}}) // CHECK-LABEL: define available_externally <2 x double> @_mm_round_sd(<2 x double> noundef %{{[0-9a-zA-Z_.]+}}, <2 x double> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef signext %{{[0-9a-zA-Z_.]+}}) // CHECK: call <2 x double> @_mm_round_pd(<2 x double> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef signext %{{[0-9a-zA-Z_.]+}}) diff --git a/clang/test/CodeGen/PowerPC/ppc-tmmintrin.c b/clang/test/CodeGen/PowerPC/ppc-tmmintrin.c 
index 60633e34b56b9..40d3839dcf026 100644
--- a/clang/test/CodeGen/PowerPC/ppc-tmmintrin.c
+++ b/clang/test/CodeGen/PowerPC/ppc-tmmintrin.c
@@ -13,6 +13,9 @@
 // RUN: %clang -S -emit-llvm -target powerpc64-ibm-aix -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
 // RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-BE

+// RUN: %clang -x c++ -S -emit-llvm -target powerpc64le-gnu-linux -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
+// RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns -fsyntax-only
+
 #include <tmmintrin.h>

 __m64 res, m1, m2;
diff --git a/clang/test/CodeGen/PowerPC/ppc-x86gprintrin.c b/clang/test/CodeGen/PowerPC/ppc-x86gprintrin.c
index 238ce7c7ee574..ac90a5f8c530b 100644
--- a/clang/test/CodeGen/PowerPC/ppc-x86gprintrin.c
+++ b/clang/test/CodeGen/PowerPC/ppc-x86gprintrin.c
@@ -12,6 +12,9 @@
 // RUN: %clang -S -emit-llvm -target powerpc64-ibm-aix -mcpu=pwr7 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
 // RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s

+// RUN: %clang -x c++ -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr7 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
+// RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns -fsyntax-only
+
 #include <x86gprintrin.h>

 unsigned short us;

From a1260b5209968c08886e3c6183aa793de8931578 Mon Sep 17 00:00:00 2001
From: Pierre van Houtryve
Date: Thu, 26 Oct 2023 09:57:14 +0200
Subject: [PATCH 020/877] [AMDGPU] Use `S_CSELECT` for uniform i1 ext (#69703)

Solves #59869

---
 llvm/lib/Target/AMDGPU/SIInstructions.td | 25 +++++----
 llvm/test/CodeGen/AMDGPU/saddo.ll        | 65 ++++++++++++++----------
 llvm/test/CodeGen/AMDGPU/uaddo.ll        | 46 +++++++++--------
 llvm/test/CodeGen/AMDGPU/usubo.ll        | 46 +++++++++--------
 llvm/test/CodeGen/AMDGPU/zero_extend.ll  |  2 +-
 5 files changed, 107 insertions(+), 77 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 567f1b812c180..707a1c72b5b7c 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2278,17 +2278,24 @@ def : GCNPat <
   (REG_SEQUENCE SReg_64, $src, sub0, (i32 (IMPLICIT_DEF)), sub1)
>;

-class ZExt_i64_i1_Pat <SDNode ext> : GCNPat <
-  (i64 (ext i1:$src)),
-  (REG_SEQUENCE VReg_64,
-      (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
-                         /*src1mod*/(i32 0), /*src1*/(i32 1), $src),
-      sub0, (S_MOV_B32 (i32 0)), sub1)
->;
+multiclass ZExt_i64_i1_Pat <SDNode ext> {
+  def: GCNPat <
+    (i64 (ext i1:$src)),
+    (REG_SEQUENCE VReg_64,
+        (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+                           /*src1mod*/(i32 0), /*src1*/(i32 1), $src),
+        sub0, (S_MOV_B32 (i32 0)), sub1)
+  >;
+
+  def : GCNPat <
+    (i64 (UniformUnaryFrag<ext> SCC)),
+    (S_CSELECT_B64 (i64 1), (i64 0))
+  >;
+}

-def : ZExt_i64_i1_Pat<zext>;
-def : ZExt_i64_i1_Pat<anyext>;
+defm : ZExt_i64_i1_Pat<zext>;
+defm : ZExt_i64_i1_Pat<anyext>;

 // FIXME: We need to use COPY_TO_REGCLASS to work-around the fact that
 // REG_SEQUENCE patterns don't support instructions with multiple outputs.
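The effect on generated code is visible in the test updates that follow. Schematically, for a uniform i1 zero-extend feeding a 64-bit add, selection moves from a VALU sequence to a scalar one (this excerpt is taken from the SI check-line changes in saddo.ll below):
```
; before: materialize the i1 in a VGPR, then add with carry in VALU
v_cndmask_b32_e64 v0, 0, 1, s[4:5]
v_add_i32_e32     v0, vcc, s10, v0
v_addc_u32_e32    v1, vcc, 0, v1, vcc

; after: keep the value in SGPRs via SCC
s_cselect_b64     s[4:5], 1, 0
s_add_u32         s4, s10, s4
s_addc_u32        s5, s11, s5
```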
diff --git a/llvm/test/CodeGen/AMDGPU/saddo.ll b/llvm/test/CodeGen/AMDGPU/saddo.ll index cb3166d7a20d3..f6f3e47c3be7a 100644 --- a/llvm/test/CodeGen/AMDGPU/saddo.ll +++ b/llvm/test/CodeGen/AMDGPU/saddo.ll @@ -29,10 +29,12 @@ define amdgpu_kernel void @saddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %b) ; SI-NEXT: s_mov_b32 s0, s4 ; SI-NEXT: s_mov_b32 s1, s5 ; SI-NEXT: s_xor_b64 s[4:5], s[6:7], vcc -; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] -; SI-NEXT: v_mov_b32_e32 v1, s11 -; SI-NEXT: v_add_i32_e32 v0, vcc, s10, v0 -; SI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; SI-NEXT: s_and_b64 s[4:5], s[4:5], exec +; SI-NEXT: s_cselect_b64 s[4:5], 1, 0 +; SI-NEXT: s_add_u32 s4, s10, s4 +; SI-NEXT: s_addc_u32 s5, s11, s5 +; SI-NEXT: v_mov_b32_e32 v0, s4 +; SI-NEXT: v_mov_b32_e32 v1, s5 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; SI-NEXT: s_endpgm ; @@ -45,15 +47,17 @@ define amdgpu_kernel void @saddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %b) ; VI-NEXT: s_add_u32 s2, s6, s0 ; VI-NEXT: v_mov_b32_e32 v2, s7 ; VI-NEXT: s_addc_u32 s3, s7, s1 -; VI-NEXT: v_cmp_lt_i64_e64 s[8:9], s[0:1], 0 ; VI-NEXT: v_cmp_lt_i64_e32 vcc, s[2:3], v[1:2] -; VI-NEXT: v_mov_b32_e32 v3, s3 -; VI-NEXT: s_xor_b64 s[0:1], s[8:9], vcc -; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1] -; VI-NEXT: v_add_u32_e32 v2, vcc, s2, v2 +; VI-NEXT: v_cmp_lt_i64_e64 s[0:1], s[0:1], 0 ; VI-NEXT: v_mov_b32_e32 v0, s4 +; VI-NEXT: s_xor_b64 s[0:1], s[0:1], vcc +; VI-NEXT: s_and_b64 s[0:1], s[0:1], exec +; VI-NEXT: s_cselect_b64 s[0:1], 1, 0 +; VI-NEXT: s_add_u32 s0, s2, s0 +; VI-NEXT: s_addc_u32 s1, s3, s1 +; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v1, s5 -; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; VI-NEXT: v_mov_b32_e32 v2, s0 ; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] ; VI-NEXT: s_endpgm ; @@ -67,13 +71,15 @@ define amdgpu_kernel void @saddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %b) ; GFX9-NEXT: s_add_u32 s0, s6, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: s_addc_u32 s1, s7, s3 -; GFX9-NEXT: v_cmp_lt_i64_e64 s[8:9], s[2:3], 0 ; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, s[0:1], v[0:1] +; GFX9-NEXT: v_cmp_lt_i64_e64 s[2:3], s[2:3], 0 +; GFX9-NEXT: s_xor_b64 s[2:3], s[2:3], vcc +; GFX9-NEXT: s_and_b64 s[2:3], s[2:3], exec +; GFX9-NEXT: s_cselect_b64 s[2:3], 1, 0 +; GFX9-NEXT: s_add_u32 s0, s0, s2 +; GFX9-NEXT: s_addc_u32 s1, s1, s3 +; GFX9-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s1 -; GFX9-NEXT: s_xor_b64 s[2:3], s[8:9], vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[2:3] -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s0, v0 -; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX9-NEXT: s_endpgm ; @@ -87,11 +93,14 @@ define amdgpu_kernel void @saddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %b) ; GFX10-NEXT: s_add_u32 s0, s6, s2 ; GFX10-NEXT: s_addc_u32 s1, s7, s3 ; GFX10-NEXT: v_cmp_lt_i64_e64 s2, s[2:3], 0 -; GFX10-NEXT: v_cmp_lt_i64_e64 s3, s[0:1], s[6:7] -; GFX10-NEXT: s_xor_b32 s2, s2, s3 -; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2 -; GFX10-NEXT: v_add_co_u32 v0, s0, s0, v0 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, s1, 0, s0 +; GFX10-NEXT: v_cmp_lt_i64_e64 s6, s[0:1], s[6:7] +; GFX10-NEXT: s_xor_b32 s2, s2, s6 +; GFX10-NEXT: s_and_b32 s2, s2, exec_lo +; GFX10-NEXT: s_cselect_b64 s[2:3], 1, 0 +; GFX10-NEXT: s_add_u32 s0, s0, s2 +; GFX10-NEXT: s_addc_u32 s1, s1, s3 +; GFX10-NEXT: v_mov_b32_e32 v0, s0 +; GFX10-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX10-NEXT: s_endpgm ; @@ -100,18 
+109,20 @@ define amdgpu_kernel void @saddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %b) ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x24 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x34 -; GFX11-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_add_u32 s2, s6, s0 ; GFX11-NEXT: s_addc_u32 s3, s7, s1 ; GFX11-NEXT: v_cmp_lt_i64_e64 s0, s[0:1], 0 -; GFX11-NEXT: v_cmp_lt_i64_e64 s1, s[2:3], s[6:7] +; GFX11-NEXT: v_cmp_lt_i64_e64 s6, s[2:3], s[6:7] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX11-NEXT: s_xor_b32 s0, s0, s1 -; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_add_co_u32 v0, s0, s2, v0 -; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, s3, 0, s0 +; GFX11-NEXT: s_xor_b32 s0, s0, s6 +; GFX11-NEXT: s_and_b32 s0, s0, exec_lo +; GFX11-NEXT: s_cselect_b64 s[0:1], 1, 0 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: s_add_u32 s0, s2, s0 +; GFX11-NEXT: s_addc_u32 s1, s3, s1 +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s1 ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5] ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) diff --git a/llvm/test/CodeGen/AMDGPU/uaddo.ll b/llvm/test/CodeGen/AMDGPU/uaddo.ll index 4363db2351e7a..0ebf3f5198203 100644 --- a/llvm/test/CodeGen/AMDGPU/uaddo.ll +++ b/llvm/test/CodeGen/AMDGPU/uaddo.ll @@ -7,21 +7,23 @@ define amdgpu_kernel void @s_uaddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 % ; SI-LABEL: s_uaddo_i64_zext: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; SI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd ; SI-NEXT: s_mov_b32 s3, 0xf000 -; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_add_u32 s0, s6, s0 +; SI-NEXT: v_mov_b32_e32 v0, s6 +; SI-NEXT: v_mov_b32_e32 v1, s7 +; SI-NEXT: s_addc_u32 s1, s7, s1 +; SI-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[0:1] +; SI-NEXT: s_and_b64 s[6:7], vcc, exec +; SI-NEXT: s_cselect_b64 s[6:7], 1, 0 +; SI-NEXT: s_add_u32 s6, s0, s6 +; SI-NEXT: s_addc_u32 s7, s1, s7 +; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_mov_b32 s0, s4 ; SI-NEXT: s_mov_b32 s1, s5 -; SI-NEXT: s_add_u32 s4, s6, s8 ; SI-NEXT: v_mov_b32_e32 v0, s6 ; SI-NEXT: v_mov_b32_e32 v1, s7 -; SI-NEXT: s_addc_u32 s5, s7, s9 -; SI-NEXT: v_cmp_lt_u64_e32 vcc, s[4:5], v[0:1] -; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; SI-NEXT: v_mov_b32_e32 v1, s5 -; SI-NEXT: v_add_i32_e32 v0, vcc, s4, v0 -; SI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; SI-NEXT: s_endpgm ; @@ -30,17 +32,19 @@ define amdgpu_kernel void @s_uaddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 % ; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_mov_b32_e32 v2, s6 +; VI-NEXT: v_mov_b32_e32 v1, s6 ; VI-NEXT: s_add_u32 s0, s6, s0 -; VI-NEXT: v_mov_b32_e32 v3, s7 ; VI-NEXT: s_addc_u32 s1, s7, s1 -; VI-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[2:3] -; VI-NEXT: v_mov_b32_e32 v3, s1 -; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2 +; VI-NEXT: v_mov_b32_e32 v2, s7 +; VI-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[1:2] ; VI-NEXT: v_mov_b32_e32 v0, s4 +; VI-NEXT: s_and_b64 s[2:3], vcc, exec +; VI-NEXT: s_cselect_b64 s[2:3], 1, 0 +; VI-NEXT: s_add_u32 s0, s0, s2 +; VI-NEXT: s_addc_u32 s1, s1, s3 +; 
VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v1, s5 -; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; VI-NEXT: v_mov_b32_e32 v2, s0 ; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] ; VI-NEXT: s_endpgm ; @@ -52,13 +56,15 @@ define amdgpu_kernel void @s_uaddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 % ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: s_add_u32 s0, s6, s2 -; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: s_addc_u32 s1, s7, s3 +; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[0:1] +; GFX9-NEXT: s_and_b64 s[2:3], vcc, exec +; GFX9-NEXT: s_cselect_b64 s[2:3], 1, 0 +; GFX9-NEXT: s_add_u32 s0, s0, s2 +; GFX9-NEXT: s_addc_u32 s1, s1, s3 +; GFX9-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s1 -; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s0, v0 -; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX9-NEXT: s_endpgm %uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) diff --git a/llvm/test/CodeGen/AMDGPU/usubo.ll b/llvm/test/CodeGen/AMDGPU/usubo.ll index 37b5be3b672f2..ade0616137b17 100644 --- a/llvm/test/CodeGen/AMDGPU/usubo.ll +++ b/llvm/test/CodeGen/AMDGPU/usubo.ll @@ -8,21 +8,23 @@ define amdgpu_kernel void @s_usubo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 % ; SI-LABEL: s_usubo_i64_zext: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; SI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd ; SI-NEXT: s_mov_b32 s3, 0xf000 -; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_sub_u32 s0, s6, s0 +; SI-NEXT: v_mov_b32_e32 v0, s6 +; SI-NEXT: v_mov_b32_e32 v1, s7 +; SI-NEXT: s_subb_u32 s1, s7, s1 +; SI-NEXT: v_cmp_gt_u64_e32 vcc, s[0:1], v[0:1] +; SI-NEXT: s_and_b64 s[6:7], vcc, exec +; SI-NEXT: s_cselect_b64 s[6:7], 1, 0 +; SI-NEXT: s_add_u32 s6, s0, s6 +; SI-NEXT: s_addc_u32 s7, s1, s7 +; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_mov_b32 s0, s4 ; SI-NEXT: s_mov_b32 s1, s5 -; SI-NEXT: s_sub_u32 s4, s6, s8 ; SI-NEXT: v_mov_b32_e32 v0, s6 ; SI-NEXT: v_mov_b32_e32 v1, s7 -; SI-NEXT: s_subb_u32 s5, s7, s9 -; SI-NEXT: v_cmp_gt_u64_e32 vcc, s[4:5], v[0:1] -; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; SI-NEXT: v_mov_b32_e32 v1, s5 -; SI-NEXT: v_add_i32_e32 v0, vcc, s4, v0 -; SI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; SI-NEXT: s_endpgm ; @@ -31,17 +33,19 @@ define amdgpu_kernel void @s_usubo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 % ; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_mov_b32_e32 v2, s6 +; VI-NEXT: v_mov_b32_e32 v1, s6 ; VI-NEXT: s_sub_u32 s0, s6, s0 -; VI-NEXT: v_mov_b32_e32 v3, s7 ; VI-NEXT: s_subb_u32 s1, s7, s1 -; VI-NEXT: v_cmp_gt_u64_e32 vcc, s[0:1], v[2:3] -; VI-NEXT: v_mov_b32_e32 v3, s1 -; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2 +; VI-NEXT: v_mov_b32_e32 v2, s7 +; VI-NEXT: v_cmp_gt_u64_e32 vcc, s[0:1], v[1:2] ; VI-NEXT: v_mov_b32_e32 v0, s4 +; VI-NEXT: s_and_b64 s[2:3], vcc, exec +; VI-NEXT: s_cselect_b64 s[2:3], 1, 0 +; VI-NEXT: s_add_u32 s0, s0, s2 +; VI-NEXT: s_addc_u32 s1, s1, s3 +; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v1, s5 -; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; VI-NEXT: v_mov_b32_e32 v2, s0 ; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] ; VI-NEXT: s_endpgm ; @@ -53,13 +57,15 @@ 
define amdgpu_kernel void @s_usubo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 % ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: s_sub_u32 s0, s6, s2 -; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: s_subb_u32 s1, s7, s3 +; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: v_cmp_gt_u64_e32 vcc, s[0:1], v[0:1] +; GFX9-NEXT: s_and_b64 s[2:3], vcc, exec +; GFX9-NEXT: s_cselect_b64 s[2:3], 1, 0 +; GFX9-NEXT: s_add_u32 s0, s0, s2 +; GFX9-NEXT: s_addc_u32 s1, s1, s3 +; GFX9-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s1 -; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s0, v0 -; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX9-NEXT: s_endpgm %usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b) #0 diff --git a/llvm/test/CodeGen/AMDGPU/zero_extend.ll b/llvm/test/CodeGen/AMDGPU/zero_extend.ll index 1f532f2706de7..9933cdc18e5fd 100644 --- a/llvm/test/CodeGen/AMDGPU/zero_extend.ll +++ b/llvm/test/CodeGen/AMDGPU/zero_extend.ll @@ -38,7 +38,7 @@ define amdgpu_kernel void @s_arg_zext_i1_to_i64(ptr addrspace(1) %out, i1 zeroex ; GCN-LABEL: {{^}}s_cmp_zext_i1_to_i64: ; GCN-DAG: s_mov_b32 s{{[0-9]+}}, 0 ; GCN-DAG: s_cmp_eq_u32 -; GCN: v_cndmask_b32 +; GCN: s_cselect_b64 s[{{[0-9]+:[0-9]+}}], 1, 0 define amdgpu_kernel void @s_cmp_zext_i1_to_i64(ptr addrspace(1) %out, i32 %a, i32 %b) #0 { %cmp = icmp eq i32 %a, %b %ext = zext i1 %cmp to i64 From e58c4c771329eda8ef407b080e2f91e30aabac73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Thu, 26 Oct 2023 10:58:28 +0300 Subject: [PATCH 021/877] [LLD] [COFF] Recognize Itanium vtables for ICF (#70196) The testcases are plain copies of the existing ICF vtable testcase, with symbol names renamed to match the Itanium vtable name pattern. --- lld/COFF/ICF.cpp | 5 ++++- lld/test/COFF/icf-vtables-itanium-i386.s | 27 +++++++++++++++++++++++ lld/test/COFF/icf-vtables-itanium.s | 28 ++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 1 deletion(-) create mode 100644 lld/test/COFF/icf-vtables-itanium-i386.s create mode 100644 lld/test/COFF/icf-vtables-itanium.s diff --git a/lld/COFF/ICF.cpp b/lld/COFF/ICF.cpp index 0f43da0dbc101..013ffcfb3d5d1 100644 --- a/lld/COFF/ICF.cpp +++ b/lld/COFF/ICF.cpp @@ -94,7 +94,10 @@ bool ICF::isEligible(SectionChunk *c) { return true; // So are vtables. - if (c->sym && c->sym->getName().starts_with("??_7")) + const char *itaniumVtablePrefix = + ctx.config.machine == I386 ? "__ZTV" : "_ZTV"; + if (c->sym && (c->sym->getName().starts_with("??_7") || + c->sym->getName().starts_with(itaniumVtablePrefix))) return true; // Anything else not in an address-significance table is eligible. 
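For context (editorial note, not part of the patch): under Itanium C++ mangling the vtable of a class S is named _ZTV1S, and on i386 MinGW targets symbols carry an extra leading underscore, giving __ZTV1S; that is the pair of prefixes checked above. A hypothetical source producing such a symbol:

// Building this for a MinGW target emits the vtable for S in a discardable
// COMDAT section named after _ZTV1S (or __ZTV1S on i386). Sections whose
// symbol starts with the Itanium prefix are now ICF-eligible, matching the
// existing treatment of MSVC-mangled ??_7 vtables.
struct S {
  virtual int f() { return 42; }
};

int main() {
  S s;
  return s.f();
}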
diff --git a/lld/test/COFF/icf-vtables-itanium-i386.s b/lld/test/COFF/icf-vtables-itanium-i386.s new file mode 100644 index 0000000000000..77b3755178b8b --- /dev/null +++ b/lld/test/COFF/icf-vtables-itanium-i386.s @@ -0,0 +1,27 @@ +# REQUIRES: x86 +# RUN: llvm-mc -triple=i386-windows-gnu -filetype=obj -o %t.obj %s +# RUN: lld-link %t.obj /out:%t.exe /entry:main /subsystem:console /safeseh:no +# RUN: llvm-objdump -s %t.exe | FileCheck %s + +# CHECK: Contents of section .text: +.globl _main +_main: +# CHECK-NEXT: 401000 00204000 01204000 01204000 +.long __ZTS +.long __ZTV +.long __ZTVa + +.section .rdata,"dr",discard,__ZTS +.globl __ZTS +__ZTS: +.byte 42 + +.section .rdata,"dr",discard,__ZTV +.globl __ZTV +__ZTV: +.byte 42 + +.section .rdata,"dr",discard,__ZTVa +.globl __ZTVa +__ZTVa: +.byte 42 diff --git a/lld/test/COFF/icf-vtables-itanium.s b/lld/test/COFF/icf-vtables-itanium.s new file mode 100644 index 0000000000000..94f2b611dc86b --- /dev/null +++ b/lld/test/COFF/icf-vtables-itanium.s @@ -0,0 +1,28 @@ +# REQUIRES: x86 +# RUN: llvm-mc -triple=x86_64-windows-gnu -filetype=obj -o %t.obj %s +# RUN: lld-link %t.obj /out:%t.exe /entry:main /subsystem:console +# RUN: llvm-objdump -s %t.exe | FileCheck %s + +# CHECK: Contents of section .text: +.globl main +main: +# CHECK-NEXT: 140001000 00200040 01000000 01200040 01000000 +.8byte _ZTS +.8byte _ZTV +# CHECK-NEXT: 140001010 01200040 01000000 +.8byte _ZTVa + +.section .rdata,"dr",discard,_ZTS +.globl _ZTS +_ZTS: +.byte 42 + +.section .rdata,"dr",discard,_ZTV +.globl _ZTV +_ZTV: +.byte 42 + +.section .rdata,"dr",discard,_ZTVa +.globl _ZTVa +_ZTVa: +.byte 42 From 34fe8be705c608c4c25ece725eef9305ccfa64e1 Mon Sep 17 00:00:00 2001 From: Mikael Holmen Date: Wed, 25 Oct 2023 07:09:54 +0200 Subject: [PATCH 022/877] [test][AggressiveInstCombine] Precommit testcase for #69925 We get different results with/without debug info present. --- .../AArch64/combine_ignore_debug.ll | 57 +++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 llvm/test/Transforms/AggressiveInstCombine/AArch64/combine_ignore_debug.ll diff --git a/llvm/test/Transforms/AggressiveInstCombine/AArch64/combine_ignore_debug.ll b/llvm/test/Transforms/AggressiveInstCombine/AArch64/combine_ignore_debug.ll new file mode 100644 index 0000000000000..4b41060544f7a --- /dev/null +++ b/llvm/test/Transforms/AggressiveInstCombine/AArch64/combine_ignore_debug.ll @@ -0,0 +1,57 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt -mtriple aarch64 -aggressive-instcombine-max-scan-instrs=1 -passes="aggressive-instcombine" -S < %s | FileCheck %s -check-prefix DBG +; RUN: opt -strip-debug -mtriple aarch64 -aggressive-instcombine-max-scan-instrs=1 -passes="aggressive-instcombine" -S < %s | FileCheck %s -check-prefix NODBG + +; FIXME: The DBG and NODBG cases should be the same. I.e. we should optimize the +; DBG case too even if there is a dbg.value. 
+; This is described in https://github.com/llvm/llvm-project/issues/69925 + +target datalayout = "E" + +%s = type { i16, i16 } + +@e = global %s zeroinitializer, align 1 +@l = global %s zeroinitializer, align 1 + +define void @test() { +; DBG-LABEL: define void @test() { +; DBG-NEXT: entry: +; DBG-NEXT: [[L1:%.*]] = load i16, ptr @e, align 1 +; DBG-NEXT: call void @llvm.dbg.value(metadata i32 undef, metadata [[META3:![0-9]+]], metadata !DIExpression()), !dbg [[DBG5:![0-9]+]] +; DBG-NEXT: [[L2:%.*]] = load i16, ptr getelementptr inbounds ([[S:%.*]], ptr @e, i16 0, i32 1), align 1 +; DBG-NEXT: [[E2:%.*]] = zext i16 [[L2]] to i32 +; DBG-NEXT: [[E1:%.*]] = zext i16 [[L1]] to i32 +; DBG-NEXT: [[S1:%.*]] = shl nuw i32 [[E1]], 16 +; DBG-NEXT: [[O1:%.*]] = or i32 [[S1]], [[E2]] +; DBG-NEXT: store i32 [[O1]], ptr @l, align 1 +; DBG-NEXT: ret void +; +; NODBG-LABEL: define void @test() { +; NODBG-NEXT: entry: +; NODBG-NEXT: [[L1:%.*]] = load i32, ptr @e, align 1 +; NODBG-NEXT: store i32 [[L1]], ptr @l, align 1 +; NODBG-NEXT: ret void +; +entry: + %l1 = load i16, ptr @e, align 1 + call void @llvm.dbg.value(metadata i32 undef, metadata !3, metadata !DIExpression()), !dbg !5 + %l2 = load i16, ptr getelementptr inbounds (%s, ptr @e, i16 0, i32 1), align 1 + %e2 = zext i16 %l2 to i32 + %e1 = zext i16 %l1 to i32 + %s1 = shl nuw i32 %e1, 16 + %o1 = or i32 %s1, %e2 + store i32 %o1, ptr @l, align 1 + ret void +} + +declare void @llvm.dbg.value(metadata, metadata, metadata) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2} + +!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1) +!1 = !DIFile(filename: "foo.c", directory: "/") +!2 = !{i32 2, !"Debug Info Version", i32 3} +!3 = !DILocalVariable(scope: !4) +!4 = distinct !DISubprogram(unit: !0) +!5 = !DILocation(scope: !4) From ce0a750fe4b04630fcb242822627d790f4c2878b Mon Sep 17 00:00:00 2001 From: Mikael Holmen Date: Wed, 25 Oct 2023 07:11:15 +0200 Subject: [PATCH 023/877] [AggressiveInstCombine] Ignore debug instructions when load combining (#70200) We previously included debug instructions when counting instructions when looking for loads to combine. This meant that the presence of debug instructions could affect optimization, as shown in the updated testcase. This fixes #69925. --- .../AggressiveInstCombine.cpp | 5 ++++- .../AArch64/combine_ignore_debug.ll | 14 ++++---------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp index a55d01645f10e..d09ac1c099c1a 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -706,7 +706,10 @@ static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL, make_range(Start->getIterator(), End->getIterator())) { if (Inst.mayWriteToMemory() && isModSet(AA.getModRefInfo(&Inst, Loc))) return false; - if (++NumScanned > MaxInstrsToScan) + + // Ignore debug info so that's not counted against MaxInstrsToScan. + // Otherwise debug info could affect codegen. 
+    if (!isa<DbgInfoIntrinsic>(Inst) && ++NumScanned > MaxInstrsToScan)
       return false;
   }
 
diff --git a/llvm/test/Transforms/AggressiveInstCombine/AArch64/combine_ignore_debug.ll b/llvm/test/Transforms/AggressiveInstCombine/AArch64/combine_ignore_debug.ll
index 4b41060544f7a..68455a1f9074e 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/AArch64/combine_ignore_debug.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/AArch64/combine_ignore_debug.ll
@@ -2,9 +2,8 @@
 ; RUN: opt -mtriple aarch64 -aggressive-instcombine-max-scan-instrs=1 -passes="aggressive-instcombine" -S < %s | FileCheck %s -check-prefix DBG
 ; RUN: opt -strip-debug -mtriple aarch64 -aggressive-instcombine-max-scan-instrs=1 -passes="aggressive-instcombine" -S < %s | FileCheck %s -check-prefix NODBG
 
-; FIXME: The DBG and NODBG cases should be the same. I.e. we should optimize the
-; DBG case too even if there is a dbg.value.
-; This is described in https://github.com/llvm/llvm-project/issues/69925
+; The DBG and NODBG cases should be the same. I.e. we should optimize the DBG
+; case too even if there is a dbg.value.
 
 target datalayout = "E"
 
@@ -16,14 +15,9 @@ target datalayout = "E"
 define void @test() {
 ; DBG-LABEL: define void @test() {
 ; DBG-NEXT:  entry:
-; DBG-NEXT:    [[L1:%.*]] = load i16, ptr @e, align 1
+; DBG-NEXT:    [[L1:%.*]] = load i32, ptr @e, align 1
 ; DBG-NEXT:    call void @llvm.dbg.value(metadata i32 undef, metadata [[META3:![0-9]+]], metadata !DIExpression()), !dbg [[DBG5:![0-9]+]]
-; DBG-NEXT:    [[L2:%.*]] = load i16, ptr getelementptr inbounds ([[S:%.*]], ptr @e, i16 0, i32 1), align 1
-; DBG-NEXT:    [[E2:%.*]] = zext i16 [[L2]] to i32
-; DBG-NEXT:    [[E1:%.*]] = zext i16 [[L1]] to i32
-; DBG-NEXT:    [[S1:%.*]] = shl nuw i32 [[E1]], 16
-; DBG-NEXT:    [[O1:%.*]] = or i32 [[S1]], [[E2]]
-; DBG-NEXT:    store i32 [[O1]], ptr @l, align 1
+; DBG-NEXT:    store i32 [[L1]], ptr @l, align 1
 ; DBG-NEXT:    ret void
 ;
 ; NODBG-LABEL: define void @test() {

From 5ef45c02dc0cd6cce0b3eb099707b8fc696a8ff6 Mon Sep 17 00:00:00 2001
From: Guray Ozen
Date: Thu, 26 Oct 2023 10:02:32 +0200
Subject: [PATCH 024/877] [mlir][cuda] Avoid driver call to check max shared memory (#70021)

This PR guards the driver call with an if statement, since driver calls
are comparatively expensive. As a future improvement, the if statement
could be generated by the compiler and thus optimized away in some
cases.
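(Editorial sketch, not the wrapper code itself: the same guard pattern in a self-contained form. The helper name is hypothetical and error checking is elided; the driver entry points and the attribute are the ones used in the diff below.)

#include <cuda.h>
#include <cstdio>

// Only pay for the driver queries (device lookup plus attribute read) when
// dynamic shared memory is actually requested; the common smem == 0 launch
// path performs no extra driver calls.
void launchChecked(CUfunction f, unsigned smem, CUstream stream, void **params) {
  if (smem > 0) {
    int maxShmem = 0;
    CUdevice device;
    cuDeviceGet(&device, /*ordinal=*/0);
    cuDeviceGetAttribute(
        &maxShmem, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN,
        device);
    if (maxShmem < static_cast<int>(smem))
      fprintf(stderr, "requested smem (%u) exceeds device maximum (%d)\n",
              smem, maxShmem);
    cuFuncSetAttribute(f, CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES,
                       static_cast<int>(smem));
  }
  cuLaunchKernel(f, 1, 1, 1, 1, 1, 1, smem, stream, params, /*extra=*/nullptr);
}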
--- .../ExecutionEngine/CudaRuntimeWrappers.cpp | 29 ++++++++++--------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp b/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp index 55db744af021c..a8e743c519135 100644 --- a/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp +++ b/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp @@ -168,20 +168,23 @@ mgpuLaunchKernel(CUfunction function, intptr_t gridX, intptr_t gridY, intptr_t blockZ, int32_t smem, CUstream stream, void **params, void **extra, size_t /*paramsCount*/) { ScopedContext scopedContext; - int32_t maxShmem = 0; - CUdevice device = getDefaultCuDevice(); - CUDA_REPORT_IF_ERROR(cuDeviceGet(&device, /*ordinal=*/defaultDevice)); - CUDA_REPORT_IF_ERROR(cuDeviceGetAttribute( - &maxShmem, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN, - device)); - if (maxShmem < smem) { - fprintf(stderr, - "Requested shared memory (%dkb) is larger than maximum allowed " - "shared memory (%dkb) for this device\n", - smem, maxShmem); + if (smem > 0) { + // Avoid checking driver as it's more expensive than if statement + int32_t maxShmem = 0; + CUdevice device = getDefaultCuDevice(); + CUDA_REPORT_IF_ERROR(cuDeviceGet(&device, /*ordinal=*/defaultDevice)); + CUDA_REPORT_IF_ERROR(cuDeviceGetAttribute( + &maxShmem, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN, + device)); + if (maxShmem < smem) { + fprintf(stderr, + "Requested shared memory (%dkb) is larger than maximum allowed " + "shared memory (%dkb) for this device\n", + smem, maxShmem); + } + CUDA_REPORT_IF_ERROR(cuFuncSetAttribute( + function, CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, smem)); } - CUDA_REPORT_IF_ERROR(cuFuncSetAttribute( - function, CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, smem)); debug_print("Launching kernel, grid=%ld,%ld,%ld, " "threads: %ld, %ld, %ld, " "smem: %dkb\n", From f7dc26cab25e1000b1198625e30ce1acdbe8fcb4 Mon Sep 17 00:00:00 2001 From: Guray Ozen Date: Thu, 26 Oct 2023 10:02:54 +0200 Subject: [PATCH 025/877] [mlir] Fixed typo in type (128x64 -> 64x128) in TMA load test (#70022) The test was meant to check `64x128xf16` as the contiguous dimension exceeds the cache line (128b). TMA requires cache line-aligned loads, so loading 64x128 can be done with two 64x64 loads, as documented in the test. However, there was a typo in the type, which was `memref<128x64xf16>` instead of the correct `memref<64x128xf16>`. This PR corrects the issue and updates the verification. 
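(Editorial check of the arithmetic behind the fix: a row of a 64x128 f16 tile occupies 128 elements x 2 B = 256 B, i.e. two 128 B swizzle/cache-line units, so the tile must be fetched as two cache-line-aligned 64x64 loads, each with rows of exactly 64 x 2 B = 128 B.)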
--- .../CUDA/sm90/tma_load_64x64_swizzle128b.mlir | 143 +++++++++--------- 1 file changed, 69 insertions(+), 74 deletions(-) diff --git a/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x64_swizzle128b.mlir b/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x64_swizzle128b.mlir index 242c5ff875cf4..13b9c48dabe85 100644 --- a/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x64_swizzle128b.mlir +++ b/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x64_swizzle128b.mlir @@ -33,8 +33,8 @@ !shmemlhs = memref<128x64xf16, 3> !lhsTensorMap = !nvgpu.tensormap.descriptor -!rhs = memref<128x64xf16> -!shmemrhs = memref<128x64xf16, 3> +!rhs = memref<64x128xf16> +!shmemrhs = memref<64x128xf16, 3> !rhsTensorMap = !nvgpu.tensormap.descriptor module @mymod { @@ -68,10 +68,6 @@ module @mymod { memref.store %vR, %rhs[%i, %j] : !rhs %vR32 = arith.extf %vR : f16 to f32 memref.store %vR32, %rhs32[%i, %j] : memref<64x128xf32> - } - } - scf.for %j = %c0 to %c128 step %c1 { - scf.for %i = %c0 to %c64 step %c1 { %b0 = arith.muli %j, %c64 : index %b00 = arith.addi %b0, %i : index %b01 = arith.divui %b00, %c8 : index @@ -103,20 +99,19 @@ module @mymod { %6 = gpu.thread_id x %lhsShmem = memref.get_global @bufferLhsGlobal : !shmemlhs %rhsShmem = memref.get_global @bufferRhsGlobal : !shmemrhs - %rhsShmem2 = memref.subview %rhsShmem[%c32, %c0][%c32, %c128][%c1, %c1] : !shmemrhs to memref, 3> + %rhsShmem2 = memref.subview %rhsShmem[32, 0][128, 64][1, 1] : !shmemrhs to memref<128x64xf16, strided<[128, 1], offset: 4096>, 3> // Step 5. Initialize the mbarrier %9 = nvgpu.mbarrier.create -> !barrierType nvgpu.mbarrier.init %9[%c0], %5 : !barrierType %10 = arith.cmpi eq, %6, %c0 : index - // Step 6. First thread does TMA load scf.if %10 { gpu.printf "[GPU] TMA SIZE %d\0A" %c32768 : index nvgpu.tma.async.load %d_lhsTensorMap[%c0, %c0], %9[%c0] to %lhsShmem : !lhsTensorMap, !barrierType -> !shmemlhs nvgpu.tma.async.load %d_rhsTensorMap[%c0, %c0], %9[%c0] to %rhsShmem : !rhsTensorMap, !barrierType -> !shmemrhs - nvgpu.tma.async.load %d_rhsTensorMap[%c64, %c0], %9[%c0] to %rhsShmem2 : !rhsTensorMap, !barrierType -> memref, 3> + nvgpu.tma.async.load %d_rhsTensorMap[%c64, %c0], %9[%c0] to %rhsShmem2 : !rhsTensorMap, !barrierType -> memref<128x64xf16, strided<[128, 1], offset: 4096>, 3> nvgpu.mbarrier.arrive.expect_tx %9[%c0], %c32768 : !barrierType } else { nvgpu.mbarrier.arrive.expect_tx %9[%c0], %c0 : !barrierType @@ -147,69 +142,69 @@ module @mymod { // CHECK: [GPU] TMA SIZE 32768 -// CHECK: ===--- Matrix B ---=== -1 -// CHECK: 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 17, 17, 17, 17, 17, 17, 17, 17, 16, 16, 16, 16, 16, 16, 16, 16, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18, 18, 18, 18, 18, 18, 18, 21, 21, 21, 21, 21, 21, 21, 21, 20, 20, 20, 20, 20, 20, 20, 20, 23, 23, 23, 23, 23, 23, 23, 23, 22, 22, 22, 22, 22, 22, 22, 22, -1 -// CHECK: 17, 17, 17, 17, 17, 17, 17, 17, 16, 16, 16, 16, 16, 16, 16, 16, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18, 18, 18, 18, 18, 18, 18, 21, 21, 21, 21, 21, 21, 21, 21, 20, 20, 20, 20, 20, 20, 20, 20, 23, 23, 23, 23, 23, 23, 23, 23, 22, 22, 22, 22, 22, 22, 22, 22, 34, 34, 34, 34, 34, 34, 34, 34, 35, 35, 35, 35, 35, 35, 35, 35, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, 36, 36, 36, 36, 36, 36, 36, 36, 37, 37, 37, 37, 37, 37, 37, 37, -1 -// CHECK: 34, 34, 34, 34, 34, 34, 34, 34, 35, 35, 
35, 35, 35, 35, 35, 35, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, 36, 36, 36, 36, 36, 36, 36, 36, 37, 37, 37, 37, 37, 37, 37, 37, 51, 51, 51, 51, 51, 51, 51, 51, 50, 50, 50, 50, 50, 50, 50, 50, 49, 49, 49, 49, 49, 49, 49, 49, 48, 48, 48, 48, 48, 48, 48, 48, 55, 55, 55, 55, 55, 55, 55, 55, 54, 54, 54, 54, 54, 54, 54, 54, 53, 53, 53, 53, 53, 53, 53, 53, 52, 52, 52, 52, 52, 52, 52, 52, -1 -// CHECK: 51, 51, 51, 51, 51, 51, 51, 51, 50, 50, 50, 50, 50, 50, 50, 50, 49, 49, 49, 49, 49, 49, 49, 49, 48, 48, 48, 48, 48, 48, 48, 48, 55, 55, 55, 55, 55, 55, 55, 55, 54, 54, 54, 54, 54, 54, 54, 54, 53, 53, 53, 53, 53, 53, 53, 53, 52, 52, 52, 52, 52, 52, 52, 52, 68, 68, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 70, 70, 70, 70, 70, 70, 70, 70, 71, 71, 71, 71, 71, 71, 71, 71, 64, 64, 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 67, 67, -1 -// CHECK: 68, 68, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 70, 70, 70, 70, 70, 70, 70, 70, 71, 71, 71, 71, 71, 71, 71, 71, 64, 64, 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 67, 67, 85, 85, 85, 85, 85, 85, 85, 85, 84, 84, 84, 84, 84, 84, 84, 84, 87, 87, 87, 87, 87, 87, 87, 87, 86, 86, 86, 86, 86, 86, 86, 86, 81, 81, 81, 81, 81, 81, 81, 81, 80, 80, 80, 80, 80, 80, 80, 80, 83, 83, 83, 83, 83, 83, 83, 83, 82, 82, 82, 82, 82, 82, 82, 82, -1 -// CHECK: 85, 85, 85, 85, 85, 85, 85, 85, 84, 84, 84, 84, 84, 84, 84, 84, 87, 87, 87, 87, 87, 87, 87, 87, 86, 86, 86, 86, 86, 86, 86, 86, 81, 81, 81, 81, 81, 81, 81, 81, 80, 80, 80, 80, 80, 80, 80, 80, 83, 83, 83, 83, 83, 83, 83, 83, 82, 82, 82, 82, 82, 82, 82, 82, 102, 102, 102, 102, 102, 102, 102, 102, 103, 103, 103, 103, 103, 103, 103, 103, 100, 100, 100, 100, 100, 100, 100, 100, 101, 101, 101, 101, 101, 101, 101, 101, 98, 98, 98, 98, 98, 98, 98, 98, 99, 99, 99, 99, 99, 99, 99, 99, 96, 96, 96, 96, 96, 96, 96, 96, 97, 97, 97, 97, 97, 97, 97, 97, -1 -// CHECK: 102, 102, 102, 102, 102, 102, 102, 102, 103, 103, 103, 103, 103, 103, 103, 103, 100, 100, 100, 100, 100, 100, 100, 100, 101, 101, 101, 101, 101, 101, 101, 101, 98, 98, 98, 98, 98, 98, 98, 98, 99, 99, 99, 99, 99, 99, 99, 99, 96, 96, 96, 96, 96, 96, 96, 96, 97, 97, 97, 97, 97, 97, 97, 97, 119, 119, 119, 119, 119, 119, 119, 119, 118, 118, 118, 118, 118, 118, 118, 118, 117, 117, 117, 117, 117, 117, 117, 117, 116, 116, 116, 116, 116, 116, 116, 116, 115, 115, 115, 115, 115, 115, 115, 115, 114, 114, 114, 114, 114, 114, 114, 114, 113, 113, 113, 113, 113, 113, 113, 113, 112, 112, 112, 112, 112, 112, 112, 112, -1 -// CHECK: 119, 119, 119, 119, 119, 119, 119, 119, 118, 118, 118, 118, 118, 118, 118, 118, 117, 117, 117, 117, 117, 117, 117, 117, 116, 116, 116, 116, 116, 116, 116, 116, 115, 115, 115, 115, 115, 115, 115, 115, 114, 114, 114, 114, 114, 114, 114, 114, 113, 113, 113, 113, 113, 113, 113, 113, 112, 112, 112, 112, 112, 112, 112, 112, 128, 128, 128, 128, 128, 128, 128, 128, 129, 129, 129, 129, 129, 129, 129, 129, 130, 130, 130, 130, 130, 130, 130, 130, 131, 131, 131, 131, 131, 131, 131, 131, 132, 132, 132, 132, 132, 132, 132, 132, 133, 133, 133, 133, 133, 133, 133, 133, 134, 134, 134, 134, 134, 134, 134, 134, 135, 135, 135, 135, 135, 135, 135, 135, -1 -// CHECK: 128, 128, 128, 128, 128, 128, 128, 128, 129, 129, 129, 129, 129, 129, 129, 129, 130, 130, 130, 130, 130, 130, 130, 130, 131, 131, 131, 131, 131, 131, 131, 131, 132, 132, 132, 132, 
132, 132, 132, 132, 133, 133, 133, 133, 133, 133, 133, 133, 134, 134, 134, 134, 134, 134, 134, 134, 135, 135, 135, 135, 135, 135, 135, 135, 145, 145, 145, 145, 145, 145, 145, 145, 144, 144, 144, 144, 144, 144, 144, 144, 147, 147, 147, 147, 147, 147, 147, 147, 146, 146, 146, 146, 146, 146, 146, 146, 149, 149, 149, 149, 149, 149, 149, 149, 148, 148, 148, 148, 148, 148, 148, 148, 151, 151, 151, 151, 151, 151, 151, 151, 150, 150, 150, 150, 150, 150, 150, 150, -1 -// CHECK: 145, 145, 145, 145, 145, 145, 145, 145, 144, 144, 144, 144, 144, 144, 144, 144, 147, 147, 147, 147, 147, 147, 147, 147, 146, 146, 146, 146, 146, 146, 146, 146, 149, 149, 149, 149, 149, 149, 149, 149, 148, 148, 148, 148, 148, 148, 148, 148, 151, 151, 151, 151, 151, 151, 151, 151, 150, 150, 150, 150, 150, 150, 150, 150, 162, 162, 162, 162, 162, 162, 162, 162, 163, 163, 163, 163, 163, 163, 163, 163, 160, 160, 160, 160, 160, 160, 160, 160, 161, 161, 161, 161, 161, 161, 161, 161, 166, 166, 166, 166, 166, 166, 166, 166, 167, 167, 167, 167, 167, 167, 167, 167, 164, 164, 164, 164, 164, 164, 164, 164, 165, 165, 165, 165, 165, 165, 165, 165, -1 -// CHECK: 162, 162, 162, 162, 162, 162, 162, 162, 163, 163, 163, 163, 163, 163, 163, 163, 160, 160, 160, 160, 160, 160, 160, 160, 161, 161, 161, 161, 161, 161, 161, 161, 166, 166, 166, 166, 166, 166, 166, 166, 167, 167, 167, 167, 167, 167, 167, 167, 164, 164, 164, 164, 164, 164, 164, 164, 165, 165, 165, 165, 165, 165, 165, 165, 179, 179, 179, 179, 179, 179, 179, 179, 178, 178, 178, 178, 178, 178, 178, 178, 177, 177, 177, 177, 177, 177, 177, 177, 176, 176, 176, 176, 176, 176, 176, 176, 183, 183, 183, 183, 183, 183, 183, 183, 182, 182, 182, 182, 182, 182, 182, 182, 181, 181, 181, 181, 181, 181, 181, 181, 180, 180, 180, 180, 180, 180, 180, 180, -1 -// CHECK: 179, 179, 179, 179, 179, 179, 179, 179, 178, 178, 178, 178, 178, 178, 178, 178, 177, 177, 177, 177, 177, 177, 177, 177, 176, 176, 176, 176, 176, 176, 176, 176, 183, 183, 183, 183, 183, 183, 183, 183, 182, 182, 182, 182, 182, 182, 182, 182, 181, 181, 181, 181, 181, 181, 181, 181, 180, 180, 180, 180, 180, 180, 180, 180, 196, 196, 196, 196, 196, 196, 196, 196, 197, 197, 197, 197, 197, 197, 197, 197, 198, 198, 198, 198, 198, 198, 198, 198, 199, 199, 199, 199, 199, 199, 199, 199, 192, 192, 192, 192, 192, 192, 192, 192, 193, 193, 193, 193, 193, 193, 193, 193, 194, 194, 194, 194, 194, 194, 194, 194, 195, 195, 195, 195, 195, 195, 195, 195, -1 -// CHECK: 196, 196, 196, 196, 196, 196, 196, 196, 197, 197, 197, 197, 197, 197, 197, 197, 198, 198, 198, 198, 198, 198, 198, 198, 199, 199, 199, 199, 199, 199, 199, 199, 192, 192, 192, 192, 192, 192, 192, 192, 193, 193, 193, 193, 193, 193, 193, 193, 194, 194, 194, 194, 194, 194, 194, 194, 195, 195, 195, 195, 195, 195, 195, 195, 213, 213, 213, 213, 213, 213, 213, 213, 212, 212, 212, 212, 212, 212, 212, 212, 215, 215, 215, 215, 215, 215, 215, 215, 214, 214, 214, 214, 214, 214, 214, 214, 209, 209, 209, 209, 209, 209, 209, 209, 208, 208, 208, 208, 208, 208, 208, 208, 211, 211, 211, 211, 211, 211, 211, 211, 210, 210, 210, 210, 210, 210, 210, 210, -1 -// CHECK: 213, 213, 213, 213, 213, 213, 213, 213, 212, 212, 212, 212, 212, 212, 212, 212, 215, 215, 215, 215, 215, 215, 215, 215, 214, 214, 214, 214, 214, 214, 214, 214, 209, 209, 209, 209, 209, 209, 209, 209, 208, 208, 208, 208, 208, 208, 208, 208, 211, 211, 211, 211, 211, 211, 211, 211, 210, 210, 210, 210, 210, 210, 210, 210, 230, 230, 230, 230, 230, 230, 230, 230, 231, 231, 231, 231, 231, 231, 231, 231, 228, 228, 228, 228, 228, 228, 228, 228, 229, 229, 229, 229, 229, 
229, 229, 229, 226, 226, 226, 226, 226, 226, 226, 226, 227, 227, 227, 227, 227, 227, 227, 227, 224, 224, 224, 224, 224, 224, 224, 224, 225, 225, 225, 225, 225, 225, 225, 225, -1 -// CHECK: 230, 230, 230, 230, 230, 230, 230, 230, 231, 231, 231, 231, 231, 231, 231, 231, 228, 228, 228, 228, 228, 228, 228, 228, 229, 229, 229, 229, 229, 229, 229, 229, 226, 226, 226, 226, 226, 226, 226, 226, 227, 227, 227, 227, 227, 227, 227, 227, 224, 224, 224, 224, 224, 224, 224, 224, 225, 225, 225, 225, 225, 225, 225, 225, 247, 247, 247, 247, 247, 247, 247, 247, 246, 246, 246, 246, 246, 246, 246, 246, 245, 245, 245, 245, 245, 245, 245, 245, 244, 244, 244, 244, 244, 244, 244, 244, 243, 243, 243, 243, 243, 243, 243, 243, 242, 242, 242, 242, 242, 242, 242, 242, 241, 241, 241, 241, 241, 241, 241, 241, 240, 240, 240, 240, 240, 240, 240, 240, -1 -// CHECK: 247, 247, 247, 247, 247, 247, 247, 247, 246, 246, 246, 246, 246, 246, 246, 246, 245, 245, 245, 245, 245, 245, 245, 245, 244, 244, 244, 244, 244, 244, 244, 244, 243, 243, 243, 243, 243, 243, 243, 243, 242, 242, 242, 242, 242, 242, 242, 242, 241, 241, 241, 241, 241, 241, 241, 241, 240, 240, 240, 240, 240, 240, 240, 240, 256, 256, 256, 256, 256, 256, 256, 256, 257, 257, 257, 257, 257, 257, 257, 257, 258, 258, 258, 258, 258, 258, 258, 258, 259, 259, 259, 259, 259, 259, 259, 259, 260, 260, 260, 260, 260, 260, 260, 260, 261, 261, 261, 261, 261, 261, 261, 261, 262, 262, 262, 262, 262, 262, 262, 262, 263, 263, 263, 263, 263, 263, 263, 263, -1 -// CHECK: 256, 256, 256, 256, 256, 256, 256, 256, 257, 257, 257, 257, 257, 257, 257, 257, 258, 258, 258, 258, 258, 258, 258, 258, 259, 259, 259, 259, 259, 259, 259, 259, 260, 260, 260, 260, 260, 260, 260, 260, 261, 261, 261, 261, 261, 261, 261, 261, 262, 262, 262, 262, 262, 262, 262, 262, 263, 263, 263, 263, 263, 263, 263, 263, 273, 273, 273, 273, 273, 273, 273, 273, 272, 272, 272, 272, 272, 272, 272, 272, 275, 275, 275, 275, 275, 275, 275, 275, 274, 274, 274, 274, 274, 274, 274, 274, 277, 277, 277, 277, 277, 277, 277, 277, 276, 276, 276, 276, 276, 276, 276, 276, 279, 279, 279, 279, 279, 279, 279, 279, 278, 278, 278, 278, 278, 278, 278, 278, -1 -// CHECK: 273, 273, 273, 273, 273, 273, 273, 273, 272, 272, 272, 272, 272, 272, 272, 272, 275, 275, 275, 275, 275, 275, 275, 275, 274, 274, 274, 274, 274, 274, 274, 274, 277, 277, 277, 277, 277, 277, 277, 277, 276, 276, 276, 276, 276, 276, 276, 276, 279, 279, 279, 279, 279, 279, 279, 279, 278, 278, 278, 278, 278, 278, 278, 278, 290, 290, 290, 290, 290, 290, 290, 290, 291, 291, 291, 291, 291, 291, 291, 291, 288, 288, 288, 288, 288, 288, 288, 288, 289, 289, 289, 289, 289, 289, 289, 289, 294, 294, 294, 294, 294, 294, 294, 294, 295, 295, 295, 295, 295, 295, 295, 295, 292, 292, 292, 292, 292, 292, 292, 292, 293, 293, 293, 293, 293, 293, 293, 293, -1 -// CHECK: 290, 290, 290, 290, 290, 290, 290, 290, 291, 291, 291, 291, 291, 291, 291, 291, 288, 288, 288, 288, 288, 288, 288, 288, 289, 289, 289, 289, 289, 289, 289, 289, 294, 294, 294, 294, 294, 294, 294, 294, 295, 295, 295, 295, 295, 295, 295, 295, 292, 292, 292, 292, 292, 292, 292, 292, 293, 293, 293, 293, 293, 293, 293, 293, 307, 307, 307, 307, 307, 307, 307, 307, 306, 306, 306, 306, 306, 306, 306, 306, 305, 305, 305, 305, 305, 305, 305, 305, 304, 304, 304, 304, 304, 304, 304, 304, 311, 311, 311, 311, 311, 311, 311, 311, 310, 310, 310, 310, 310, 310, 310, 310, 309, 309, 309, 309, 309, 309, 309, 309, 308, 308, 308, 308, 308, 308, 308, 308, -1 -// CHECK: 307, 307, 307, 307, 307, 307, 307, 307, 306, 306, 306, 306, 306, 306, 306, 306, 305, 305, 305, 
305, 305, 305, 305, 305, 304, 304, 304, 304, 304, 304, 304, 304, 311, 311, 311, 311, 311, 311, 311, 311, 310, 310, 310, 310, 310, 310, 310, 310, 309, 309, 309, 309, 309, 309, 309, 309, 308, 308, 308, 308, 308, 308, 308, 308, 324, 324, 324, 324, 324, 324, 324, 324, 325, 325, 325, 325, 325, 325, 325, 325, 326, 326, 326, 326, 326, 326, 326, 326, 327, 327, 327, 327, 327, 327, 327, 327, 320, 320, 320, 320, 320, 320, 320, 320, 321, 321, 321, 321, 321, 321, 321, 321, 322, 322, 322, 322, 322, 322, 322, 322, 323, 323, 323, 323, 323, 323, 323, 323, -1 -// CHECK: 324, 324, 324, 324, 324, 324, 324, 324, 325, 325, 325, 325, 325, 325, 325, 325, 326, 326, 326, 326, 326, 326, 326, 326, 327, 327, 327, 327, 327, 327, 327, 327, 320, 320, 320, 320, 320, 320, 320, 320, 321, 321, 321, 321, 321, 321, 321, 321, 322, 322, 322, 322, 322, 322, 322, 322, 323, 323, 323, 323, 323, 323, 323, 323, 341, 341, 341, 341, 341, 341, 341, 341, 340, 340, 340, 340, 340, 340, 340, 340, 343, 343, 343, 343, 343, 343, 343, 343, 342, 342, 342, 342, 342, 342, 342, 342, 337, 337, 337, 337, 337, 337, 337, 337, 336, 336, 336, 336, 336, 336, 336, 336, 339, 339, 339, 339, 339, 339, 339, 339, 338, 338, 338, 338, 338, 338, 338, 338, -1 -// CHECK: 341, 341, 341, 341, 341, 341, 341, 341, 340, 340, 340, 340, 340, 340, 340, 340, 343, 343, 343, 343, 343, 343, 343, 343, 342, 342, 342, 342, 342, 342, 342, 342, 337, 337, 337, 337, 337, 337, 337, 337, 336, 336, 336, 336, 336, 336, 336, 336, 339, 339, 339, 339, 339, 339, 339, 339, 338, 338, 338, 338, 338, 338, 338, 338, 358, 358, 358, 358, 358, 358, 358, 358, 359, 359, 359, 359, 359, 359, 359, 359, 356, 356, 356, 356, 356, 356, 356, 356, 357, 357, 357, 357, 357, 357, 357, 357, 354, 354, 354, 354, 354, 354, 354, 354, 355, 355, 355, 355, 355, 355, 355, 355, 352, 352, 352, 352, 352, 352, 352, 352, 353, 353, 353, 353, 353, 353, 353, 353, -1 -// CHECK: 358, 358, 358, 358, 358, 358, 358, 358, 359, 359, 359, 359, 359, 359, 359, 359, 356, 356, 356, 356, 356, 356, 356, 356, 357, 357, 357, 357, 357, 357, 357, 357, 354, 354, 354, 354, 354, 354, 354, 354, 355, 355, 355, 355, 355, 355, 355, 355, 352, 352, 352, 352, 352, 352, 352, 352, 353, 353, 353, 353, 353, 353, 353, 353, 375, 375, 375, 375, 375, 375, 375, 375, 374, 374, 374, 374, 374, 374, 374, 374, 373, 373, 373, 373, 373, 373, 373, 373, 372, 372, 372, 372, 372, 372, 372, 372, 371, 371, 371, 371, 371, 371, 371, 371, 370, 370, 370, 370, 370, 370, 370, 370, 369, 369, 369, 369, 369, 369, 369, 369, 368, 368, 368, 368, 368, 368, 368, 368, -1 -// CHECK: 375, 375, 375, 375, 375, 375, 375, 375, 374, 374, 374, 374, 374, 374, 374, 374, 373, 373, 373, 373, 373, 373, 373, 373, 372, 372, 372, 372, 372, 372, 372, 372, 371, 371, 371, 371, 371, 371, 371, 371, 370, 370, 370, 370, 370, 370, 370, 370, 369, 369, 369, 369, 369, 369, 369, 369, 368, 368, 368, 368, 368, 368, 368, 368, 384, 384, 384, 384, 384, 384, 384, 384, 385, 385, 385, 385, 385, 385, 385, 385, 386, 386, 386, 386, 386, 386, 386, 386, 387, 387, 387, 387, 387, 387, 387, 387, 388, 388, 388, 388, 388, 388, 388, 388, 389, 389, 389, 389, 389, 389, 389, 389, 390, 390, 390, 390, 390, 390, 390, 390, 391, 391, 391, 391, 391, 391, 391, 391, -1 -// CHECK: 384, 384, 384, 384, 384, 384, 384, 384, 385, 385, 385, 385, 385, 385, 385, 385, 386, 386, 386, 386, 386, 386, 386, 386, 387, 387, 387, 387, 387, 387, 387, 387, 388, 388, 388, 388, 388, 388, 388, 388, 389, 389, 389, 389, 389, 389, 389, 389, 390, 390, 390, 390, 390, 390, 390, 390, 391, 391, 391, 391, 391, 391, 391, 391, 401, 401, 401, 401, 401, 401, 401, 401, 400, 400, 400, 400, 
400, 400, 400, 400, 403, 403, 403, 403, 403, 403, 403, 403, 402, 402, 402, 402, 402, 402, 402, 402, 405, 405, 405, 405, 405, 405, 405, 405, 404, 404, 404, 404, 404, 404, 404, 404, 407, 407, 407, 407, 407, 407, 407, 407, 406, 406, 406, 406, 406, 406, 406, 406, -1 -// CHECK: 401, 401, 401, 401, 401, 401, 401, 401, 400, 400, 400, 400, 400, 400, 400, 400, 403, 403, 403, 403, 403, 403, 403, 403, 402, 402, 402, 402, 402, 402, 402, 402, 405, 405, 405, 405, 405, 405, 405, 405, 404, 404, 404, 404, 404, 404, 404, 404, 407, 407, 407, 407, 407, 407, 407, 407, 406, 406, 406, 406, 406, 406, 406, 406, 418, 418, 418, 418, 418, 418, 418, 418, 419, 419, 419, 419, 419, 419, 419, 419, 416, 416, 416, 416, 416, 416, 416, 416, 417, 417, 417, 417, 417, 417, 417, 417, 422, 422, 422, 422, 422, 422, 422, 422, 423, 423, 423, 423, 423, 423, 423, 423, 420, 420, 420, 420, 420, 420, 420, 420, 421, 421, 421, 421, 421, 421, 421, 421, -1 -// CHECK: 418, 418, 418, 418, 418, 418, 418, 418, 419, 419, 419, 419, 419, 419, 419, 419, 416, 416, 416, 416, 416, 416, 416, 416, 417, 417, 417, 417, 417, 417, 417, 417, 422, 422, 422, 422, 422, 422, 422, 422, 423, 423, 423, 423, 423, 423, 423, 423, 420, 420, 420, 420, 420, 420, 420, 420, 421, 421, 421, 421, 421, 421, 421, 421, 435, 435, 435, 435, 435, 435, 435, 435, 434, 434, 434, 434, 434, 434, 434, 434, 433, 433, 433, 433, 433, 433, 433, 433, 432, 432, 432, 432, 432, 432, 432, 432, 439, 439, 439, 439, 439, 439, 439, 439, 438, 438, 438, 438, 438, 438, 438, 438, 437, 437, 437, 437, 437, 437, 437, 437, 436, 436, 436, 436, 436, 436, 436, 436, -1 -// CHECK: 435, 435, 435, 435, 435, 435, 435, 435, 434, 434, 434, 434, 434, 434, 434, 434, 433, 433, 433, 433, 433, 433, 433, 433, 432, 432, 432, 432, 432, 432, 432, 432, 439, 439, 439, 439, 439, 439, 439, 439, 438, 438, 438, 438, 438, 438, 438, 438, 437, 437, 437, 437, 437, 437, 437, 437, 436, 436, 436, 436, 436, 436, 436, 436, 452, 452, 452, 452, 452, 452, 452, 452, 453, 453, 453, 453, 453, 453, 453, 453, 454, 454, 454, 454, 454, 454, 454, 454, 455, 455, 455, 455, 455, 455, 455, 455, 448, 448, 448, 448, 448, 448, 448, 448, 449, 449, 449, 449, 449, 449, 449, 449, 450, 450, 450, 450, 450, 450, 450, 450, 451, 451, 451, 451, 451, 451, 451, 451, -1 -// CHECK: 452, 452, 452, 452, 452, 452, 452, 452, 453, 453, 453, 453, 453, 453, 453, 453, 454, 454, 454, 454, 454, 454, 454, 454, 455, 455, 455, 455, 455, 455, 455, 455, 448, 448, 448, 448, 448, 448, 448, 448, 449, 449, 449, 449, 449, 449, 449, 449, 450, 450, 450, 450, 450, 450, 450, 450, 451, 451, 451, 451, 451, 451, 451, 451, 469, 469, 469, 469, 469, 469, 469, 469, 468, 468, 468, 468, 468, 468, 468, 468, 471, 471, 471, 471, 471, 471, 471, 471, 470, 470, 470, 470, 470, 470, 470, 470, 465, 465, 465, 465, 465, 465, 465, 465, 464, 464, 464, 464, 464, 464, 464, 464, 467, 467, 467, 467, 467, 467, 467, 467, 466, 466, 466, 466, 466, 466, 466, 466, -1 -// CHECK: 469, 469, 469, 469, 469, 469, 469, 469, 468, 468, 468, 468, 468, 468, 468, 468, 471, 471, 471, 471, 471, 471, 471, 471, 470, 470, 470, 470, 470, 470, 470, 470, 465, 465, 465, 465, 465, 465, 465, 465, 464, 464, 464, 464, 464, 464, 464, 464, 467, 467, 467, 467, 467, 467, 467, 467, 466, 466, 466, 466, 466, 466, 466, 466, 486, 486, 486, 486, 486, 486, 486, 486, 487, 487, 487, 487, 487, 487, 487, 487, 484, 484, 484, 484, 484, 484, 484, 484, 485, 485, 485, 485, 485, 485, 485, 485, 482, 482, 482, 482, 482, 482, 482, 482, 483, 483, 483, 483, 483, 483, 483, 483, 480, 480, 480, 480, 480, 480, 480, 480, 481, 481, 481, 481, 481, 481, 481, 481, -1 -// CHECK: 486, 486, 
486, 486, 486, 486, 486, 486, 487, 487, 487, 487, 487, 487, 487, 487, 484, 484, 484, 484, 484, 484, 484, 484, 485, 485, 485, 485, 485, 485, 485, 485, 482, 482, 482, 482, 482, 482, 482, 482, 483, 483, 483, 483, 483, 483, 483, 483, 480, 480, 480, 480, 480, 480, 480, 480, 481, 481, 481, 481, 481, 481, 481, 481, 503, 503, 503, 503, 503, 503, 503, 503, 502, 502, 502, 502, 502, 502, 502, 502, 501, 501, 501, 501, 501, 501, 501, 501, 500, 500, 500, 500, 500, 500, 500, 500, 499, 499, 499, 499, 499, 499, 499, 499, 498, 498, 498, 498, 498, 498, 498, 498, 497, 497, 497, 497, 497, 497, 497, 497, 496, 496, 496, 496, 496, 496, 496, 496, -1 -// CHECK: 503, 503, 503, 503, 503, 503, 503, 503, 502, 502, 502, 502, 502, 502, 502, 502, 501, 501, 501, 501, 501, 501, 501, 501, 500, 500, 500, 500, 500, 500, 500, 500, 499, 499, 499, 499, 499, 499, 499, 499, 498, 498, 498, 498, 498, 498, 498, 498, 497, 497, 497, 497, 497, 497, 497, 497, 496, 496, 496, 496, 496, 496, 496, 496, 512, 512, 512, 512, 512, 512, 512, 512, 513, 513, 513, 513, 513, 513, 513, 513, 514, 514, 514, 514, 514, 514, 514, 514, 515, 515, 515, 515, 515, 515, 515, 515, 516, 516, 516, 516, 516, 516, 516, 516, 517, 517, 517, 517, 517, 517, 517, 517, 518, 518, 518, 518, 518, 518, 518, 518, 519, 519, 519, 519, 519, 519, 519, 519, -1 -// CHECK: 512, 512, 512, 512, 512, 512, 512, 512, 513, 513, 513, 513, 513, 513, 513, 513, 514, 514, 514, 514, 514, 514, 514, 514, 515, 515, 515, 515, 515, 515, 515, 515, 516, 516, 516, 516, 516, 516, 516, 516, 517, 517, 517, 517, 517, 517, 517, 517, 518, 518, 518, 518, 518, 518, 518, 518, 519, 519, 519, 519, 519, 519, 519, 519, 529, 529, 529, 529, 529, 529, 529, 529, 528, 528, 528, 528, 528, 528, 528, 528, 531, 531, 531, 531, 531, 531, 531, 531, 530, 530, 530, 530, 530, 530, 530, 530, 533, 533, 533, 533, 533, 533, 533, 533, 532, 532, 532, 532, 532, 532, 532, 532, 535, 535, 535, 535, 535, 535, 535, 535, 534, 534, 534, 534, 534, 534, 534, 534, -1 -// CHECK: 529, 529, 529, 529, 529, 529, 529, 529, 528, 528, 528, 528, 528, 528, 528, 528, 531, 531, 531, 531, 531, 531, 531, 531, 530, 530, 530, 530, 530, 530, 530, 530, 533, 533, 533, 533, 533, 533, 533, 533, 532, 532, 532, 532, 532, 532, 532, 532, 535, 535, 535, 535, 535, 535, 535, 535, 534, 534, 534, 534, 534, 534, 534, 534, 546, 546, 546, 546, 546, 546, 546, 546, 547, 547, 547, 547, 547, 547, 547, 547, 544, 544, 544, 544, 544, 544, 544, 544, 545, 545, 545, 545, 545, 545, 545, 545, 550, 550, 550, 550, 550, 550, 550, 550, 551, 551, 551, 551, 551, 551, 551, 551, 548, 548, 548, 548, 548, 548, 548, 548, 549, 549, 549, 549, 549, 549, 549, 549, -1 -// CHECK: 546, 546, 546, 546, 546, 546, 546, 546, 547, 547, 547, 547, 547, 547, 547, 547, 544, 544, 544, 544, 544, 544, 544, 544, 545, 545, 545, 545, 545, 545, 545, 545, 550, 550, 550, 550, 550, 550, 550, 550, 551, 551, 551, 551, 551, 551, 551, 551, 548, 548, 548, 548, 548, 548, 548, 548, 549, 549, 549, 549, 549, 549, 549, 549, 563, 563, 563, 563, 563, 563, 563, 563, 562, 562, 562, 562, 562, 562, 562, 562, 561, 561, 561, 561, 561, 561, 561, 561, 560, 560, 560, 560, 560, 560, 560, 560, 567, 567, 567, 567, 567, 567, 567, 567, 566, 566, 566, 566, 566, 566, 566, 566, 565, 565, 565, 565, 565, 565, 565, 565, 564, 564, 564, 564, 564, 564, 564, 564, -1 -// CHECK: 563, 563, 563, 563, 563, 563, 563, 563, 562, 562, 562, 562, 562, 562, 562, 562, 561, 561, 561, 561, 561, 561, 561, 561, 560, 560, 560, 560, 560, 560, 560, 560, 567, 567, 567, 567, 567, 567, 567, 567, 566, 566, 566, 566, 566, 566, 566, 566, 565, 565, 565, 565, 565, 565, 565, 565, 564, 564, 564, 
564, 564, 564, 564, 564, 580, 580, 580, 580, 580, 580, 580, 580, 581, 581, 581, 581, 581, 581, 581, 581, 582, 582, 582, 582, 582, 582, 582, 582, 583, 583, 583, 583, 583, 583, 583, 583, 576, 576, 576, 576, 576, 576, 576, 576, 577, 577, 577, 577, 577, 577, 577, 577, 578, 578, 578, 578, 578, 578, 578, 578, 579, 579, 579, 579, 579, 579, 579, 579, -1 -// CHECK: 580, 580, 580, 580, 580, 580, 580, 580, 581, 581, 581, 581, 581, 581, 581, 581, 582, 582, 582, 582, 582, 582, 582, 582, 583, 583, 583, 583, 583, 583, 583, 583, 576, 576, 576, 576, 576, 576, 576, 576, 577, 577, 577, 577, 577, 577, 577, 577, 578, 578, 578, 578, 578, 578, 578, 578, 579, 579, 579, 579, 579, 579, 579, 579, 597, 597, 597, 597, 597, 597, 597, 597, 596, 596, 596, 596, 596, 596, 596, 596, 599, 599, 599, 599, 599, 599, 599, 599, 598, 598, 598, 598, 598, 598, 598, 598, 593, 593, 593, 593, 593, 593, 593, 593, 592, 592, 592, 592, 592, 592, 592, 592, 595, 595, 595, 595, 595, 595, 595, 595, 594, 594, 594, 594, 594, 594, 594, 594, -1 -// CHECK: 597, 597, 597, 597, 597, 597, 597, 597, 596, 596, 596, 596, 596, 596, 596, 596, 599, 599, 599, 599, 599, 599, 599, 599, 598, 598, 598, 598, 598, 598, 598, 598, 593, 593, 593, 593, 593, 593, 593, 593, 592, 592, 592, 592, 592, 592, 592, 592, 595, 595, 595, 595, 595, 595, 595, 595, 594, 594, 594, 594, 594, 594, 594, 594, 614, 614, 614, 614, 614, 614, 614, 614, 615, 615, 615, 615, 615, 615, 615, 615, 612, 612, 612, 612, 612, 612, 612, 612, 613, 613, 613, 613, 613, 613, 613, 613, 610, 610, 610, 610, 610, 610, 610, 610, 611, 611, 611, 611, 611, 611, 611, 611, 608, 608, 608, 608, 608, 608, 608, 608, 609, 609, 609, 609, 609, 609, 609, 609, -1 -// CHECK: 614, 614, 614, 614, 614, 614, 614, 614, 615, 615, 615, 615, 615, 615, 615, 615, 612, 612, 612, 612, 612, 612, 612, 612, 613, 613, 613, 613, 613, 613, 613, 613, 610, 610, 610, 610, 610, 610, 610, 610, 611, 611, 611, 611, 611, 611, 611, 611, 608, 608, 608, 608, 608, 608, 608, 608, 609, 609, 609, 609, 609, 609, 609, 609, 631, 631, 631, 631, 631, 631, 631, 631, 630, 630, 630, 630, 630, 630, 630, 630, 629, 629, 629, 629, 629, 629, 629, 629, 628, 628, 628, 628, 628, 628, 628, 628, 627, 627, 627, 627, 627, 627, 627, 627, 626, 626, 626, 626, 626, 626, 626, 626, 625, 625, 625, 625, 625, 625, 625, 625, 624, 624, 624, 624, 624, 624, 624, 624, -1 -// CHECK: 631, 631, 631, 631, 631, 631, 631, 631, 630, 630, 630, 630, 630, 630, 630, 630, 629, 629, 629, 629, 629, 629, 629, 629, 628, 628, 628, 628, 628, 628, 628, 628, 627, 627, 627, 627, 627, 627, 627, 627, 626, 626, 626, 626, 626, 626, 626, 626, 625, 625, 625, 625, 625, 625, 625, 625, 624, 624, 624, 624, 624, 624, 624, 624, 640, 640, 640, 640, 640, 640, 640, 640, 641, 641, 641, 641, 641, 641, 641, 641, 642, 642, 642, 642, 642, 642, 642, 642, 643, 643, 643, 643, 643, 643, 643, 643, 644, 644, 644, 644, 644, 644, 644, 644, 645, 645, 645, 645, 645, 645, 645, 645, 646, 646, 646, 646, 646, 646, 646, 646, 647, 647, 647, 647, 647, 647, 647, 647, -1 -// CHECK: 640, 640, 640, 640, 640, 640, 640, 640, 641, 641, 641, 641, 641, 641, 641, 641, 642, 642, 642, 642, 642, 642, 642, 642, 643, 643, 643, 643, 643, 643, 643, 643, 644, 644, 644, 644, 644, 644, 644, 644, 645, 645, 645, 645, 645, 645, 645, 645, 646, 646, 646, 646, 646, 646, 646, 646, 647, 647, 647, 647, 647, 647, 647, 647, 657, 657, 657, 657, 657, 657, 657, 657, 656, 656, 656, 656, 656, 656, 656, 656, 659, 659, 659, 659, 659, 659, 659, 659, 658, 658, 658, 658, 658, 658, 658, 658, 661, 661, 661, 661, 661, 661, 661, 661, 660, 660, 660, 660, 660, 660, 660, 660, 663, 663, 663, 663, 
663, 663, 663, 663, 662, 662, 662, 662, 662, 662, 662, 662, -1 -// CHECK: 657, 657, 657, 657, 657, 657, 657, 657, 656, 656, 656, 656, 656, 656, 656, 656, 659, 659, 659, 659, 659, 659, 659, 659, 658, 658, 658, 658, 658, 658, 658, 658, 661, 661, 661, 661, 661, 661, 661, 661, 660, 660, 660, 660, 660, 660, 660, 660, 663, 663, 663, 663, 663, 663, 663, 663, 662, 662, 662, 662, 662, 662, 662, 662, 674, 674, 674, 674, 674, 674, 674, 674, 675, 675, 675, 675, 675, 675, 675, 675, 672, 672, 672, 672, 672, 672, 672, 672, 673, 673, 673, 673, 673, 673, 673, 673, 678, 678, 678, 678, 678, 678, 678, 678, 679, 679, 679, 679, 679, 679, 679, 679, 676, 676, 676, 676, 676, 676, 676, 676, 677, 677, 677, 677, 677, 677, 677, 677, -1 -// CHECK: 674, 674, 674, 674, 674, 674, 674, 674, 675, 675, 675, 675, 675, 675, 675, 675, 672, 672, 672, 672, 672, 672, 672, 672, 673, 673, 673, 673, 673, 673, 673, 673, 678, 678, 678, 678, 678, 678, 678, 678, 679, 679, 679, 679, 679, 679, 679, 679, 676, 676, 676, 676, 676, 676, 676, 676, 677, 677, 677, 677, 677, 677, 677, 677, 691, 691, 691, 691, 691, 691, 691, 691, 690, 690, 690, 690, 690, 690, 690, 690, 689, 689, 689, 689, 689, 689, 689, 689, 688, 688, 688, 688, 688, 688, 688, 688, 695, 695, 695, 695, 695, 695, 695, 695, 694, 694, 694, 694, 694, 694, 694, 694, 693, 693, 693, 693, 693, 693, 693, 693, 692, 692, 692, 692, 692, 692, 692, 692, -1 -// CHECK: 691, 691, 691, 691, 691, 691, 691, 691, 690, 690, 690, 690, 690, 690, 690, 690, 689, 689, 689, 689, 689, 689, 689, 689, 688, 688, 688, 688, 688, 688, 688, 688, 695, 695, 695, 695, 695, 695, 695, 695, 694, 694, 694, 694, 694, 694, 694, 694, 693, 693, 693, 693, 693, 693, 693, 693, 692, 692, 692, 692, 692, 692, 692, 692, 708, 708, 708, 708, 708, 708, 708, 708, 709, 709, 709, 709, 709, 709, 709, 709, 710, 710, 710, 710, 710, 710, 710, 710, 711, 711, 711, 711, 711, 711, 711, 711, 704, 704, 704, 704, 704, 704, 704, 704, 705, 705, 705, 705, 705, 705, 705, 705, 706, 706, 706, 706, 706, 706, 706, 706, 707, 707, 707, 707, 707, 707, 707, 707, -1 -// CHECK: 708, 708, 708, 708, 708, 708, 708, 708, 709, 709, 709, 709, 709, 709, 709, 709, 710, 710, 710, 710, 710, 710, 710, 710, 711, 711, 711, 711, 711, 711, 711, 711, 704, 704, 704, 704, 704, 704, 704, 704, 705, 705, 705, 705, 705, 705, 705, 705, 706, 706, 706, 706, 706, 706, 706, 706, 707, 707, 707, 707, 707, 707, 707, 707, 725, 725, 725, 725, 725, 725, 725, 725, 724, 724, 724, 724, 724, 724, 724, 724, 727, 727, 727, 727, 727, 727, 727, 727, 726, 726, 726, 726, 726, 726, 726, 726, 721, 721, 721, 721, 721, 721, 721, 721, 720, 720, 720, 720, 720, 720, 720, 720, 723, 723, 723, 723, 723, 723, 723, 723, 722, 722, 722, 722, 722, 722, 722, 722, -1 -// CHECK: 725, 725, 725, 725, 725, 725, 725, 725, 724, 724, 724, 724, 724, 724, 724, 724, 727, 727, 727, 727, 727, 727, 727, 727, 726, 726, 726, 726, 726, 726, 726, 726, 721, 721, 721, 721, 721, 721, 721, 721, 720, 720, 720, 720, 720, 720, 720, 720, 723, 723, 723, 723, 723, 723, 723, 723, 722, 722, 722, 722, 722, 722, 722, 722, 742, 742, 742, 742, 742, 742, 742, 742, 743, 743, 743, 743, 743, 743, 743, 743, 740, 740, 740, 740, 740, 740, 740, 740, 741, 741, 741, 741, 741, 741, 741, 741, 738, 738, 738, 738, 738, 738, 738, 738, 739, 739, 739, 739, 739, 739, 739, 739, 736, 736, 736, 736, 736, 736, 736, 736, 737, 737, 737, 737, 737, 737, 737, 737, -1 -// CHECK: 742, 742, 742, 742, 742, 742, 742, 742, 743, 743, 743, 743, 743, 743, 743, 743, 740, 740, 740, 740, 740, 740, 740, 740, 741, 741, 741, 741, 741, 741, 741, 741, 738, 738, 738, 738, 738, 738, 738, 738, 739, 739, 
739, 739, 739, 739, 739, 739, 736, 736, 736, 736, 736, 736, 736, 736, 737, 737, 737, 737, 737, 737, 737, 737, 759, 759, 759, 759, 759, 759, 759, 759, 758, 758, 758, 758, 758, 758, 758, 758, 757, 757, 757, 757, 757, 757, 757, 757, 756, 756, 756, 756, 756, 756, 756, 756, 755, 755, 755, 755, 755, 755, 755, 755, 754, 754, 754, 754, 754, 754, 754, 754, 753, 753, 753, 753, 753, 753, 753, 753, 752, 752, 752, 752, 752, 752, 752, 752, -1 -// CHECK: 759, 759, 759, 759, 759, 759, 759, 759, 758, 758, 758, 758, 758, 758, 758, 758, 757, 757, 757, 757, 757, 757, 757, 757, 756, 756, 756, 756, 756, 756, 756, 756, 755, 755, 755, 755, 755, 755, 755, 755, 754, 754, 754, 754, 754, 754, 754, 754, 753, 753, 753, 753, 753, 753, 753, 753, 752, 752, 752, 752, 752, 752, 752, 752, 768, 768, 768, 768, 768, 768, 768, 768, 769, 769, 769, 769, 769, 769, 769, 769, 770, 770, 770, 770, 770, 770, 770, 770, 771, 771, 771, 771, 771, 771, 771, 771, 772, 772, 772, 772, 772, 772, 772, 772, 773, 773, 773, 773, 773, 773, 773, 773, 774, 774, 774, 774, 774, 774, 774, 774, 775, 775, 775, 775, 775, 775, 775, 775, -1 -// CHECK: 768, 768, 768, 768, 768, 768, 768, 768, 769, 769, 769, 769, 769, 769, 769, 769, 770, 770, 770, 770, 770, 770, 770, 770, 771, 771, 771, 771, 771, 771, 771, 771, 772, 772, 772, 772, 772, 772, 772, 772, 773, 773, 773, 773, 773, 773, 773, 773, 774, 774, 774, 774, 774, 774, 774, 774, 775, 775, 775, 775, 775, 775, 775, 775, 785, 785, 785, 785, 785, 785, 785, 785, 784, 784, 784, 784, 784, 784, 784, 784, 787, 787, 787, 787, 787, 787, 787, 787, 786, 786, 786, 786, 786, 786, 786, 786, 789, 789, 789, 789, 789, 789, 789, 789, 788, 788, 788, 788, 788, 788, 788, 788, 791, 791, 791, 791, 791, 791, 791, 791, 790, 790, 790, 790, 790, 790, 790, 790, -1 -// CHECK: 785, 785, 785, 785, 785, 785, 785, 785, 784, 784, 784, 784, 784, 784, 784, 784, 787, 787, 787, 787, 787, 787, 787, 787, 786, 786, 786, 786, 786, 786, 786, 786, 789, 789, 789, 789, 789, 789, 789, 789, 788, 788, 788, 788, 788, 788, 788, 788, 791, 791, 791, 791, 791, 791, 791, 791, 790, 790, 790, 790, 790, 790, 790, 790, 802, 802, 802, 802, 802, 802, 802, 802, 803, 803, 803, 803, 803, 803, 803, 803, 800, 800, 800, 800, 800, 800, 800, 800, 801, 801, 801, 801, 801, 801, 801, 801, 806, 806, 806, 806, 806, 806, 806, 806, 807, 807, 807, 807, 807, 807, 807, 807, 804, 804, 804, 804, 804, 804, 804, 804, 805, 805, 805, 805, 805, 805, 805, 805, -1 -// CHECK: 802, 802, 802, 802, 802, 802, 802, 802, 803, 803, 803, 803, 803, 803, 803, 803, 800, 800, 800, 800, 800, 800, 800, 800, 801, 801, 801, 801, 801, 801, 801, 801, 806, 806, 806, 806, 806, 806, 806, 806, 807, 807, 807, 807, 807, 807, 807, 807, 804, 804, 804, 804, 804, 804, 804, 804, 805, 805, 805, 805, 805, 805, 805, 805, 819, 819, 819, 819, 819, 819, 819, 819, 818, 818, 818, 818, 818, 818, 818, 818, 817, 817, 817, 817, 817, 817, 817, 817, 816, 816, 816, 816, 816, 816, 816, 816, 823, 823, 823, 823, 823, 823, 823, 823, 822, 822, 822, 822, 822, 822, 822, 822, 821, 821, 821, 821, 821, 821, 821, 821, 820, 820, 820, 820, 820, 820, 820, 820, -1 -// CHECK: 819, 819, 819, 819, 819, 819, 819, 819, 818, 818, 818, 818, 818, 818, 818, 818, 817, 817, 817, 817, 817, 817, 817, 817, 816, 816, 816, 816, 816, 816, 816, 816, 823, 823, 823, 823, 823, 823, 823, 823, 822, 822, 822, 822, 822, 822, 822, 822, 821, 821, 821, 821, 821, 821, 821, 821, 820, 820, 820, 820, 820, 820, 820, 820, 836, 836, 836, 836, 836, 836, 836, 836, 837, 837, 837, 837, 837, 837, 837, 837, 838, 838, 838, 838, 838, 838, 838, 838, 839, 839, 839, 839, 839, 839, 839, 839, 832, 832, 832, 
832, 832, 832, 832, 832, 833, 833, 833, 833, 833, 833, 833, 833, 834, 834, 834, 834, 834, 834, 834, 834, 835, 835, 835, 835, 835, 835, 835, 835, -1 -// CHECK: 836, 836, 836, 836, 836, 836, 836, 836, 837, 837, 837, 837, 837, 837, 837, 837, 838, 838, 838, 838, 838, 838, 838, 838, 839, 839, 839, 839, 839, 839, 839, 839, 832, 832, 832, 832, 832, 832, 832, 832, 833, 833, 833, 833, 833, 833, 833, 833, 834, 834, 834, 834, 834, 834, 834, 834, 835, 835, 835, 835, 835, 835, 835, 835, 853, 853, 853, 853, 853, 853, 853, 853, 852, 852, 852, 852, 852, 852, 852, 852, 855, 855, 855, 855, 855, 855, 855, 855, 854, 854, 854, 854, 854, 854, 854, 854, 849, 849, 849, 849, 849, 849, 849, 849, 848, 848, 848, 848, 848, 848, 848, 848, 851, 851, 851, 851, 851, 851, 851, 851, 850, 850, 850, 850, 850, 850, 850, 850, -1 -// CHECK: 853, 853, 853, 853, 853, 853, 853, 853, 852, 852, 852, 852, 852, 852, 852, 852, 855, 855, 855, 855, 855, 855, 855, 855, 854, 854, 854, 854, 854, 854, 854, 854, 849, 849, 849, 849, 849, 849, 849, 849, 848, 848, 848, 848, 848, 848, 848, 848, 851, 851, 851, 851, 851, 851, 851, 851, 850, 850, 850, 850, 850, 850, 850, 850, 870, 870, 870, 870, 870, 870, 870, 870, 871, 871, 871, 871, 871, 871, 871, 871, 868, 868, 868, 868, 868, 868, 868, 868, 869, 869, 869, 869, 869, 869, 869, 869, 866, 866, 866, 866, 866, 866, 866, 866, 867, 867, 867, 867, 867, 867, 867, 867, 864, 864, 864, 864, 864, 864, 864, 864, 865, 865, 865, 865, 865, 865, 865, 865, -1 -// CHECK: 870, 870, 870, 870, 870, 870, 870, 870, 871, 871, 871, 871, 871, 871, 871, 871, 868, 868, 868, 868, 868, 868, 868, 868, 869, 869, 869, 869, 869, 869, 869, 869, 866, 866, 866, 866, 866, 866, 866, 866, 867, 867, 867, 867, 867, 867, 867, 867, 864, 864, 864, 864, 864, 864, 864, 864, 865, 865, 865, 865, 865, 865, 865, 865, 887, 887, 887, 887, 887, 887, 887, 887, 886, 886, 886, 886, 886, 886, 886, 886, 885, 885, 885, 885, 885, 885, 885, 885, 884, 884, 884, 884, 884, 884, 884, 884, 883, 883, 883, 883, 883, 883, 883, 883, 882, 882, 882, 882, 882, 882, 882, 882, 881, 881, 881, 881, 881, 881, 881, 881, 880, 880, 880, 880, 880, 880, 880, 880, -1 -// CHECK: 887, 887, 887, 887, 887, 887, 887, 887, 886, 886, 886, 886, 886, 886, 886, 886, 885, 885, 885, 885, 885, 885, 885, 885, 884, 884, 884, 884, 884, 884, 884, 884, 883, 883, 883, 883, 883, 883, 883, 883, 882, 882, 882, 882, 882, 882, 882, 882, 881, 881, 881, 881, 881, 881, 881, 881, 880, 880, 880, 880, 880, 880, 880, 880, 896, 896, 896, 896, 896, 896, 896, 896, 897, 897, 897, 897, 897, 897, 897, 897, 898, 898, 898, 898, 898, 898, 898, 898, 899, 899, 899, 899, 899, 899, 899, 899, 900, 900, 900, 900, 900, 900, 900, 900, 901, 901, 901, 901, 901, 901, 901, 901, 902, 902, 902, 902, 902, 902, 902, 902, 903, 903, 903, 903, 903, 903, 903, 903, -1 -// CHECK: 896, 896, 896, 896, 896, 896, 896, 896, 897, 897, 897, 897, 897, 897, 897, 897, 898, 898, 898, 898, 898, 898, 898, 898, 899, 899, 899, 899, 899, 899, 899, 899, 900, 900, 900, 900, 900, 900, 900, 900, 901, 901, 901, 901, 901, 901, 901, 901, 902, 902, 902, 902, 902, 902, 902, 902, 903, 903, 903, 903, 903, 903, 903, 903, 913, 913, 913, 913, 913, 913, 913, 913, 912, 912, 912, 912, 912, 912, 912, 912, 915, 915, 915, 915, 915, 915, 915, 915, 914, 914, 914, 914, 914, 914, 914, 914, 917, 917, 917, 917, 917, 917, 917, 917, 916, 916, 916, 916, 916, 916, 916, 916, 919, 919, 919, 919, 919, 919, 919, 919, 918, 918, 918, 918, 918, 918, 918, 918, -1 -// CHECK: 913, 913, 913, 913, 913, 913, 913, 913, 912, 912, 912, 912, 912, 912, 912, 912, 915, 915, 915, 915, 915, 915, 915, 915, 914, 
914, 914, 914, 914, 914, 914, 914, 917, 917, 917, 917, 917, 917, 917, 917, 916, 916, 916, 916, 916, 916, 916, 916, 919, 919, 919, 919, 919, 919, 919, 919, 918, 918, 918, 918, 918, 918, 918, 918, 930, 930, 930, 930, 930, 930, 930, 930, 931, 931, 931, 931, 931, 931, 931, 931, 928, 928, 928, 928, 928, 928, 928, 928, 929, 929, 929, 929, 929, 929, 929, 929, 934, 934, 934, 934, 934, 934, 934, 934, 935, 935, 935, 935, 935, 935, 935, 935, 932, 932, 932, 932, 932, 932, 932, 932, 933, 933, 933, 933, 933, 933, 933, 933, -1 -// CHECK: 930, 930, 930, 930, 930, 930, 930, 930, 931, 931, 931, 931, 931, 931, 931, 931, 928, 928, 928, 928, 928, 928, 928, 928, 929, 929, 929, 929, 929, 929, 929, 929, 934, 934, 934, 934, 934, 934, 934, 934, 935, 935, 935, 935, 935, 935, 935, 935, 932, 932, 932, 932, 932, 932, 932, 932, 933, 933, 933, 933, 933, 933, 933, 933, 947, 947, 947, 947, 947, 947, 947, 947, 946, 946, 946, 946, 946, 946, 946, 946, 945, 945, 945, 945, 945, 945, 945, 945, 944, 944, 944, 944, 944, 944, 944, 944, 951, 951, 951, 951, 951, 951, 951, 951, 950, 950, 950, 950, 950, 950, 950, 950, 949, 949, 949, 949, 949, 949, 949, 949, 948, 948, 948, 948, 948, 948, 948, 948, -1 -// CHECK: 947, 947, 947, 947, 947, 947, 947, 947, 946, 946, 946, 946, 946, 946, 946, 946, 945, 945, 945, 945, 945, 945, 945, 945, 944, 944, 944, 944, 944, 944, 944, 944, 951, 951, 951, 951, 951, 951, 951, 951, 950, 950, 950, 950, 950, 950, 950, 950, 949, 949, 949, 949, 949, 949, 949, 949, 948, 948, 948, 948, 948, 948, 948, 948, 964, 964, 964, 964, 964, 964, 964, 964, 965, 965, 965, 965, 965, 965, 965, 965, 966, 966, 966, 966, 966, 966, 966, 966, 967, 967, 967, 967, 967, 967, 967, 967, 960, 960, 960, 960, 960, 960, 960, 960, 961, 961, 961, 961, 961, 961, 961, 961, 962, 962, 962, 962, 962, 962, 962, 962, 963, 963, 963, 963, 963, 963, 963, 963, -1 -// CHECK: 964, 964, 964, 964, 964, 964, 964, 964, 965, 965, 965, 965, 965, 965, 965, 965, 966, 966, 966, 966, 966, 966, 966, 966, 967, 967, 967, 967, 967, 967, 967, 967, 960, 960, 960, 960, 960, 960, 960, 960, 961, 961, 961, 961, 961, 961, 961, 961, 962, 962, 962, 962, 962, 962, 962, 962, 963, 963, 963, 963, 963, 963, 963, 963, 981, 981, 981, 981, 981, 981, 981, 981, 980, 980, 980, 980, 980, 980, 980, 980, 983, 983, 983, 983, 983, 983, 983, 983, 982, 982, 982, 982, 982, 982, 982, 982, 977, 977, 977, 977, 977, 977, 977, 977, 976, 976, 976, 976, 976, 976, 976, 976, 979, 979, 979, 979, 979, 979, 979, 979, 978, 978, 978, 978, 978, 978, 978, 978, -1 -// CHECK: 981, 981, 981, 981, 981, 981, 981, 981, 980, 980, 980, 980, 980, 980, 980, 980, 983, 983, 983, 983, 983, 983, 983, 983, 982, 982, 982, 982, 982, 982, 982, 982, 977, 977, 977, 977, 977, 977, 977, 977, 976, 976, 976, 976, 976, 976, 976, 976, 979, 979, 979, 979, 979, 979, 979, 979, 978, 978, 978, 978, 978, 978, 978, 978, 998, 998, 998, 998, 998, 998, 998, 998, 999, 999, 999, 999, 999, 999, 999, 999, 996, 996, 996, 996, 996, 996, 996, 996, 997, 997, 997, 997, 997, 997, 997, 997, 994, 994, 994, 994, 994, 994, 994, 994, 995, 995, 995, 995, 995, 995, 995, 995, 992, 992, 992, 992, 992, 992, 992, 992, 993, 993, 993, 993, 993, 993, 993, 993, -1 -// CHECK: 998, 998, 998, 998, 998, 998, 998, 998, 999, 999, 999, 999, 999, 999, 999, 999, 996, 996, 996, 996, 996, 996, 996, 996, 997, 997, 997, 997, 997, 997, 997, 997, 994, 994, 994, 994, 994, 994, 994, 994, 995, 995, 995, 995, 995, 995, 995, 995, 992, 992, 992, 992, 992, 992, 992, 992, 993, 993, 993, 993, 993, 993, 993, 993, 1015, 1015, 1015, 1015, 1015, 1015, 1015, 1015, 1014, 1014, 1014, 1014, 1014, 1014, 1014, 
1014, 1013, 1013, 1013, 1013, 1013, 1013, 1013, 1013, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1011, 1011, 1011, 1011, 1011, 1011, 1011, 1011, 1010, 1010, 1010, 1010, 1010, 1010, 1010, 1010, 1009, 1009, 1009, 1009, 1009, 1009, 1009, 1009, 1008, 1008, 1008, 1008, 1008, 1008, 1008, 1008, -1 -// CHECK: 1015, 1015, 1015, 1015, 1015, 1015, 1015, 1015, 1014, 1014, 1014, 1014, 1014, 1014, 1014, 1014, 1013, 1013, 1013, 1013, 1013, 1013, 1013, 1013, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1011, 1011, 1011, 1011, 1011, 1011, 1011, 1011, 1010, 1010, 1010, 1010, 1010, 1010, 1010, 1010, 1009, 1009, 1009, 1009, 1009, 1009, 1009, 1009, 1008, 1008, 1008, 1008, 1008, 1008, 1008, 1008, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1 +// CHECK: ===--- Matrix B ---=== +// CHECK: 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 17, 17, 17, 17, 17, 17, 17, 17, 16, 16, 16, 16, 16, 16, 16, 16, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18, 18, 18, 18, 18, 18, 18, 21, 21, 21, 21, 21, 21, 21, 21, 20, 20, 20, 20, 20, 20, 20, 20, 23, 23, 23, 23, 23, 23, 23, 23, 22, 22, 22, 22, 22, 22, 22, 22 +// CHECK: 34, 34, 34, 34, 34, 34, 34, 34, 35, 35, 35, 35, 35, 35, 35, 35, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, 36, 36, 36, 36, 36, 36, 36, 36, 37, 37, 37, 37, 37, 37, 37, 37, 51, 51, 51, 51, 51, 51, 51, 51, 50, 50, 50, 50, 50, 50, 50, 50, 49, 49, 49, 49, 49, 49, 49, 49, 48, 48, 48, 48, 48, 48, 48, 48, 55, 55, 55, 55, 55, 55, 55, 55, 54, 54, 54, 54, 54, 54, 54, 54, 53, 53, 53, 53, 53, 53, 53, 53, 52, 52, 52, 52, 52, 52, 52, 52 +// CHECK: 68, 68, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 70, 70, 70, 70, 70, 70, 70, 70, 71, 71, 71, 71, 71, 71, 71, 71, 64, 64, 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 67, 67, 85, 85, 85, 85, 85, 85, 85, 85, 84, 84, 84, 84, 84, 84, 84, 84, 87, 87, 87, 87, 87, 87, 87, 87, 86, 86, 86, 86, 86, 86, 86, 86, 81, 81, 81, 81, 81, 81, 81, 81, 80, 80, 80, 80, 80, 80, 80, 80, 83, 83, 83, 83, 83, 83, 83, 83, 82, 82, 82, 82, 82, 82, 82, 82 +// CHECK: 102, 102, 102, 102, 102, 102, 102, 102, 103, 103, 103, 103, 103, 103, 103, 103, 100, 100, 100, 100, 100, 100, 100, 100, 101, 101, 101, 101, 101, 101, 101, 101, 98, 98, 98, 98, 98, 98, 98, 98, 99, 99, 99, 99, 99, 99, 99, 99, 96, 96, 96, 96, 96, 96, 96, 96, 97, 97, 97, 97, 97, 97, 97, 97, 119, 119, 119, 119, 119, 119, 119, 119, 118, 118, 118, 118, 118, 118, 118, 118, 117, 117, 117, 117, 117, 117, 117, 117, 116, 116, 116, 116, 116, 116, 116, 116, 115, 115, 115, 115, 115, 115, 115, 115, 114, 114, 114, 114, 114, 114, 114, 114, 113, 113, 113, 113, 113, 113, 113, 113, 112, 112, 112, 112, 112, 112, 112, 112 +// CHECK: 128, 128, 128, 128, 128, 128, 128, 128, 129, 129, 129, 129, 129, 129, 129, 129, 130, 130, 130, 130, 130, 130, 130, 130, 131, 131, 131, 131, 131, 131, 131, 131, 132, 132, 132, 132, 132, 132, 132, 132, 133, 133, 133, 133, 133, 133, 133, 133, 134, 134, 134, 134, 134, 134, 134, 134, 135, 135, 135, 135, 135, 135, 135, 135, 145, 145, 145, 145, 145, 145, 145, 145, 144, 144, 144, 144, 144, 144, 144, 144, 147, 147, 147, 147, 147, 147, 147, 147, 146, 146, 146, 146, 146, 146, 146, 146, 149, 149, 149, 149, 
149, 149, 149, 149, 148, 148, 148, 148, 148, 148, 148, 148, 151, 151, 151, 151, 151, 151, 151, 151, 150, 150, 150, 150, 150, 150, 150, 150 +// CHECK: 162, 162, 162, 162, 162, 162, 162, 162, 163, 163, 163, 163, 163, 163, 163, 163, 160, 160, 160, 160, 160, 160, 160, 160, 161, 161, 161, 161, 161, 161, 161, 161, 166, 166, 166, 166, 166, 166, 166, 166, 167, 167, 167, 167, 167, 167, 167, 167, 164, 164, 164, 164, 164, 164, 164, 164, 165, 165, 165, 165, 165, 165, 165, 165, 179, 179, 179, 179, 179, 179, 179, 179, 178, 178, 178, 178, 178, 178, 178, 178, 177, 177, 177, 177, 177, 177, 177, 177, 176, 176, 176, 176, 176, 176, 176, 176, 183, 183, 183, 183, 183, 183, 183, 183, 182, 182, 182, 182, 182, 182, 182, 182, 181, 181, 181, 181, 181, 181, 181, 181, 180, 180, 180, 180, 180, 180, 180, 180 +// CHECK: 196, 196, 196, 196, 196, 196, 196, 196, 197, 197, 197, 197, 197, 197, 197, 197, 198, 198, 198, 198, 198, 198, 198, 198, 199, 199, 199, 199, 199, 199, 199, 199, 192, 192, 192, 192, 192, 192, 192, 192, 193, 193, 193, 193, 193, 193, 193, 193, 194, 194, 194, 194, 194, 194, 194, 194, 195, 195, 195, 195, 195, 195, 195, 195, 213, 213, 213, 213, 213, 213, 213, 213, 212, 212, 212, 212, 212, 212, 212, 212, 215, 215, 215, 215, 215, 215, 215, 215, 214, 214, 214, 214, 214, 214, 214, 214, 209, 209, 209, 209, 209, 209, 209, 209, 208, 208, 208, 208, 208, 208, 208, 208, 211, 211, 211, 211, 211, 211, 211, 211, 210, 210, 210, 210, 210, 210, 210, 210 +// CHECK: 230, 230, 230, 230, 230, 230, 230, 230, 231, 231, 231, 231, 231, 231, 231, 231, 228, 228, 228, 228, 228, 228, 228, 228, 229, 229, 229, 229, 229, 229, 229, 229, 226, 226, 226, 226, 226, 226, 226, 226, 227, 227, 227, 227, 227, 227, 227, 227, 224, 224, 224, 224, 224, 224, 224, 224, 225, 225, 225, 225, 225, 225, 225, 225, 247, 247, 247, 247, 247, 247, 247, 247, 246, 246, 246, 246, 246, 246, 246, 246, 245, 245, 245, 245, 245, 245, 245, 245, 244, 244, 244, 244, 244, 244, 244, 244, 243, 243, 243, 243, 243, 243, 243, 243, 242, 242, 242, 242, 242, 242, 242, 242, 241, 241, 241, 241, 241, 241, 241, 241, 240, 240, 240, 240, 240, 240, 240, 240 +// CHECK: 256, 256, 256, 256, 256, 256, 256, 256, 257, 257, 257, 257, 257, 257, 257, 257, 258, 258, 258, 258, 258, 258, 258, 258, 259, 259, 259, 259, 259, 259, 259, 259, 260, 260, 260, 260, 260, 260, 260, 260, 261, 261, 261, 261, 261, 261, 261, 261, 262, 262, 262, 262, 262, 262, 262, 262, 263, 263, 263, 263, 263, 263, 263, 263, 273, 273, 273, 273, 273, 273, 273, 273, 272, 272, 272, 272, 272, 272, 272, 272, 275, 275, 275, 275, 275, 275, 275, 275, 274, 274, 274, 274, 274, 274, 274, 274, 277, 277, 277, 277, 277, 277, 277, 277, 276, 276, 276, 276, 276, 276, 276, 276, 279, 279, 279, 279, 279, 279, 279, 279, 278, 278, 278, 278, 278, 278, 278, 278 +// CHECK: 290, 290, 290, 290, 290, 290, 290, 290, 291, 291, 291, 291, 291, 291, 291, 291, 288, 288, 288, 288, 288, 288, 288, 288, 289, 289, 289, 289, 289, 289, 289, 289, 294, 294, 294, 294, 294, 294, 294, 294, 295, 295, 295, 295, 295, 295, 295, 295, 292, 292, 292, 292, 292, 292, 292, 292, 293, 293, 293, 293, 293, 293, 293, 293, 307, 307, 307, 307, 307, 307, 307, 307, 306, 306, 306, 306, 306, 306, 306, 306, 305, 305, 305, 305, 305, 305, 305, 305, 304, 304, 304, 304, 304, 304, 304, 304, 311, 311, 311, 311, 311, 311, 311, 311, 310, 310, 310, 310, 310, 310, 310, 310, 309, 309, 309, 309, 309, 309, 309, 309, 308, 308, 308, 308, 308, 308, 308, 308 +// CHECK: 324, 324, 324, 324, 324, 324, 324, 324, 325, 325, 325, 325, 325, 325, 325, 325, 326, 326, 326, 326, 326, 326, 326, 326, 327, 327, 327, 327, 327, 327, 327, 
327, 320, 320, 320, 320, 320, 320, 320, 320, 321, 321, 321, 321, 321, 321, 321, 321, 322, 322, 322, 322, 322, 322, 322, 322, 323, 323, 323, 323, 323, 323, 323, 323, 341, 341, 341, 341, 341, 341, 341, 341, 340, 340, 340, 340, 340, 340, 340, 340, 343, 343, 343, 343, 343, 343, 343, 343, 342, 342, 342, 342, 342, 342, 342, 342, 337, 337, 337, 337, 337, 337, 337, 337, 336, 336, 336, 336, 336, 336, 336, 336, 339, 339, 339, 339, 339, 339, 339, 339, 338, 338, 338, 338, 338, 338, 338, 338 +// CHECK: 358, 358, 358, 358, 358, 358, 358, 358, 359, 359, 359, 359, 359, 359, 359, 359, 356, 356, 356, 356, 356, 356, 356, 356, 357, 357, 357, 357, 357, 357, 357, 357, 354, 354, 354, 354, 354, 354, 354, 354, 355, 355, 355, 355, 355, 355, 355, 355, 352, 352, 352, 352, 352, 352, 352, 352, 353, 353, 353, 353, 353, 353, 353, 353, 375, 375, 375, 375, 375, 375, 375, 375, 374, 374, 374, 374, 374, 374, 374, 374, 373, 373, 373, 373, 373, 373, 373, 373, 372, 372, 372, 372, 372, 372, 372, 372, 371, 371, 371, 371, 371, 371, 371, 371, 370, 370, 370, 370, 370, 370, 370, 370, 369, 369, 369, 369, 369, 369, 369, 369, 368, 368, 368, 368, 368, 368, 368, 368 +// CHECK: 384, 384, 384, 384, 384, 384, 384, 384, 385, 385, 385, 385, 385, 385, 385, 385, 386, 386, 386, 386, 386, 386, 386, 386, 387, 387, 387, 387, 387, 387, 387, 387, 388, 388, 388, 388, 388, 388, 388, 388, 389, 389, 389, 389, 389, 389, 389, 389, 390, 390, 390, 390, 390, 390, 390, 390, 391, 391, 391, 391, 391, 391, 391, 391, 401, 401, 401, 401, 401, 401, 401, 401, 400, 400, 400, 400, 400, 400, 400, 400, 403, 403, 403, 403, 403, 403, 403, 403, 402, 402, 402, 402, 402, 402, 402, 402, 405, 405, 405, 405, 405, 405, 405, 405, 404, 404, 404, 404, 404, 404, 404, 404, 407, 407, 407, 407, 407, 407, 407, 407, 406, 406, 406, 406, 406, 406, 406, 406 +// CHECK: 418, 418, 418, 418, 418, 418, 418, 418, 419, 419, 419, 419, 419, 419, 419, 419, 416, 416, 416, 416, 416, 416, 416, 416, 417, 417, 417, 417, 417, 417, 417, 417, 422, 422, 422, 422, 422, 422, 422, 422, 423, 423, 423, 423, 423, 423, 423, 423, 420, 420, 420, 420, 420, 420, 420, 420, 421, 421, 421, 421, 421, 421, 421, 421, 435, 435, 435, 435, 435, 435, 435, 435, 434, 434, 434, 434, 434, 434, 434, 434, 433, 433, 433, 433, 433, 433, 433, 433, 432, 432, 432, 432, 432, 432, 432, 432, 439, 439, 439, 439, 439, 439, 439, 439, 438, 438, 438, 438, 438, 438, 438, 438, 437, 437, 437, 437, 437, 437, 437, 437, 436, 436, 436, 436, 436, 436, 436, 436 +// CHECK: 452, 452, 452, 452, 452, 452, 452, 452, 453, 453, 453, 453, 453, 453, 453, 453, 454, 454, 454, 454, 454, 454, 454, 454, 455, 455, 455, 455, 455, 455, 455, 455, 448, 448, 448, 448, 448, 448, 448, 448, 449, 449, 449, 449, 449, 449, 449, 449, 450, 450, 450, 450, 450, 450, 450, 450, 451, 451, 451, 451, 451, 451, 451, 451, 469, 469, 469, 469, 469, 469, 469, 469, 468, 468, 468, 468, 468, 468, 468, 468, 471, 471, 471, 471, 471, 471, 471, 471, 470, 470, 470, 470, 470, 470, 470, 470, 465, 465, 465, 465, 465, 465, 465, 465, 464, 464, 464, 464, 464, 464, 464, 464, 467, 467, 467, 467, 467, 467, 467, 467, 466, 466, 466, 466, 466, 466, 466, 466 +// CHECK: 486, 486, 486, 486, 486, 486, 486, 486, 487, 487, 487, 487, 487, 487, 487, 487, 484, 484, 484, 484, 484, 484, 484, 484, 485, 485, 485, 485, 485, 485, 485, 485, 482, 482, 482, 482, 482, 482, 482, 482, 483, 483, 483, 483, 483, 483, 483, 483, 480, 480, 480, 480, 480, 480, 480, 480, 481, 481, 481, 481, 481, 481, 481, 481, 503, 503, 503, 503, 503, 503, 503, 503, 502, 502, 502, 502, 502, 502, 502, 502, 501, 501, 501, 501, 501, 501, 501, 501, 500, 500, 500, 500, 
500, 500, 500, 500, 499, 499, 499, 499, 499, 499, 499, 499, 498, 498, 498, 498, 498, 498, 498, 498, 497, 497, 497, 497, 497, 497, 497, 497, 496, 496, 496, 496, 496, 496, 496, 496 +// CHECK: 512, 512, 512, 512, 512, 512, 512, 512, 513, 513, 513, 513, 513, 513, 513, 513, 514, 514, 514, 514, 514, 514, 514, 514, 515, 515, 515, 515, 515, 515, 515, 515, 516, 516, 516, 516, 516, 516, 516, 516, 517, 517, 517, 517, 517, 517, 517, 517, 518, 518, 518, 518, 518, 518, 518, 518, 519, 519, 519, 519, 519, 519, 519, 519, 529, 529, 529, 529, 529, 529, 529, 529, 528, 528, 528, 528, 528, 528, 528, 528, 531, 531, 531, 531, 531, 531, 531, 531, 530, 530, 530, 530, 530, 530, 530, 530, 533, 533, 533, 533, 533, 533, 533, 533, 532, 532, 532, 532, 532, 532, 532, 532, 535, 535, 535, 535, 535, 535, 535, 535, 534, 534, 534, 534, 534, 534, 534, 534 +// CHECK: 546, 546, 546, 546, 546, 546, 546, 546, 547, 547, 547, 547, 547, 547, 547, 547, 544, 544, 544, 544, 544, 544, 544, 544, 545, 545, 545, 545, 545, 545, 545, 545, 550, 550, 550, 550, 550, 550, 550, 550, 551, 551, 551, 551, 551, 551, 551, 551, 548, 548, 548, 548, 548, 548, 548, 548, 549, 549, 549, 549, 549, 549, 549, 549, 563, 563, 563, 563, 563, 563, 563, 563, 562, 562, 562, 562, 562, 562, 562, 562, 561, 561, 561, 561, 561, 561, 561, 561, 560, 560, 560, 560, 560, 560, 560, 560, 567, 567, 567, 567, 567, 567, 567, 567, 566, 566, 566, 566, 566, 566, 566, 566, 565, 565, 565, 565, 565, 565, 565, 565, 564, 564, 564, 564, 564, 564, 564, 564 +// CHECK: 580, 580, 580, 580, 580, 580, 580, 580, 581, 581, 581, 581, 581, 581, 581, 581, 582, 582, 582, 582, 582, 582, 582, 582, 583, 583, 583, 583, 583, 583, 583, 583, 576, 576, 576, 576, 576, 576, 576, 576, 577, 577, 577, 577, 577, 577, 577, 577, 578, 578, 578, 578, 578, 578, 578, 578, 579, 579, 579, 579, 579, 579, 579, 579, 597, 597, 597, 597, 597, 597, 597, 597, 596, 596, 596, 596, 596, 596, 596, 596, 599, 599, 599, 599, 599, 599, 599, 599, 598, 598, 598, 598, 598, 598, 598, 598, 593, 593, 593, 593, 593, 593, 593, 593, 592, 592, 592, 592, 592, 592, 592, 592, 595, 595, 595, 595, 595, 595, 595, 595, 594, 594, 594, 594, 594, 594, 594, 594 +// CHECK: 614, 614, 614, 614, 614, 614, 614, 614, 615, 615, 615, 615, 615, 615, 615, 615, 612, 612, 612, 612, 612, 612, 612, 612, 613, 613, 613, 613, 613, 613, 613, 613, 610, 610, 610, 610, 610, 610, 610, 610, 611, 611, 611, 611, 611, 611, 611, 611, 608, 608, 608, 608, 608, 608, 608, 608, 609, 609, 609, 609, 609, 609, 609, 609, 631, 631, 631, 631, 631, 631, 631, 631, 630, 630, 630, 630, 630, 630, 630, 630, 629, 629, 629, 629, 629, 629, 629, 629, 628, 628, 628, 628, 628, 628, 628, 628, 627, 627, 627, 627, 627, 627, 627, 627, 626, 626, 626, 626, 626, 626, 626, 626, 625, 625, 625, 625, 625, 625, 625, 625, 624, 624, 624, 624, 624, 624, 624, 624 +// CHECK: 640, 640, 640, 640, 640, 640, 640, 640, 641, 641, 641, 641, 641, 641, 641, 641, 642, 642, 642, 642, 642, 642, 642, 642, 643, 643, 643, 643, 643, 643, 643, 643, 644, 644, 644, 644, 644, 644, 644, 644, 645, 645, 645, 645, 645, 645, 645, 645, 646, 646, 646, 646, 646, 646, 646, 646, 647, 647, 647, 647, 647, 647, 647, 647, 657, 657, 657, 657, 657, 657, 657, 657, 656, 656, 656, 656, 656, 656, 656, 656, 659, 659, 659, 659, 659, 659, 659, 659, 658, 658, 658, 658, 658, 658, 658, 658, 661, 661, 661, 661, 661, 661, 661, 661, 660, 660, 660, 660, 660, 660, 660, 660, 663, 663, 663, 663, 663, 663, 663, 663, 662, 662, 662, 662, 662, 662, 662, 662 +// CHECK: 674, 674, 674, 674, 674, 674, 674, 674, 675, 675, 675, 675, 675, 675, 675, 675, 672, 672, 672, 672, 672, 672, 672, 
672, 673, 673, 673, 673, 673, 673, 673, 673, 678, 678, 678, 678, 678, 678, 678, 678, 679, 679, 679, 679, 679, 679, 679, 679, 676, 676, 676, 676, 676, 676, 676, 676, 677, 677, 677, 677, 677, 677, 677, 677, 691, 691, 691, 691, 691, 691, 691, 691, 690, 690, 690, 690, 690, 690, 690, 690, 689, 689, 689, 689, 689, 689, 689, 689, 688, 688, 688, 688, 688, 688, 688, 688, 695, 695, 695, 695, 695, 695, 695, 695, 694, 694, 694, 694, 694, 694, 694, 694, 693, 693, 693, 693, 693, 693, 693, 693, 692, 692, 692, 692, 692, 692, 692, 692 +// CHECK: 708, 708, 708, 708, 708, 708, 708, 708, 709, 709, 709, 709, 709, 709, 709, 709, 710, 710, 710, 710, 710, 710, 710, 710, 711, 711, 711, 711, 711, 711, 711, 711, 704, 704, 704, 704, 704, 704, 704, 704, 705, 705, 705, 705, 705, 705, 705, 705, 706, 706, 706, 706, 706, 706, 706, 706, 707, 707, 707, 707, 707, 707, 707, 707, 725, 725, 725, 725, 725, 725, 725, 725, 724, 724, 724, 724, 724, 724, 724, 724, 727, 727, 727, 727, 727, 727, 727, 727, 726, 726, 726, 726, 726, 726, 726, 726, 721, 721, 721, 721, 721, 721, 721, 721, 720, 720, 720, 720, 720, 720, 720, 720, 723, 723, 723, 723, 723, 723, 723, 723, 722, 722, 722, 722, 722, 722, 722, 722 +// CHECK: 742, 742, 742, 742, 742, 742, 742, 742, 743, 743, 743, 743, 743, 743, 743, 743, 740, 740, 740, 740, 740, 740, 740, 740, 741, 741, 741, 741, 741, 741, 741, 741, 738, 738, 738, 738, 738, 738, 738, 738, 739, 739, 739, 739, 739, 739, 739, 739, 736, 736, 736, 736, 736, 736, 736, 736, 737, 737, 737, 737, 737, 737, 737, 737, 759, 759, 759, 759, 759, 759, 759, 759, 758, 758, 758, 758, 758, 758, 758, 758, 757, 757, 757, 757, 757, 757, 757, 757, 756, 756, 756, 756, 756, 756, 756, 756, 755, 755, 755, 755, 755, 755, 755, 755, 754, 754, 754, 754, 754, 754, 754, 754, 753, 753, 753, 753, 753, 753, 753, 753, 752, 752, 752, 752, 752, 752, 752, 752 +// CHECK: 768, 768, 768, 768, 768, 768, 768, 768, 769, 769, 769, 769, 769, 769, 769, 769, 770, 770, 770, 770, 770, 770, 770, 770, 771, 771, 771, 771, 771, 771, 771, 771, 772, 772, 772, 772, 772, 772, 772, 772, 773, 773, 773, 773, 773, 773, 773, 773, 774, 774, 774, 774, 774, 774, 774, 774, 775, 775, 775, 775, 775, 775, 775, 775, 785, 785, 785, 785, 785, 785, 785, 785, 784, 784, 784, 784, 784, 784, 784, 784, 787, 787, 787, 787, 787, 787, 787, 787, 786, 786, 786, 786, 786, 786, 786, 786, 789, 789, 789, 789, 789, 789, 789, 789, 788, 788, 788, 788, 788, 788, 788, 788, 791, 791, 791, 791, 791, 791, 791, 791, 790, 790, 790, 790, 790, 790, 790, 790 +// CHECK: 802, 802, 802, 802, 802, 802, 802, 802, 803, 803, 803, 803, 803, 803, 803, 803, 800, 800, 800, 800, 800, 800, 800, 800, 801, 801, 801, 801, 801, 801, 801, 801, 806, 806, 806, 806, 806, 806, 806, 806, 807, 807, 807, 807, 807, 807, 807, 807, 804, 804, 804, 804, 804, 804, 804, 804, 805, 805, 805, 805, 805, 805, 805, 805, 819, 819, 819, 819, 819, 819, 819, 819, 818, 818, 818, 818, 818, 818, 818, 818, 817, 817, 817, 817, 817, 817, 817, 817, 816, 816, 816, 816, 816, 816, 816, 816, 823, 823, 823, 823, 823, 823, 823, 823, 822, 822, 822, 822, 822, 822, 822, 822, 821, 821, 821, 821, 821, 821, 821, 821, 820, 820, 820, 820, 820, 820, 820, 820 +// CHECK: 836, 836, 836, 836, 836, 836, 836, 836, 837, 837, 837, 837, 837, 837, 837, 837, 838, 838, 838, 838, 838, 838, 838, 838, 839, 839, 839, 839, 839, 839, 839, 839, 832, 832, 832, 832, 832, 832, 832, 832, 833, 833, 833, 833, 833, 833, 833, 833, 834, 834, 834, 834, 834, 834, 834, 834, 835, 835, 835, 835, 835, 835, 835, 835, 853, 853, 853, 853, 853, 853, 853, 853, 852, 852, 852, 852, 852, 852, 852, 852, 855, 855, 855, 855, 
855, 855, 855, 855, 854, 854, 854, 854, 854, 854, 854, 854, 849, 849, 849, 849, 849, 849, 849, 849, 848, 848, 848, 848, 848, 848, 848, 848, 851, 851, 851, 851, 851, 851, 851, 851, 850, 850, 850, 850, 850, 850, 850, 850 +// CHECK: 870, 870, 870, 870, 870, 870, 870, 870, 871, 871, 871, 871, 871, 871, 871, 871, 868, 868, 868, 868, 868, 868, 868, 868, 869, 869, 869, 869, 869, 869, 869, 869, 866, 866, 866, 866, 866, 866, 866, 866, 867, 867, 867, 867, 867, 867, 867, 867, 864, 864, 864, 864, 864, 864, 864, 864, 865, 865, 865, 865, 865, 865, 865, 865, 887, 887, 887, 887, 887, 887, 887, 887, 886, 886, 886, 886, 886, 886, 886, 886, 885, 885, 885, 885, 885, 885, 885, 885, 884, 884, 884, 884, 884, 884, 884, 884, 883, 883, 883, 883, 883, 883, 883, 883, 882, 882, 882, 882, 882, 882, 882, 882, 881, 881, 881, 881, 881, 881, 881, 881, 880, 880, 880, 880, 880, 880, 880, 880 +// CHECK: 896, 896, 896, 896, 896, 896, 896, 896, 897, 897, 897, 897, 897, 897, 897, 897, 898, 898, 898, 898, 898, 898, 898, 898, 899, 899, 899, 899, 899, 899, 899, 899, 900, 900, 900, 900, 900, 900, 900, 900, 901, 901, 901, 901, 901, 901, 901, 901, 902, 902, 902, 902, 902, 902, 902, 902, 903, 903, 903, 903, 903, 903, 903, 903, 913, 913, 913, 913, 913, 913, 913, 913, 912, 912, 912, 912, 912, 912, 912, 912, 915, 915, 915, 915, 915, 915, 915, 915, 914, 914, 914, 914, 914, 914, 914, 914, 917, 917, 917, 917, 917, 917, 917, 917, 916, 916, 916, 916, 916, 916, 916, 916, 919, 919, 919, 919, 919, 919, 919, 919, 918, 918, 918, 918, 918, 918, 918, 918 +// CHECK: 930, 930, 930, 930, 930, 930, 930, 930, 931, 931, 931, 931, 931, 931, 931, 931, 928, 928, 928, 928, 928, 928, 928, 928, 929, 929, 929, 929, 929, 929, 929, 929, 934, 934, 934, 934, 934, 934, 934, 934, 935, 935, 935, 935, 935, 935, 935, 935, 932, 932, 932, 932, 932, 932, 932, 932, 933, 933, 933, 933, 933, 933, 933, 933, 947, 947, 947, 947, 947, 947, 947, 947, 946, 946, 946, 946, 946, 946, 946, 946, 945, 945, 945, 945, 945, 945, 945, 945, 944, 944, 944, 944, 944, 944, 944, 944, 951, 951, 951, 951, 951, 951, 951, 951, 950, 950, 950, 950, 950, 950, 950, 950, 949, 949, 949, 949, 949, 949, 949, 949, 948, 948, 948, 948, 948, 948, 948, 948 +// CHECK: 964, 964, 964, 964, 964, 964, 964, 964, 965, 965, 965, 965, 965, 965, 965, 965, 966, 966, 966, 966, 966, 966, 966, 966, 967, 967, 967, 967, 967, 967, 967, 967, 960, 960, 960, 960, 960, 960, 960, 960, 961, 961, 961, 961, 961, 961, 961, 961, 962, 962, 962, 962, 962, 962, 962, 962, 963, 963, 963, 963, 963, 963, 963, 963, 981, 981, 981, 981, 981, 981, 981, 981, 980, 980, 980, 980, 980, 980, 980, 980, 983, 983, 983, 983, 983, 983, 983, 983, 982, 982, 982, 982, 982, 982, 982, 982, 977, 977, 977, 977, 977, 977, 977, 977, 976, 976, 976, 976, 976, 976, 976, 976, 979, 979, 979, 979, 979, 979, 979, 979, 978, 978, 978, 978, 978, 978, 978, 978 +// CHECK: 998, 998, 998, 998, 998, 998, 998, 998, 999, 999, 999, 999, 999, 999, 999, 999, 996, 996, 996, 996, 996, 996, 996, 996, 997, 997, 997, 997, 997, 997, 997, 997, 994, 994, 994, 994, 994, 994, 994, 994, 995, 995, 995, 995, 995, 995, 995, 995, 992, 992, 992, 992, 992, 992, 992, 992, 993, 993, 993, 993, 993, 993, 993, 993, 1015, 1015, 1015, 1015, 1015, 1015, 1015, 1015, 1014, 1014, 1014, 1014, 1014, 1014, 1014, 1014, 1013, 1013, 1013, 1013, 1013, 1013, 1013, 1013, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1011, 1011, 1011, 1011, 1011, 1011, 1011, 1011, 1010, 1010, 1010, 1010, 1010, 1010, 1010, 1010, 1009, 1009, 1009, 1009, 1009, 1009, 1009, 1009, 1008, 1008, 1008, 1008, 1008, 1008, 1008, 1008 +// CHECK: 8, 8, 8, 
8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 25, 25, 25, 25, 25, 25, 25, 25, 24, 24, 24, 24, 24, 24, 24, 24, 27, 27, 27, 27, 27, 27, 27, 27, 26, 26, 26, 26, 26, 26, 26, 26, 29, 29, 29, 29, 29, 29, 29, 29, 28, 28, 28, 28, 28, 28, 28, 28, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, 30, 30 +// CHECK: 42, 42, 42, 42, 42, 42, 42, 42, 43, 43, 43, 43, 43, 43, 43, 43, 40, 40, 40, 40, 40, 40, 40, 40, 41, 41, 41, 41, 41, 41, 41, 41, 46, 46, 46, 46, 46, 46, 46, 46, 47, 47, 47, 47, 47, 47, 47, 47, 44, 44, 44, 44, 44, 44, 44, 44, 45, 45, 45, 45, 45, 45, 45, 45, 59, 59, 59, 59, 59, 59, 59, 59, 58, 58, 58, 58, 58, 58, 58, 58, 57, 57, 57, 57, 57, 57, 57, 57, 56, 56, 56, 56, 56, 56, 56, 56, 63, 63, 63, 63, 63, 63, 63, 63, 62, 62, 62, 62, 62, 62, 62, 62, 61, 61, 61, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60 +// CHECK: 76, 76, 76, 76, 76, 76, 76, 76, 77, 77, 77, 77, 77, 77, 77, 77, 78, 78, 78, 78, 78, 78, 78, 78, 79, 79, 79, 79, 79, 79, 79, 79, 72, 72, 72, 72, 72, 72, 72, 72, 73, 73, 73, 73, 73, 73, 73, 73, 74, 74, 74, 74, 74, 74, 74, 74, 75, 75, 75, 75, 75, 75, 75, 75, 93, 93, 93, 93, 93, 93, 93, 93, 92, 92, 92, 92, 92, 92, 92, 92, 95, 95, 95, 95, 95, 95, 95, 95, 94, 94, 94, 94, 94, 94, 94, 94, 89, 89, 89, 89, 89, 89, 89, 89, 88, 88, 88, 88, 88, 88, 88, 88, 91, 91, 91, 91, 91, 91, 91, 91, 90, 90, 90, 90, 90, 90, 90, 90 +// CHECK: 110, 110, 110, 110, 110, 110, 110, 110, 111, 111, 111, 111, 111, 111, 111, 111, 108, 108, 108, 108, 108, 108, 108, 108, 109, 109, 109, 109, 109, 109, 109, 109, 106, 106, 106, 106, 106, 106, 106, 106, 107, 107, 107, 107, 107, 107, 107, 107, 104, 104, 104, 104, 104, 104, 104, 104, 105, 105, 105, 105, 105, 105, 105, 105, 127, 127, 127, 127, 127, 127, 127, 127, 126, 126, 126, 126, 126, 126, 126, 126, 125, 125, 125, 125, 125, 125, 125, 125, 124, 124, 124, 124, 124, 124, 124, 124, 123, 123, 123, 123, 123, 123, 123, 123, 122, 122, 122, 122, 122, 122, 122, 122, 121, 121, 121, 121, 121, 121, 121, 121, 120, 120, 120, 120, 120, 120, 120, 120 +// CHECK: 136, 136, 136, 136, 136, 136, 136, 136, 137, 137, 137, 137, 137, 137, 137, 137, 138, 138, 138, 138, 138, 138, 138, 138, 139, 139, 139, 139, 139, 139, 139, 139, 140, 140, 140, 140, 140, 140, 140, 140, 141, 141, 141, 141, 141, 141, 141, 141, 142, 142, 142, 142, 142, 142, 142, 142, 143, 143, 143, 143, 143, 143, 143, 143, 153, 153, 153, 153, 153, 153, 153, 153, 152, 152, 152, 152, 152, 152, 152, 152, 155, 155, 155, 155, 155, 155, 155, 155, 154, 154, 154, 154, 154, 154, 154, 154, 157, 157, 157, 157, 157, 157, 157, 157, 156, 156, 156, 156, 156, 156, 156, 156, 159, 159, 159, 159, 159, 159, 159, 159, 158, 158, 158, 158, 158, 158, 158, 158 +// CHECK: 170, 170, 170, 170, 170, 170, 170, 170, 171, 171, 171, 171, 171, 171, 171, 171, 168, 168, 168, 168, 168, 168, 168, 168, 169, 169, 169, 169, 169, 169, 169, 169, 174, 174, 174, 174, 174, 174, 174, 174, 175, 175, 175, 175, 175, 175, 175, 175, 172, 172, 172, 172, 172, 172, 172, 172, 173, 173, 173, 173, 173, 173, 173, 173, 187, 187, 187, 187, 187, 187, 187, 187, 186, 186, 186, 186, 186, 186, 186, 186, 185, 185, 185, 185, 185, 185, 185, 185, 184, 184, 184, 184, 184, 184, 184, 184, 191, 191, 191, 191, 191, 191, 191, 191, 190, 190, 190, 190, 190, 190, 190, 190, 189, 189, 189, 189, 189, 189, 189, 189, 188, 188, 188, 188, 188, 188, 188, 188 +// CHECK: 204, 204, 204, 204, 204, 204, 204, 204, 205, 205, 205, 205, 
205, 205, 205, 205, 206, 206, 206, 206, 206, 206, 206, 206, 207, 207, 207, 207, 207, 207, 207, 207, 200, 200, 200, 200, 200, 200, 200, 200, 201, 201, 201, 201, 201, 201, 201, 201, 202, 202, 202, 202, 202, 202, 202, 202, 203, 203, 203, 203, 203, 203, 203, 203, 221, 221, 221, 221, 221, 221, 221, 221, 220, 220, 220, 220, 220, 220, 220, 220, 223, 223, 223, 223, 223, 223, 223, 223, 222, 222, 222, 222, 222, 222, 222, 222, 217, 217, 217, 217, 217, 217, 217, 217, 216, 216, 216, 216, 216, 216, 216, 216, 219, 219, 219, 219, 219, 219, 219, 219, 218, 218, 218, 218, 218, 218, 218, 218 +// CHECK: 238, 238, 238, 238, 238, 238, 238, 238, 239, 239, 239, 239, 239, 239, 239, 239, 236, 236, 236, 236, 236, 236, 236, 236, 237, 237, 237, 237, 237, 237, 237, 237, 234, 234, 234, 234, 234, 234, 234, 234, 235, 235, 235, 235, 235, 235, 235, 235, 232, 232, 232, 232, 232, 232, 232, 232, 233, 233, 233, 233, 233, 233, 233, 233, 255, 255, 255, 255, 255, 255, 255, 255, 254, 254, 254, 254, 254, 254, 254, 254, 253, 253, 253, 253, 253, 253, 253, 253, 252, 252, 252, 252, 252, 252, 252, 252, 251, 251, 251, 251, 251, 251, 251, 251, 250, 250, 250, 250, 250, 250, 250, 250, 249, 249, 249, 249, 249, 249, 249, 249, 248, 248, 248, 248, 248, 248, 248, 248 +// CHECK: 264, 264, 264, 264, 264, 264, 264, 264, 265, 265, 265, 265, 265, 265, 265, 265, 266, 266, 266, 266, 266, 266, 266, 266, 267, 267, 267, 267, 267, 267, 267, 267, 268, 268, 268, 268, 268, 268, 268, 268, 269, 269, 269, 269, 269, 269, 269, 269, 270, 270, 270, 270, 270, 270, 270, 270, 271, 271, 271, 271, 271, 271, 271, 271, 281, 281, 281, 281, 281, 281, 281, 281, 280, 280, 280, 280, 280, 280, 280, 280, 283, 283, 283, 283, 283, 283, 283, 283, 282, 282, 282, 282, 282, 282, 282, 282, 285, 285, 285, 285, 285, 285, 285, 285, 284, 284, 284, 284, 284, 284, 284, 284, 287, 287, 287, 287, 287, 287, 287, 287, 286, 286, 286, 286, 286, 286, 286, 286 +// CHECK: 298, 298, 298, 298, 298, 298, 298, 298, 299, 299, 299, 299, 299, 299, 299, 299, 296, 296, 296, 296, 296, 296, 296, 296, 297, 297, 297, 297, 297, 297, 297, 297, 302, 302, 302, 302, 302, 302, 302, 302, 303, 303, 303, 303, 303, 303, 303, 303, 300, 300, 300, 300, 300, 300, 300, 300, 301, 301, 301, 301, 301, 301, 301, 301, 315, 315, 315, 315, 315, 315, 315, 315, 314, 314, 314, 314, 314, 314, 314, 314, 313, 313, 313, 313, 313, 313, 313, 313, 312, 312, 312, 312, 312, 312, 312, 312, 319, 319, 319, 319, 319, 319, 319, 319, 318, 318, 318, 318, 318, 318, 318, 318, 317, 317, 317, 317, 317, 317, 317, 317, 316, 316, 316, 316, 316, 316, 316, 316 +// CHECK: 332, 332, 332, 332, 332, 332, 332, 332, 333, 333, 333, 333, 333, 333, 333, 333, 334, 334, 334, 334, 334, 334, 334, 334, 335, 335, 335, 335, 335, 335, 335, 335, 328, 328, 328, 328, 328, 328, 328, 328, 329, 329, 329, 329, 329, 329, 329, 329, 330, 330, 330, 330, 330, 330, 330, 330, 331, 331, 331, 331, 331, 331, 331, 331, 349, 349, 349, 349, 349, 349, 349, 349, 348, 348, 348, 348, 348, 348, 348, 348, 351, 351, 351, 351, 351, 351, 351, 351, 350, 350, 350, 350, 350, 350, 350, 350, 345, 345, 345, 345, 345, 345, 345, 345, 344, 344, 344, 344, 344, 344, 344, 344, 347, 347, 347, 347, 347, 347, 347, 347, 346, 346, 346, 346, 346, 346, 346, 346 +// CHECK: 366, 366, 366, 366, 366, 366, 366, 366, 367, 367, 367, 367, 367, 367, 367, 367, 364, 364, 364, 364, 364, 364, 364, 364, 365, 365, 365, 365, 365, 365, 365, 365, 362, 362, 362, 362, 362, 362, 362, 362, 363, 363, 363, 363, 363, 363, 363, 363, 360, 360, 360, 360, 360, 360, 360, 360, 361, 361, 361, 361, 361, 361, 361, 361, 383, 383, 383, 383, 383, 383, 383, 383, 382, 
382, 382, 382, 382, 382, 382, 382, 381, 381, 381, 381, 381, 381, 381, 381, 380, 380, 380, 380, 380, 380, 380, 380, 379, 379, 379, 379, 379, 379, 379, 379, 378, 378, 378, 378, 378, 378, 378, 378, 377, 377, 377, 377, 377, 377, 377, 377, 376, 376, 376, 376, 376, 376, 376, 376 +// CHECK: 392, 392, 392, 392, 392, 392, 392, 392, 393, 393, 393, 393, 393, 393, 393, 393, 394, 394, 394, 394, 394, 394, 394, 394, 395, 395, 395, 395, 395, 395, 395, 395, 396, 396, 396, 396, 396, 396, 396, 396, 397, 397, 397, 397, 397, 397, 397, 397, 398, 398, 398, 398, 398, 398, 398, 398, 399, 399, 399, 399, 399, 399, 399, 399, 409, 409, 409, 409, 409, 409, 409, 409, 408, 408, 408, 408, 408, 408, 408, 408, 411, 411, 411, 411, 411, 411, 411, 411, 410, 410, 410, 410, 410, 410, 410, 410, 413, 413, 413, 413, 413, 413, 413, 413, 412, 412, 412, 412, 412, 412, 412, 412, 415, 415, 415, 415, 415, 415, 415, 415, 414, 414, 414, 414, 414, 414, 414, 414 +// CHECK: 426, 426, 426, 426, 426, 426, 426, 426, 427, 427, 427, 427, 427, 427, 427, 427, 424, 424, 424, 424, 424, 424, 424, 424, 425, 425, 425, 425, 425, 425, 425, 425, 430, 430, 430, 430, 430, 430, 430, 430, 431, 431, 431, 431, 431, 431, 431, 431, 428, 428, 428, 428, 428, 428, 428, 428, 429, 429, 429, 429, 429, 429, 429, 429, 443, 443, 443, 443, 443, 443, 443, 443, 442, 442, 442, 442, 442, 442, 442, 442, 441, 441, 441, 441, 441, 441, 441, 441, 440, 440, 440, 440, 440, 440, 440, 440, 447, 447, 447, 447, 447, 447, 447, 447, 446, 446, 446, 446, 446, 446, 446, 446, 445, 445, 445, 445, 445, 445, 445, 445, 444, 444, 444, 444, 444, 444, 444, 444 +// CHECK: 460, 460, 460, 460, 460, 460, 460, 460, 461, 461, 461, 461, 461, 461, 461, 461, 462, 462, 462, 462, 462, 462, 462, 462, 463, 463, 463, 463, 463, 463, 463, 463, 456, 456, 456, 456, 456, 456, 456, 456, 457, 457, 457, 457, 457, 457, 457, 457, 458, 458, 458, 458, 458, 458, 458, 458, 459, 459, 459, 459, 459, 459, 459, 459, 477, 477, 477, 477, 477, 477, 477, 477, 476, 476, 476, 476, 476, 476, 476, 476, 479, 479, 479, 479, 479, 479, 479, 479, 478, 478, 478, 478, 478, 478, 478, 478, 473, 473, 473, 473, 473, 473, 473, 473, 472, 472, 472, 472, 472, 472, 472, 472, 475, 475, 475, 475, 475, 475, 475, 475, 474, 474, 474, 474, 474, 474, 474, 474 +// CHECK: 494, 494, 494, 494, 494, 494, 494, 494, 495, 495, 495, 495, 495, 495, 495, 495, 492, 492, 492, 492, 492, 492, 492, 492, 493, 493, 493, 493, 493, 493, 493, 493, 490, 490, 490, 490, 490, 490, 490, 490, 491, 491, 491, 491, 491, 491, 491, 491, 488, 488, 488, 488, 488, 488, 488, 488, 489, 489, 489, 489, 489, 489, 489, 489, 511, 511, 511, 511, 511, 511, 511, 511, 510, 510, 510, 510, 510, 510, 510, 510, 509, 509, 509, 509, 509, 509, 509, 509, 508, 508, 508, 508, 508, 508, 508, 508, 507, 507, 507, 507, 507, 507, 507, 507, 506, 506, 506, 506, 506, 506, 506, 506, 505, 505, 505, 505, 505, 505, 505, 505, 504, 504, 504, 504, 504, 504, 504, 504 +// CHECK: 520, 520, 520, 520, 520, 520, 520, 520, 521, 521, 521, 521, 521, 521, 521, 521, 522, 522, 522, 522, 522, 522, 522, 522, 523, 523, 523, 523, 523, 523, 523, 523, 524, 524, 524, 524, 524, 524, 524, 524, 525, 525, 525, 525, 525, 525, 525, 525, 526, 526, 526, 526, 526, 526, 526, 526, 527, 527, 527, 527, 527, 527, 527, 527, 537, 537, 537, 537, 537, 537, 537, 537, 536, 536, 536, 536, 536, 536, 536, 536, 539, 539, 539, 539, 539, 539, 539, 539, 538, 538, 538, 538, 538, 538, 538, 538, 541, 541, 541, 541, 541, 541, 541, 541, 540, 540, 540, 540, 540, 540, 540, 540, 543, 543, 543, 543, 543, 543, 543, 543, 542, 542, 542, 542, 542, 542, 542, 542 +// CHECK: 554, 554, 554, 554, 
554, 554, 554, 554, 555, 555, 555, 555, 555, 555, 555, 555, 552, 552, 552, 552, 552, 552, 552, 552, 553, 553, 553, 553, 553, 553, 553, 553, 558, 558, 558, 558, 558, 558, 558, 558, 559, 559, 559, 559, 559, 559, 559, 559, 556, 556, 556, 556, 556, 556, 556, 556, 557, 557, 557, 557, 557, 557, 557, 557, 571, 571, 571, 571, 571, 571, 571, 571, 570, 570, 570, 570, 570, 570, 570, 570, 569, 569, 569, 569, 569, 569, 569, 569, 568, 568, 568, 568, 568, 568, 568, 568, 575, 575, 575, 575, 575, 575, 575, 575, 574, 574, 574, 574, 574, 574, 574, 574, 573, 573, 573, 573, 573, 573, 573, 573, 572, 572, 572, 572, 572, 572, 572, 572 +// CHECK: 588, 588, 588, 588, 588, 588, 588, 588, 589, 589, 589, 589, 589, 589, 589, 589, 590, 590, 590, 590, 590, 590, 590, 590, 591, 591, 591, 591, 591, 591, 591, 591, 584, 584, 584, 584, 584, 584, 584, 584, 585, 585, 585, 585, 585, 585, 585, 585, 586, 586, 586, 586, 586, 586, 586, 586, 587, 587, 587, 587, 587, 587, 587, 587, 605, 605, 605, 605, 605, 605, 605, 605, 604, 604, 604, 604, 604, 604, 604, 604, 607, 607, 607, 607, 607, 607, 607, 607, 606, 606, 606, 606, 606, 606, 606, 606, 601, 601, 601, 601, 601, 601, 601, 601, 600, 600, 600, 600, 600, 600, 600, 600, 603, 603, 603, 603, 603, 603, 603, 603, 602, 602, 602, 602, 602, 602, 602, 602 +// CHECK: 622, 622, 622, 622, 622, 622, 622, 622, 623, 623, 623, 623, 623, 623, 623, 623, 620, 620, 620, 620, 620, 620, 620, 620, 621, 621, 621, 621, 621, 621, 621, 621, 618, 618, 618, 618, 618, 618, 618, 618, 619, 619, 619, 619, 619, 619, 619, 619, 616, 616, 616, 616, 616, 616, 616, 616, 617, 617, 617, 617, 617, 617, 617, 617, 639, 639, 639, 639, 639, 639, 639, 639, 638, 638, 638, 638, 638, 638, 638, 638, 637, 637, 637, 637, 637, 637, 637, 637, 636, 636, 636, 636, 636, 636, 636, 636, 635, 635, 635, 635, 635, 635, 635, 635, 634, 634, 634, 634, 634, 634, 634, 634, 633, 633, 633, 633, 633, 633, 633, 633, 632, 632, 632, 632, 632, 632, 632, 632 +// CHECK: 648, 648, 648, 648, 648, 648, 648, 648, 649, 649, 649, 649, 649, 649, 649, 649, 650, 650, 650, 650, 650, 650, 650, 650, 651, 651, 651, 651, 651, 651, 651, 651, 652, 652, 652, 652, 652, 652, 652, 652, 653, 653, 653, 653, 653, 653, 653, 653, 654, 654, 654, 654, 654, 654, 654, 654, 655, 655, 655, 655, 655, 655, 655, 655, 665, 665, 665, 665, 665, 665, 665, 665, 664, 664, 664, 664, 664, 664, 664, 664, 667, 667, 667, 667, 667, 667, 667, 667, 666, 666, 666, 666, 666, 666, 666, 666, 669, 669, 669, 669, 669, 669, 669, 669, 668, 668, 668, 668, 668, 668, 668, 668, 671, 671, 671, 671, 671, 671, 671, 671, 670, 670, 670, 670, 670, 670, 670, 670 +// CHECK: 682, 682, 682, 682, 682, 682, 682, 682, 683, 683, 683, 683, 683, 683, 683, 683, 680, 680, 680, 680, 680, 680, 680, 680, 681, 681, 681, 681, 681, 681, 681, 681, 686, 686, 686, 686, 686, 686, 686, 686, 687, 687, 687, 687, 687, 687, 687, 687, 684, 684, 684, 684, 684, 684, 684, 684, 685, 685, 685, 685, 685, 685, 685, 685, 699, 699, 699, 699, 699, 699, 699, 699, 698, 698, 698, 698, 698, 698, 698, 698, 697, 697, 697, 697, 697, 697, 697, 697, 696, 696, 696, 696, 696, 696, 696, 696, 703, 703, 703, 703, 703, 703, 703, 703, 702, 702, 702, 702, 702, 702, 702, 702, 701, 701, 701, 701, 701, 701, 701, 701, 700, 700, 700, 700, 700, 700, 700, 700 +// CHECK: 716, 716, 716, 716, 716, 716, 716, 716, 717, 717, 717, 717, 717, 717, 717, 717, 718, 718, 718, 718, 718, 718, 718, 718, 719, 719, 719, 719, 719, 719, 719, 719, 712, 712, 712, 712, 712, 712, 712, 712, 713, 713, 713, 713, 713, 713, 713, 713, 714, 714, 714, 714, 714, 714, 714, 714, 715, 715, 715, 715, 715, 715, 715, 715, 733, 
733, 733, 733, 733, 733, 733, 733, 732, 732, 732, 732, 732, 732, 732, 732, 735, 735, 735, 735, 735, 735, 735, 735, 734, 734, 734, 734, 734, 734, 734, 734, 729, 729, 729, 729, 729, 729, 729, 729, 728, 728, 728, 728, 728, 728, 728, 728, 731, 731, 731, 731, 731, 731, 731, 731, 730, 730, 730, 730, 730, 730, 730, 730 +// CHECK: 750, 750, 750, 750, 750, 750, 750, 750, 751, 751, 751, 751, 751, 751, 751, 751, 748, 748, 748, 748, 748, 748, 748, 748, 749, 749, 749, 749, 749, 749, 749, 749, 746, 746, 746, 746, 746, 746, 746, 746, 747, 747, 747, 747, 747, 747, 747, 747, 744, 744, 744, 744, 744, 744, 744, 744, 745, 745, 745, 745, 745, 745, 745, 745, 767, 767, 767, 767, 767, 767, 767, 767, 766, 766, 766, 766, 766, 766, 766, 766, 765, 765, 765, 765, 765, 765, 765, 765, 764, 764, 764, 764, 764, 764, 764, 764, 763, 763, 763, 763, 763, 763, 763, 763, 762, 762, 762, 762, 762, 762, 762, 762, 761, 761, 761, 761, 761, 761, 761, 761, 760, 760, 760, 760, 760, 760, 760, 760 +// CHECK: 776, 776, 776, 776, 776, 776, 776, 776, 777, 777, 777, 777, 777, 777, 777, 777, 778, 778, 778, 778, 778, 778, 778, 778, 779, 779, 779, 779, 779, 779, 779, 779, 780, 780, 780, 780, 780, 780, 780, 780, 781, 781, 781, 781, 781, 781, 781, 781, 782, 782, 782, 782, 782, 782, 782, 782, 783, 783, 783, 783, 783, 783, 783, 783, 793, 793, 793, 793, 793, 793, 793, 793, 792, 792, 792, 792, 792, 792, 792, 792, 795, 795, 795, 795, 795, 795, 795, 795, 794, 794, 794, 794, 794, 794, 794, 794, 797, 797, 797, 797, 797, 797, 797, 797, 796, 796, 796, 796, 796, 796, 796, 796, 799, 799, 799, 799, 799, 799, 799, 799, 798, 798, 798, 798, 798, 798, 798, 798 +// CHECK: 810, 810, 810, 810, 810, 810, 810, 810, 811, 811, 811, 811, 811, 811, 811, 811, 808, 808, 808, 808, 808, 808, 808, 808, 809, 809, 809, 809, 809, 809, 809, 809, 814, 814, 814, 814, 814, 814, 814, 814, 815, 815, 815, 815, 815, 815, 815, 815, 812, 812, 812, 812, 812, 812, 812, 812, 813, 813, 813, 813, 813, 813, 813, 813, 827, 827, 827, 827, 827, 827, 827, 827, 826, 826, 826, 826, 826, 826, 826, 826, 825, 825, 825, 825, 825, 825, 825, 825, 824, 824, 824, 824, 824, 824, 824, 824, 831, 831, 831, 831, 831, 831, 831, 831, 830, 830, 830, 830, 830, 830, 830, 830, 829, 829, 829, 829, 829, 829, 829, 829, 828, 828, 828, 828, 828, 828, 828, 828 +// CHECK: 844, 844, 844, 844, 844, 844, 844, 844, 845, 845, 845, 845, 845, 845, 845, 845, 846, 846, 846, 846, 846, 846, 846, 846, 847, 847, 847, 847, 847, 847, 847, 847, 840, 840, 840, 840, 840, 840, 840, 840, 841, 841, 841, 841, 841, 841, 841, 841, 842, 842, 842, 842, 842, 842, 842, 842, 843, 843, 843, 843, 843, 843, 843, 843, 861, 861, 861, 861, 861, 861, 861, 861, 860, 860, 860, 860, 860, 860, 860, 860, 863, 863, 863, 863, 863, 863, 863, 863, 862, 862, 862, 862, 862, 862, 862, 862, 857, 857, 857, 857, 857, 857, 857, 857, 856, 856, 856, 856, 856, 856, 856, 856, 859, 859, 859, 859, 859, 859, 859, 859, 858, 858, 858, 858, 858, 858, 858, 858 +// CHECK: 878, 878, 878, 878, 878, 878, 878, 878, 879, 879, 879, 879, 879, 879, 879, 879, 876, 876, 876, 876, 876, 876, 876, 876, 877, 877, 877, 877, 877, 877, 877, 877, 874, 874, 874, 874, 874, 874, 874, 874, 875, 875, 875, 875, 875, 875, 875, 875, 872, 872, 872, 872, 872, 872, 872, 872, 873, 873, 873, 873, 873, 873, 873, 873, 895, 895, 895, 895, 895, 895, 895, 895, 894, 894, 894, 894, 894, 894, 894, 894, 893, 893, 893, 893, 893, 893, 893, 893, 892, 892, 892, 892, 892, 892, 892, 892, 891, 891, 891, 891, 891, 891, 891, 891, 890, 890, 890, 890, 890, 890, 890, 890, 889, 889, 889, 889, 889, 889, 889, 889, 888, 888, 888, 888, 888, 888, 
888, 888 +// CHECK: 904, 904, 904, 904, 904, 904, 904, 904, 905, 905, 905, 905, 905, 905, 905, 905, 906, 906, 906, 906, 906, 906, 906, 906, 907, 907, 907, 907, 907, 907, 907, 907, 908, 908, 908, 908, 908, 908, 908, 908, 909, 909, 909, 909, 909, 909, 909, 909, 910, 910, 910, 910, 910, 910, 910, 910, 911, 911, 911, 911, 911, 911, 911, 911, 921, 921, 921, 921, 921, 921, 921, 921, 920, 920, 920, 920, 920, 920, 920, 920, 923, 923, 923, 923, 923, 923, 923, 923, 922, 922, 922, 922, 922, 922, 922, 922, 925, 925, 925, 925, 925, 925, 925, 925, 924, 924, 924, 924, 924, 924, 924, 924, 927, 927, 927, 927, 927, 927, 927, 927, 926, 926, 926, 926, 926, 926, 926, 926 +// CHECK: 938, 938, 938, 938, 938, 938, 938, 938, 939, 939, 939, 939, 939, 939, 939, 939, 936, 936, 936, 936, 936, 936, 936, 936, 937, 937, 937, 937, 937, 937, 937, 937, 942, 942, 942, 942, 942, 942, 942, 942, 943, 943, 943, 943, 943, 943, 943, 943, 940, 940, 940, 940, 940, 940, 940, 940, 941, 941, 941, 941, 941, 941, 941, 941, 955, 955, 955, 955, 955, 955, 955, 955, 954, 954, 954, 954, 954, 954, 954, 954, 953, 953, 953, 953, 953, 953, 953, 953, 952, 952, 952, 952, 952, 952, 952, 952, 959, 959, 959, 959, 959, 959, 959, 959, 958, 958, 958, 958, 958, 958, 958, 958, 957, 957, 957, 957, 957, 957, 957, 957, 956, 956, 956, 956, 956, 956, 956, 956 +// CHECK: 972, 972, 972, 972, 972, 972, 972, 972, 973, 973, 973, 973, 973, 973, 973, 973, 974, 974, 974, 974, 974, 974, 974, 974, 975, 975, 975, 975, 975, 975, 975, 975, 968, 968, 968, 968, 968, 968, 968, 968, 969, 969, 969, 969, 969, 969, 969, 969, 970, 970, 970, 970, 970, 970, 970, 970, 971, 971, 971, 971, 971, 971, 971, 971, 989, 989, 989, 989, 989, 989, 989, 989, 988, 988, 988, 988, 988, 988, 988, 988, 991, 991, 991, 991, 991, 991, 991, 991, 990, 990, 990, 990, 990, 990, 990, 990, 985, 985, 985, 985, 985, 985, 985, 985, 984, 984, 984, 984, 984, 984, 984, 984, 987, 987, 987, 987, 987, 987, 987, 987, 986, 986, 986, 986, 986, 986, 986, 986 +// CHECK: 1006, 1006, 1006, 1006, 1006, 1006, 1006, 1006, 1007, 1007, 1007, 1007, 1007, 1007, 1007, 1007, 1004, 1004, 1004, 1004, 1004, 1004, 1004, 1004, 1005, 1005, 1005, 1005, 1005, 1005, 1005, 1005, 1002, 1002, 1002, 1002, 1002, 1002, 1002, 1002, 1003, 1003, 1003, 1003, 1003, 1003, 1003, 1003, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1001, 1001, 1001, 1001, 1001, 1001, 1001, 1001, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1022, 1022, 1022, 1022, 1022, 1022, 1022, 1022, 1021, 1021, 1021, 1021, 1021, 1021, 1021, 1021, 1020, 1020, 1020, 1020, 1020, 1020, 1020, 1020, 1019, 1019, 1019, 1019, 1019, 1019, 1019, 1019, 1018, 1018, 1018, 1018, 1018, 1018, 1018, 1018, 1017, 1017, 1017, 1017, 1017, 1017, 1017, 1017, 1016, 1016, 1016, 1016, 1016, 1016, 1016, 1016

From 4506de1e2bfe9352331d75fe6d3c89f3d2f5287c Mon Sep 17 00:00:00 2001
From: Uday Bondhugula
Date: Thu, 26 Oct 2023 13:34:54 +0530
Subject: [PATCH 026/877] NFC. Move out and expose affine expression
 simplification utility out of AffineOps lib (#69813)

Move trivial affine expression simplification out of the AffineOps library
and expose it from libIR. Users of such methods shouldn't have to rely on
the AffineOps dialect. For example, with this change, the method can now be
used from lib/Analysis/ (FlatLinearConstraints) as well as from AffineOps
dialect canonicalization, and those users no longer need to depend on
AffineOps for some simplification of affine expressions.
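As a quick illustration of the now-public entry point, a minimal caller could
look like the sketch below. This example is not part of the patch; the driver
function and the concrete bounds are invented for illustration. It bounds
expr = d0 + s0 * 2 given 0 <= d0 <= 7 and 1 <= s0 <= 4:

    #include "mlir/IR/AffineExpr.h"
    #include "mlir/IR/MLIRContext.h"
    #include "llvm/ADT/SmallVector.h"
    #include <cstdint>
    #include <optional>

    int main() {
      mlir::MLIRContext ctx;
      mlir::AffineExpr d0 = mlir::getAffineDimExpr(0, &ctx);
      mlir::AffineExpr s0 = mlir::getAffineSymbolExpr(0, &ctx);
      mlir::AffineExpr expr = d0 + s0 * 2;

      // One entry per dimension, then per symbol; std::nullopt would mark an
      // input with no known constant bound.
      llvm::SmallVector<std::optional<int64_t>> lbs = {0, 1};
      llvm::SmallVector<std::optional<int64_t>> ubs = {7, 4};

      // Inclusive bounds: upper = 7 + 2 * 4 = 15, lower = 0 + 2 * 1 = 2.
      std::optional<int64_t> ub = mlir::getBoundForAffineExpr(
          expr, /*numDims=*/1, /*numSymbols=*/1, lbs, ubs, /*isUpper=*/true);
      std::optional<int64_t> lb = mlir::getBoundForAffineExpr(
          expr, /*numDims=*/1, /*numSymbols=*/1, lbs, ubs, /*isUpper=*/false);
      return (ub == 15 && lb == 2) ? 0 : 1;
    }

Because the declaration now lives in mlir/include/mlir/IR/AffineExpr.h, such a
caller needs only libIR, without a dependency on the AffineOps dialect library,
which is the point of the move.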
--- mlir/include/mlir/IR/AffineExpr.h | 14 +++ mlir/lib/Dialect/Affine/IR/AffineOps.cpp | 115 +++-------------------- mlir/lib/IR/AffineExpr.cpp | 80 ++++++++++++++++ 3 files changed, 108 insertions(+), 101 deletions(-) diff --git a/mlir/include/mlir/IR/AffineExpr.h b/mlir/include/mlir/IR/AffineExpr.h index 8ced8770591ee..69e02c94ef270 100644 --- a/mlir/include/mlir/IR/AffineExpr.h +++ b/mlir/include/mlir/IR/AffineExpr.h @@ -353,6 +353,20 @@ void bindSymbolsList(MLIRContext *ctx, MutableArrayRef exprs) { e = getAffineSymbolExpr(idx++, ctx); } +/// Get a lower or upper (depending on `isUpper`) bound for `expr` while using +/// the constant lower and upper bounds for its inputs provided in +/// `constLowerBounds` and `constUpperBounds`. Return std::nullopt if such a +/// bound can't be computed. This method only handles simple sum of product +/// expressions (w.r.t constant coefficients) so as to not depend on anything +/// heavyweight in `Analysis`. Expressions of the form: c0*d0 + c1*d1 + c2*s0 + +/// ... + c_n are handled. Expressions involving floordiv, ceildiv, mod or +/// semi-affine ones will lead a none being returned. +std::optional +getBoundForAffineExpr(AffineExpr expr, unsigned numDims, unsigned numSymbols, + ArrayRef> constLowerBounds, + ArrayRef> constUpperBounds, + bool isUpper); + } // namespace mlir namespace llvm { diff --git a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp index 4d79c458889d2..ba4285bd52394 100644 --- a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp +++ b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp @@ -700,93 +700,6 @@ static std::optional getUpperBound(Value iv) { return forOp.getConstantUpperBound() - 1; } -/// Get a lower or upper (depending on `isUpper`) bound for `expr` while using -/// the constant lower and upper bounds for its inputs provided in -/// `constLowerBounds` and `constUpperBounds`. Return std::nullopt if such a -/// bound can't be computed. This method only handles simple sum of product -/// expressions (w.r.t constant coefficients) so as to not depend on anything -/// heavyweight in `Analysis`. Expressions of the form: c0*d0 + c1*d1 + c2*s0 + -/// ... + c_n are handled. Expressions involving floordiv, ceildiv, mod or -/// semi-affine ones will lead std::nullopt being returned. -static std::optional -getBoundForExpr(AffineExpr expr, unsigned numDims, unsigned numSymbols, - ArrayRef> constLowerBounds, - ArrayRef> constUpperBounds, - bool isUpper) { - // Handle divs and mods. - if (auto binOpExpr = expr.dyn_cast()) { - // If the LHS of a floor or ceil is bounded and the RHS is a constant, we - // can compute an upper bound. - if (binOpExpr.getKind() == AffineExprKind::FloorDiv) { - auto rhsConst = binOpExpr.getRHS().dyn_cast(); - if (!rhsConst || rhsConst.getValue() < 1) - return std::nullopt; - auto bound = getBoundForExpr(binOpExpr.getLHS(), numDims, numSymbols, - constLowerBounds, constUpperBounds, isUpper); - if (!bound) - return std::nullopt; - return mlir::floorDiv(*bound, rhsConst.getValue()); - } - if (binOpExpr.getKind() == AffineExprKind::CeilDiv) { - auto rhsConst = binOpExpr.getRHS().dyn_cast(); - if (rhsConst && rhsConst.getValue() >= 1) { - auto bound = - getBoundForExpr(binOpExpr.getLHS(), numDims, numSymbols, - constLowerBounds, constUpperBounds, isUpper); - if (!bound) - return std::nullopt; - return mlir::ceilDiv(*bound, rhsConst.getValue()); - } - return std::nullopt; - } - if (binOpExpr.getKind() == AffineExprKind::Mod) { - // lhs mod c is always <= c - 1 and non-negative. 
In addition, if `lhs` is - // bounded such that lb <= lhs <= ub and lb floordiv c == ub floordiv c - // (same "interval"), then lb mod c <= lhs mod c <= ub mod c. - auto rhsConst = binOpExpr.getRHS().dyn_cast(); - if (rhsConst && rhsConst.getValue() >= 1) { - int64_t rhsConstVal = rhsConst.getValue(); - auto lb = getBoundForExpr(binOpExpr.getLHS(), numDims, numSymbols, - constLowerBounds, constUpperBounds, - /*isUpper=*/false); - auto ub = getBoundForExpr(binOpExpr.getLHS(), numDims, numSymbols, - constLowerBounds, constUpperBounds, isUpper); - if (ub && lb && - floorDiv(*lb, rhsConstVal) == floorDiv(*ub, rhsConstVal)) - return isUpper ? mod(*ub, rhsConstVal) : mod(*lb, rhsConstVal); - return isUpper ? rhsConstVal - 1 : 0; - } - } - } - // Flatten the expression. - SimpleAffineExprFlattener flattener(numDims, numSymbols); - flattener.walkPostOrder(expr); - ArrayRef flattenedExpr = flattener.operandExprStack.back(); - // TODO: Handle local variables. We can get hold of flattener.localExprs and - // get bound on the local expr recursively. - if (flattener.numLocals > 0) - return std::nullopt; - int64_t bound = 0; - // Substitute the constant lower or upper bound for the dimensional or - // symbolic input depending on `isUpper` to determine the bound. - for (unsigned i = 0, e = numDims + numSymbols; i < e; ++i) { - if (flattenedExpr[i] > 0) { - auto &constBound = isUpper ? constUpperBounds[i] : constLowerBounds[i]; - if (!constBound) - return std::nullopt; - bound += *constBound * flattenedExpr[i]; - } else if (flattenedExpr[i] < 0) { - auto &constBound = isUpper ? constLowerBounds[i] : constUpperBounds[i]; - if (!constBound) - return std::nullopt; - bound += *constBound * flattenedExpr[i]; - } - } - // Constant term. - bound += flattenedExpr.back(); - return bound; -} - /// Determine a constant upper bound for `expr` if one exists while exploiting /// values in `operands`. Note that the upper bound is an inclusive one. `expr` /// is guaranteed to be less than or equal to it. 
@@ -805,9 +718,9 @@ static std::optional getUpperBound(AffineExpr expr, unsigned numDims, if (auto constExpr = expr.dyn_cast()) return constExpr.getValue(); - return getBoundForExpr(expr, numDims, numSymbols, constLowerBounds, - constUpperBounds, - /*isUpper=*/true); + return getBoundForAffineExpr(expr, numDims, numSymbols, constLowerBounds, + constUpperBounds, + /*isUpper=*/true); } /// Determine a constant lower bound for `expr` if one exists while exploiting @@ -829,9 +742,9 @@ static std::optional getLowerBound(AffineExpr expr, unsigned numDims, if (auto constExpr = expr.dyn_cast()) { lowerBound = constExpr.getValue(); } else { - lowerBound = getBoundForExpr(expr, numDims, numSymbols, constLowerBounds, - constUpperBounds, - /*isUpper=*/false); + lowerBound = getBoundForAffineExpr(expr, numDims, numSymbols, + constLowerBounds, constUpperBounds, + /*isUpper=*/false); } return lowerBound; } @@ -970,14 +883,14 @@ static void simplifyMinOrMaxExprWithOperands(AffineMap &map, lowerBounds.push_back(constExpr.getValue()); upperBounds.push_back(constExpr.getValue()); } else { - lowerBounds.push_back(getBoundForExpr(e, map.getNumDims(), - map.getNumSymbols(), - constLowerBounds, constUpperBounds, - /*isUpper=*/false)); - upperBounds.push_back(getBoundForExpr(e, map.getNumDims(), - map.getNumSymbols(), - constLowerBounds, constUpperBounds, - /*isUpper=*/true)); + lowerBounds.push_back( + getBoundForAffineExpr(e, map.getNumDims(), map.getNumSymbols(), + constLowerBounds, constUpperBounds, + /*isUpper=*/false)); + upperBounds.push_back( + getBoundForAffineExpr(e, map.getNumDims(), map.getNumSymbols(), + constLowerBounds, constUpperBounds, + /*isUpper=*/true)); } } diff --git a/mlir/lib/IR/AffineExpr.cpp b/mlir/lib/IR/AffineExpr.cpp index 7eccbca4e6e7a..4b7ec89a842bd 100644 --- a/mlir/lib/IR/AffineExpr.cpp +++ b/mlir/lib/IR/AffineExpr.cpp @@ -1438,3 +1438,83 @@ AffineExpr mlir::simplifyAffineExpr(AffineExpr expr, unsigned numDims, assert(flattener.operandExprStack.empty()); return simplifiedExpr; } + +std::optional mlir::getBoundForAffineExpr( + AffineExpr expr, unsigned numDims, unsigned numSymbols, + ArrayRef> constLowerBounds, + ArrayRef> constUpperBounds, bool isUpper) { + // Handle divs and mods. + if (auto binOpExpr = expr.dyn_cast()) { + // If the LHS of a floor or ceil is bounded and the RHS is a constant, we + // can compute an upper bound. + if (binOpExpr.getKind() == AffineExprKind::FloorDiv) { + auto rhsConst = binOpExpr.getRHS().dyn_cast(); + if (!rhsConst || rhsConst.getValue() < 1) + return std::nullopt; + auto bound = + getBoundForAffineExpr(binOpExpr.getLHS(), numDims, numSymbols, + constLowerBounds, constUpperBounds, isUpper); + if (!bound) + return std::nullopt; + return mlir::floorDiv(*bound, rhsConst.getValue()); + } + if (binOpExpr.getKind() == AffineExprKind::CeilDiv) { + auto rhsConst = binOpExpr.getRHS().dyn_cast(); + if (rhsConst && rhsConst.getValue() >= 1) { + auto bound = + getBoundForAffineExpr(binOpExpr.getLHS(), numDims, numSymbols, + constLowerBounds, constUpperBounds, isUpper); + if (!bound) + return std::nullopt; + return mlir::ceilDiv(*bound, rhsConst.getValue()); + } + return std::nullopt; + } + if (binOpExpr.getKind() == AffineExprKind::Mod) { + // lhs mod c is always <= c - 1 and non-negative. In addition, if `lhs` is + // bounded such that lb <= lhs <= ub and lb floordiv c == ub floordiv c + // (same "interval"), then lb mod c <= lhs mod c <= ub mod c. 
+      auto rhsConst = binOpExpr.getRHS().dyn_cast<AffineConstantExpr>();
+      if (rhsConst && rhsConst.getValue() >= 1) {
+        int64_t rhsConstVal = rhsConst.getValue();
+        auto lb = getBoundForAffineExpr(binOpExpr.getLHS(), numDims, numSymbols,
+                                        constLowerBounds, constUpperBounds,
+                                        /*isUpper=*/false);
+        auto ub =
+            getBoundForAffineExpr(binOpExpr.getLHS(), numDims, numSymbols,
+                                  constLowerBounds, constUpperBounds, isUpper);
+        if (ub && lb &&
+            floorDiv(*lb, rhsConstVal) == floorDiv(*ub, rhsConstVal))
+          return isUpper ? mod(*ub, rhsConstVal) : mod(*lb, rhsConstVal);
+        return isUpper ? rhsConstVal - 1 : 0;
+      }
+    }
+  }
+  // Flatten the expression.
+  SimpleAffineExprFlattener flattener(numDims, numSymbols);
+  flattener.walkPostOrder(expr);
+  ArrayRef<int64_t> flattenedExpr = flattener.operandExprStack.back();
+  // TODO: Handle local variables. We can get hold of flattener.localExprs and
+  // get bound on the local expr recursively.
+  if (flattener.numLocals > 0)
+    return std::nullopt;
+  int64_t bound = 0;
+  // Substitute the constant lower or upper bound for the dimensional or
+  // symbolic input depending on `isUpper` to determine the bound.
+  for (unsigned i = 0, e = numDims + numSymbols; i < e; ++i) {
+    if (flattenedExpr[i] > 0) {
+      auto &constBound = isUpper ? constUpperBounds[i] : constLowerBounds[i];
+      if (!constBound)
+        return std::nullopt;
+      bound += *constBound * flattenedExpr[i];
+    } else if (flattenedExpr[i] < 0) {
+      auto &constBound = isUpper ? constLowerBounds[i] : constUpperBounds[i];
+      if (!constBound)
+        return std::nullopt;
+      bound += *constBound * flattenedExpr[i];
+    }
+  }
+  // Constant term.
+  bound += flattenedExpr.back();
+  return bound;
+}

From 4f131b0d2214b5fbaef43d130e576b40a20bda32 Mon Sep 17 00:00:00 2001
From: Nikita Popov
Date: Thu, 26 Oct 2023 10:19:06 +0200
Subject: [PATCH 027/877] [IR] Require index width to be ule pointer width
 (#70015)

I don't think there is a use case for having an index type that is
wider than the pointer type, and I'm not entirely clear what semantics
this would even have. Also clarify the GEP semantics to explicitly say
how they interact with the index type width.

---
 llvm/docs/LangRef.rst                         | 31 +++---
 llvm/lib/IR/DataLayout.cpp                    |  2 +
 .../ptrtoint-constantexpr-loop.ll             | 97 -------------------
 .../invalid-datalayout-index-size.ll          |  3 +
 .../test/Transforms/InferAlignment/ptrmask.ll | 15 +--
 mlir/test/Target/LLVMIR/Import/data-layout.ll |  4 +-
 mlir/test/Target/LLVMIR/data-layout.mlir      |  4 +-
 7 files changed, 30 insertions(+), 126 deletions(-)
 create mode 100644 llvm/test/Assembler/invalid-datalayout-index-size.ll

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index e4625cae9fc53..cabb5cd1bed62 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -2883,7 +2883,8 @@ as follows:
     This specifies the *size* of a pointer and its ``<abi>`` and
    ``<pref>``\erred alignments for address space ``n``. ``<pref>`` is optional
    and defaults to ``<abi>``. The fourth parameter ``<idx>`` is the size of the
-    index that used for address calculation. If not
+    index that used for address calculation, which must be less than or equal
+    to the pointer size. If not
    specified, the default index size is equal to the pointer size. All sizes
    are in bits. The address space, ``n``, is optional, and if not
    specified, denotes the default address space 0. The value of ``n`` must be
@@ -11030,6 +11031,24 @@ for the given testcase is equivalent to:
     ret ptr %t5
   }
 
+The indices are first converted to offsets in the pointer's index type. If the
+currently indexed type is a struct type, the struct offset corresponding to the
+index is sign-extended or truncated to the pointer index type. Otherwise, the
+index itself is sign-extended or truncated, and then multiplied by the type
+allocation size (that is, the size rounded up to the ABI alignment) of the
+currently indexed type.
+
+The offsets are then added to the low bits of the base address up to the index
+type width, with silently-wrapping two's complement arithmetic. If the pointer
+size is larger than the index size, this means that the bits outside the index
+type width will not be affected.
+
+The result value of the ``getelementptr`` may be outside the object pointed
+to by the base pointer. The result value may not necessarily be used to access
+memory though, even if it happens to point into allocated storage. See the
+:ref:`Pointer Aliasing Rules <pointeraliasing>` section for more
+information.
+
 If the ``inbounds`` keyword is present, the result value of a
 ``getelementptr`` with any non-zero indices is a :ref:`poison value
 <poisonvalues>` if one of the following rules is violated:
@@ -11061,16 +11080,6 @@ These rules are based on the assumption that no allocated object may cross the
 unsigned address space boundary, and no allocated object may be larger than
 half the pointer index type space.
 
-If the ``inbounds`` keyword is not present, the offsets are added to the
-base address with silently-wrapping two's complement arithmetic. If the
-offsets have a different width from the pointer's index type, they are
-sign-extended or truncated to the width of the pointer's index type. The result
-value of the ``getelementptr`` may be outside the object pointed to by the base
-pointer. The result value may not necessarily be used to access memory
-though, even if it happens to point into allocated storage. See the
-:ref:`Pointer Aliasing Rules <pointeraliasing>` section for more
-information.
- If the ``inrange`` keyword is present before any index, loading from or storing to any pointer derived from the ``getelementptr`` has undefined behavior if the load or store would access memory outside of the bounds of diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp index d324c4b488f72..c8ef232082554 100644 --- a/llvm/lib/IR/DataLayout.cpp +++ b/llvm/lib/IR/DataLayout.cpp @@ -649,6 +649,8 @@ Error DataLayout::setPointerAlignmentInBits(uint32_t AddrSpace, Align ABIAlign, if (PrefAlign < ABIAlign) return reportError( "Preferred alignment cannot be less than the ABI alignment"); + if (IndexBitWidth > TypeBitWidth) + return reportError("Index width cannot be larger than pointer width"); auto I = lower_bound(Pointers, AddrSpace, [](const PointerAlignElem &A, uint32_t AddressSpace) { diff --git a/llvm/test/Analysis/ScalarEvolution/ptrtoint-constantexpr-loop.ll b/llvm/test/Analysis/ScalarEvolution/ptrtoint-constantexpr-loop.ll index ca39b0b40e316..31a6a6ec0023e 100644 --- a/llvm/test/Analysis/ScalarEvolution/ptrtoint-constantexpr-loop.ll +++ b/llvm/test/Analysis/ScalarEvolution/ptrtoint-constantexpr-loop.ll @@ -2,7 +2,6 @@ ; RUN: opt < %s --data-layout="p:64:64:64:64" -S -disable-output "-passes=print" 2>&1 | FileCheck --check-prefixes=PTR64_IDX64 %s ; RUN: opt < %s --data-layout="p:64:64:64:32" -S -disable-output "-passes=print" 2>&1 | FileCheck --check-prefixes=PTR64_IDX32 %s ; RUN: opt < %s --data-layout="p:16:16:16:16" -S -disable-output "-passes=print" 2>&1 | FileCheck --check-prefixes=PTR16_IDX16 %s -; RUN: opt < %s --data-layout="p:16:16:16:32" -S -disable-output "-passes=print" 2>&1 | FileCheck --check-prefixes=PTR16_IDX32 %s @global = external hidden global [0 x i8] @@ -63,24 +62,6 @@ define hidden ptr @trunc_ptr_to_i64(ptr %arg, ptr %arg10) { ; PTR16_IDX16-NEXT: Loop %bb11: Unpredictable symbolic max backedge-taken count. ; PTR16_IDX16-NEXT: Loop %bb11: Unpredictable predicated backedge-taken count. ; -; PTR16_IDX32-LABEL: 'trunc_ptr_to_i64' -; PTR16_IDX32-NEXT: Classifying expressions for: @trunc_ptr_to_i64 -; PTR16_IDX32-NEXT: %tmp = phi i32 [ 0, %bb ], [ %tmp18, %bb17 ] -; PTR16_IDX32-NEXT: --> {0,+,2}<%bb11> U: [0,-1) S: [-2147483648,2147483647) Exits: <> LoopDispositions: { %bb11: Computable } -; PTR16_IDX32-NEXT: %tmp12 = getelementptr i8, ptr %arg, i64 ptrtoint (ptr @global to i64) -; PTR16_IDX32-NEXT: --> ((trunc i64 ptrtoint (ptr @global to i64) to i32) + %arg) U: [0,131071) S: [0,131071) Exits: ((trunc i64 ptrtoint (ptr @global to i64) to i32) + %arg) LoopDispositions: { %bb11: Invariant } -; PTR16_IDX32-NEXT: %tmp13 = bitcast ptr %tmp12 to ptr -; PTR16_IDX32-NEXT: --> ((trunc i64 ptrtoint (ptr @global to i64) to i32) + %arg) U: [0,131071) S: [0,131071) Exits: ((trunc i64 ptrtoint (ptr @global to i64) to i32) + %arg) LoopDispositions: { %bb11: Invariant } -; PTR16_IDX32-NEXT: %tmp14 = load i32, ptr %tmp13, align 4 -; PTR16_IDX32-NEXT: --> %tmp14 U: full-set S: full-set Exits: <> LoopDispositions: { %bb11: Variant } -; PTR16_IDX32-NEXT: %tmp18 = add i32 %tmp, 2 -; PTR16_IDX32-NEXT: --> {2,+,2}<%bb11> U: [0,-1) S: [-2147483648,2147483647) Exits: <> LoopDispositions: { %bb11: Computable } -; PTR16_IDX32-NEXT: Determining loop execution counts for: @trunc_ptr_to_i64 -; PTR16_IDX32-NEXT: Loop %bb11: Unpredictable backedge-taken count. -; PTR16_IDX32-NEXT: Loop %bb11: Unpredictable constant max backedge-taken count. -; PTR16_IDX32-NEXT: Loop %bb11: Unpredictable symbolic max backedge-taken count. 
-; PTR16_IDX32-NEXT: Loop %bb11: Unpredictable predicated backedge-taken count. -; bb: br label %bb11 @@ -154,24 +135,6 @@ define hidden ptr @trunc_ptr_to_i32(ptr %arg, ptr %arg10) { ; PTR16_IDX16-NEXT: Loop %bb11: Unpredictable symbolic max backedge-taken count. ; PTR16_IDX16-NEXT: Loop %bb11: Unpredictable predicated backedge-taken count. ; -; PTR16_IDX32-LABEL: 'trunc_ptr_to_i32' -; PTR16_IDX32-NEXT: Classifying expressions for: @trunc_ptr_to_i32 -; PTR16_IDX32-NEXT: %tmp = phi i32 [ 0, %bb ], [ %tmp18, %bb17 ] -; PTR16_IDX32-NEXT: --> {0,+,2}<%bb11> U: [0,-1) S: [-2147483648,2147483647) Exits: <> LoopDispositions: { %bb11: Computable } -; PTR16_IDX32-NEXT: %tmp12 = getelementptr i8, ptr %arg, i32 ptrtoint (ptr @global to i32) -; PTR16_IDX32-NEXT: --> (ptrtoint (ptr @global to i32) + %arg) U: [0,131071) S: [0,131071) Exits: (ptrtoint (ptr @global to i32) + %arg) LoopDispositions: { %bb11: Invariant } -; PTR16_IDX32-NEXT: %tmp13 = bitcast ptr %tmp12 to ptr -; PTR16_IDX32-NEXT: --> (ptrtoint (ptr @global to i32) + %arg) U: [0,131071) S: [0,131071) Exits: (ptrtoint (ptr @global to i32) + %arg) LoopDispositions: { %bb11: Invariant } -; PTR16_IDX32-NEXT: %tmp14 = load i32, ptr %tmp13, align 4 -; PTR16_IDX32-NEXT: --> %tmp14 U: full-set S: full-set Exits: <> LoopDispositions: { %bb11: Variant } -; PTR16_IDX32-NEXT: %tmp18 = add i32 %tmp, 2 -; PTR16_IDX32-NEXT: --> {2,+,2}<%bb11> U: [0,-1) S: [-2147483648,2147483647) Exits: <> LoopDispositions: { %bb11: Computable } -; PTR16_IDX32-NEXT: Determining loop execution counts for: @trunc_ptr_to_i32 -; PTR16_IDX32-NEXT: Loop %bb11: Unpredictable backedge-taken count. -; PTR16_IDX32-NEXT: Loop %bb11: Unpredictable constant max backedge-taken count. -; PTR16_IDX32-NEXT: Loop %bb11: Unpredictable symbolic max backedge-taken count. -; PTR16_IDX32-NEXT: Loop %bb11: Unpredictable predicated backedge-taken count. -; bb: br label %bb11 @@ -245,24 +208,6 @@ define hidden ptr @trunc_ptr_to_i128(ptr %arg, ptr %arg10) { ; PTR16_IDX16-NEXT: Loop %bb11: Unpredictable symbolic max backedge-taken count. ; PTR16_IDX16-NEXT: Loop %bb11: Unpredictable predicated backedge-taken count. ; -; PTR16_IDX32-LABEL: 'trunc_ptr_to_i128' -; PTR16_IDX32-NEXT: Classifying expressions for: @trunc_ptr_to_i128 -; PTR16_IDX32-NEXT: %tmp = phi i32 [ 0, %bb ], [ %tmp18, %bb17 ] -; PTR16_IDX32-NEXT: --> {0,+,2}<%bb11> U: [0,-1) S: [-2147483648,2147483647) Exits: <> LoopDispositions: { %bb11: Computable } -; PTR16_IDX32-NEXT: %tmp12 = getelementptr i8, ptr %arg, i128 ptrtoint (ptr @global to i128) -; PTR16_IDX32-NEXT: --> ((trunc i128 ptrtoint (ptr @global to i128) to i32) + %arg) U: [0,131071) S: [0,131071) Exits: ((trunc i128 ptrtoint (ptr @global to i128) to i32) + %arg) LoopDispositions: { %bb11: Invariant } -; PTR16_IDX32-NEXT: %tmp13 = bitcast ptr %tmp12 to ptr -; PTR16_IDX32-NEXT: --> ((trunc i128 ptrtoint (ptr @global to i128) to i32) + %arg) U: [0,131071) S: [0,131071) Exits: ((trunc i128 ptrtoint (ptr @global to i128) to i32) + %arg) LoopDispositions: { %bb11: Invariant } -; PTR16_IDX32-NEXT: %tmp14 = load i32, ptr %tmp13, align 4 -; PTR16_IDX32-NEXT: --> %tmp14 U: full-set S: full-set Exits: <> LoopDispositions: { %bb11: Variant } -; PTR16_IDX32-NEXT: %tmp18 = add i32 %tmp, 2 -; PTR16_IDX32-NEXT: --> {2,+,2}<%bb11> U: [0,-1) S: [-2147483648,2147483647) Exits: <> LoopDispositions: { %bb11: Computable } -; PTR16_IDX32-NEXT: Determining loop execution counts for: @trunc_ptr_to_i128 -; PTR16_IDX32-NEXT: Loop %bb11: Unpredictable backedge-taken count. 
-; PTR16_IDX32-NEXT: Loop %bb11: Unpredictable constant max backedge-taken count. -; PTR16_IDX32-NEXT: Loop %bb11: Unpredictable symbolic max backedge-taken count. -; PTR16_IDX32-NEXT: Loop %bb11: Unpredictable predicated backedge-taken count. -; bb: br label %bb11 @@ -319,18 +264,6 @@ define void @zext_ptr_to_i32(i32 %arg, i32 %arg6) { ; PTR16_IDX16-NEXT: Loop %bb7: Unpredictable symbolic max backedge-taken count. ; PTR16_IDX16-NEXT: Loop %bb7: Unpredictable predicated backedge-taken count. ; -; PTR16_IDX32-LABEL: 'zext_ptr_to_i32' -; PTR16_IDX32-NEXT: Classifying expressions for: @zext_ptr_to_i32 -; PTR16_IDX32-NEXT: %tmp = sub i32 %arg, ptrtoint (ptr @global to i32) -; PTR16_IDX32-NEXT: --> ((-1 * ptrtoint (ptr @global to i32)) + %arg) U: full-set S: full-set Exits: ((-1 * ptrtoint (ptr @global to i32)) + %arg) LoopDispositions: { %bb7: Invariant } -; PTR16_IDX32-NEXT: %tmp9 = select i1 %tmp8, i16 0, i16 1 -; PTR16_IDX32-NEXT: --> %tmp9 U: [0,2) S: [0,2) Exits: <> LoopDispositions: { %bb7: Variant } -; PTR16_IDX32-NEXT: Determining loop execution counts for: @zext_ptr_to_i32 -; PTR16_IDX32-NEXT: Loop %bb7: Unpredictable backedge-taken count. -; PTR16_IDX32-NEXT: Loop %bb7: Unpredictable constant max backedge-taken count. -; PTR16_IDX32-NEXT: Loop %bb7: Unpredictable symbolic max backedge-taken count. -; PTR16_IDX32-NEXT: Loop %bb7: Unpredictable predicated backedge-taken count. -; bb: br label %bb7 @@ -382,18 +315,6 @@ define void @sext_to_i32(i32 %arg, i32 %arg6) { ; PTR16_IDX16-NEXT: Loop %bb7: Unpredictable symbolic max backedge-taken count. ; PTR16_IDX16-NEXT: Loop %bb7: Unpredictable predicated backedge-taken count. ; -; PTR16_IDX32-LABEL: 'sext_to_i32' -; PTR16_IDX32-NEXT: Classifying expressions for: @sext_to_i32 -; PTR16_IDX32-NEXT: %tmp = sub i32 %arg, sext (i16 ptrtoint (ptr @global to i16) to i32) -; PTR16_IDX32-NEXT: --> ((-1 * (sext i16 ptrtoint (ptr @global to i16) to i32)) + %arg) U: full-set S: full-set Exits: ((-1 * (sext i16 ptrtoint (ptr @global to i16) to i32)) + %arg) LoopDispositions: { %bb7: Invariant } -; PTR16_IDX32-NEXT: %tmp9 = select i1 %tmp8, i16 0, i16 1 -; PTR16_IDX32-NEXT: --> %tmp9 U: [0,2) S: [0,2) Exits: <> LoopDispositions: { %bb7: Variant } -; PTR16_IDX32-NEXT: Determining loop execution counts for: @sext_to_i32 -; PTR16_IDX32-NEXT: Loop %bb7: Unpredictable backedge-taken count. -; PTR16_IDX32-NEXT: Loop %bb7: Unpredictable constant max backedge-taken count. -; PTR16_IDX32-NEXT: Loop %bb7: Unpredictable symbolic max backedge-taken count. -; PTR16_IDX32-NEXT: Loop %bb7: Unpredictable predicated backedge-taken count. 
-; bb: br label %bb7 @@ -463,24 +384,6 @@ define i64 @sext_like_noop(i32 %n) { ; PTR16_IDX16-NEXT: Predicates: ; PTR16_IDX16: Loop %for.body: Trip multiple is 1 ; -; PTR16_IDX32-LABEL: 'sext_like_noop' -; PTR16_IDX32-NEXT: Classifying expressions for: @sext_like_noop -; PTR16_IDX32-NEXT: %ii = sext i32 %i to i64 -; PTR16_IDX32-NEXT: --> (sext i32 {1,+,1}<%for.body> to i64) U: [-2147483648,2147483648) S: [-2147483648,2147483648) --> (-1 + (zext i32 ptrtoint (ptr @sext_like_noop to i32) to i64)) U: [-1,65535) S: [-1,65535) -; PTR16_IDX32-NEXT: %div = sdiv i64 55555, %ii -; PTR16_IDX32-NEXT: --> %div U: full-set S: full-set -; PTR16_IDX32-NEXT: %i = phi i32 [ %inc, %for.body ], [ 1, %entry ] -; PTR16_IDX32-NEXT: --> {1,+,1}<%for.body> U: [1,0) S: [1,0) Exits: (-1 + ptrtoint (ptr @sext_like_noop to i32)) LoopDispositions: { %for.body: Computable } -; PTR16_IDX32-NEXT: %inc = add nuw i32 %i, 1 -; PTR16_IDX32-NEXT: --> {2,+,1}<%for.body> U: [2,0) S: [2,0) Exits: ptrtoint (ptr @sext_like_noop to i32) LoopDispositions: { %for.body: Computable } -; PTR16_IDX32-NEXT: Determining loop execution counts for: @sext_like_noop -; PTR16_IDX32-NEXT: Loop %for.body: backedge-taken count is (-2 + ptrtoint (ptr @sext_like_noop to i32)) -; PTR16_IDX32-NEXT: Loop %for.body: constant max backedge-taken count is -1 -; PTR16_IDX32-NEXT: Loop %for.body: symbolic max backedge-taken count is (-2 + ptrtoint (ptr @sext_like_noop to i32)) -; PTR16_IDX32-NEXT: Loop %for.body: Predicated backedge-taken count is (-2 + ptrtoint (ptr @sext_like_noop to i32)) -; PTR16_IDX32-NEXT: Predicates: -; PTR16_IDX32: Loop %for.body: Trip multiple is 1 -; entry: %cmp6 = icmp sgt i32 %n, 1 br label %for.body diff --git a/llvm/test/Assembler/invalid-datalayout-index-size.ll b/llvm/test/Assembler/invalid-datalayout-index-size.ll new file mode 100644 index 0000000000000..dc608cdd56a04 --- /dev/null +++ b/llvm/test/Assembler/invalid-datalayout-index-size.ll @@ -0,0 +1,3 @@ +; RUN: not llvm-as < %s 2>&1 | FileCheck %s +target datalayout = "p:64:64:64:128" +; CHECK: Index width cannot be larger than pointer width diff --git a/llvm/test/Transforms/InferAlignment/ptrmask.ll b/llvm/test/Transforms/InferAlignment/ptrmask.ll index afab872d16d5e..7fb0220e92b96 100644 --- a/llvm/test/Transforms/InferAlignment/ptrmask.ll +++ b/llvm/test/Transforms/InferAlignment/ptrmask.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt < %s -passes=infer-alignment -S | FileCheck %s -target datalayout = "p1:64:64:64:32-p2:64:64:64:128" +target datalayout = "p1:64:64:64:32" ; ------------------------------------------------------------------------------ ; load instructions @@ -88,18 +88,5 @@ define i8 @smaller_index_type(ptr addrspace(1) %ptr) { ret i8 %load } -define i8 @larger_index_type(ptr addrspace(2) %ptr) { -; CHECK-LABEL: define i8 @larger_index_type -; CHECK-SAME: (ptr addrspace(2) [[PTR:%.*]]) { -; CHECK-NEXT: [[PTR2:%.*]] = call ptr addrspace(2) @llvm.ptrmask.p2.i128(ptr addrspace(2) [[PTR]], i128 -4) -; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(2) [[PTR2]], align 4 -; CHECK-NEXT: ret i8 [[LOAD]] -; - %ptr2 = call ptr addrspace(2) @llvm.ptrmask.p2.i128(ptr addrspace(2) %ptr, i128 -4) - %load = load i8, ptr addrspace(2) %ptr2, align 1 - ret i8 %load -} - declare ptr @llvm.ptrmask.p0.i64(ptr, i64) declare ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1), i32) -declare ptr addrspace(2) @llvm.ptrmask.p2.i128(ptr addrspace(2), i128) diff --git 
a/mlir/test/Target/LLVMIR/Import/data-layout.ll b/mlir/test/Target/LLVMIR/Import/data-layout.ll index b46f5fc4f2ae2..4336f0dbbc4a9 100644 --- a/mlir/test/Target/LLVMIR/Import/data-layout.ll +++ b/mlir/test/Target/LLVMIR/Import/data-layout.ll @@ -36,11 +36,11 @@ target datalayout = "e-m:e-p270:32:64-p271:32:32-p272:64:64-i64:64-f80:128-n8:16 ; CHECK: dlti.dl_spec = ; CHECK: #dlti.dl_spec< ; CHECK-DAG: #dlti.dl_entry<"dlti.endianness", "big"> -; CHECK-DAG: #dlti.dl_entry, dense<[16, 32, 64, 128]> : vector<4xi32>> +; CHECK-DAG: #dlti.dl_entry, dense<[16, 32, 64, 8]> : vector<4xi32>> ; CHECK-DAG: #dlti.dl_entry, dense<[16, 32, 64, 16]> : vector<4xi32>> ; CHECK-DAG: #dlti.dl_entry<"dlti.alloca_memory_space", 1 : ui32> ; CHECK-DAG: #dlti.dl_entry : vector<2xi32>> -target datalayout = "A1-E-p270:16:32:64:128-p271:16:32:64-i64:64:128" +target datalayout = "A1-E-p270:16:32:64:8-p271:16:32:64-i64:64:128" ; // ----- diff --git a/mlir/test/Target/LLVMIR/data-layout.mlir b/mlir/test/Target/LLVMIR/data-layout.mlir index 74941d84d7d21..f1304e12c303d 100644 --- a/mlir/test/Target/LLVMIR/data-layout.mlir +++ b/mlir/test/Target/LLVMIR/data-layout.mlir @@ -7,7 +7,7 @@ // CHECK: i64:64:128 // CHECK: f80:128:256 // CHECK: p0:32:64:128 -// CHECK: p1:32:32:32:64 +// CHECK: p1:32:32:32:16 module attributes {dlti.dl_spec = #dlti.dl_spec< #dlti.dl_entry<"dlti.endianness", "big">, #dlti.dl_entry<"dlti.alloca_memory_space", 4 : ui32>, @@ -16,7 +16,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec< #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<3xi32>>, -#dlti.dl_entry, dense<[32,32,32,64]> : vector<4xi32>> +#dlti.dl_entry, dense<[32,32,32,16]> : vector<4xi32>> >} { llvm.func @foo() { llvm.return From 0c6759b576947def0210e1bfb135fe8092d51c45 Mon Sep 17 00:00:00 2001 From: Johannes Reifferscheid Date: Thu, 26 Oct 2023 10:33:45 +0200 Subject: [PATCH 028/877] [Bazel] Fixes for ec6da06. 
---
 .../bazel/llvm-project-overlay/mlir/examples/toy/Ch6/BUILD.bazel | 1 +
 .../bazel/llvm-project-overlay/mlir/examples/toy/Ch7/BUILD.bazel | 1 +
 2 files changed, 2 insertions(+)

diff --git a/utils/bazel/llvm-project-overlay/mlir/examples/toy/Ch6/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/examples/toy/Ch6/BUILD.bazel
index af5573b1141cd..7543db7c2c279 100644
--- a/utils/bazel/llvm-project-overlay/mlir/examples/toy/Ch6/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/examples/toy/Ch6/BUILD.bazel
@@ -97,6 +97,7 @@ cc_binary(
         ":ToyInterfacesIncGen",
         ":ToyOpsIncGen",
         "//llvm:Core",
+        "//llvm:OrcJIT",
         "//llvm:Support",
         "//mlir:AffineDialect",
         "//mlir:AffineToStandard",
diff --git a/utils/bazel/llvm-project-overlay/mlir/examples/toy/Ch7/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/examples/toy/Ch7/BUILD.bazel
index 6e3ad89a9430a..297fb4e17e8e9 100644
--- a/utils/bazel/llvm-project-overlay/mlir/examples/toy/Ch7/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/examples/toy/Ch7/BUILD.bazel
@@ -97,6 +97,7 @@ cc_binary(
         ":ToyInterfacesIncGen",
         ":ToyOpsIncGen",
         "//llvm:Core",
+        "//llvm:OrcJIT",
         "//llvm:Support",
         "//mlir:AffineDialect",
         "//mlir:AffineToStandard",

From 80ff42bc7bb8188fb0c239c9f2035faf1398e59a Mon Sep 17 00:00:00 2001
From: Benjamin Kramer
Date: Thu, 26 Oct 2023 10:36:26 +0200
Subject: [PATCH 029/877] [scev-aa] Make TypeSize -> uint64_t conversion
 explicit

Some versions of MSVC have issues with the implicit conversion, and it
also makes it clearer what's going on (the current code doesn't handle
scalable vectors).

---
 llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
index dc1af1bbb1d16..af8232b03f1ed 100644
--- a/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -55,10 +55,10 @@ AliasResult SCEVAAResult::alias(const MemoryLocation &LocA,
   if (canComputePointerDiff(SE, AS, BS)) {
     unsigned BitWidth = SE.getTypeSizeInBits(AS->getType());
     APInt ASizeInt(BitWidth, LocA.Size.hasValue()
-                                 ? LocA.Size.getValue()
+                                 ? static_cast<uint64_t>(LocA.Size.getValue())
                                  : MemoryLocation::UnknownSize);
     APInt BSizeInt(BitWidth, LocB.Size.hasValue()
-                                 ? LocB.Size.getValue()
+                                 ? static_cast<uint64_t>(LocB.Size.getValue())
                                  : MemoryLocation::UnknownSize);
 
     // Compute the difference between the two pointers.

From 339faffd053b60bee3515fe7c4cca5e76f2cf427 Mon Sep 17 00:00:00 2001
From: Oliver Stannard
Date: Thu, 26 Oct 2023 09:50:13 +0100
Subject: [PATCH 030/877] Revert "[AArch64] Move SLS later in pass pipeline"

The (MF.size() == 0) assert is triggering when building at -O0.
Reverting this while I work out what is going wrong.

This reverts commit 7e8eccd990d37d2771ca5ad7a84f54c3cfc4a5e1.
--- .../Target/AArch64/AArch64SLSHardening.cpp | 3 +-- .../Target/AArch64/AArch64TargetMachine.cpp | 5 ++-- llvm/test/CodeGen/AArch64/O0-pipeline.ll | 4 ++-- llvm/test/CodeGen/AArch64/O3-pipeline.ll | 4 ++-- .../AArch64/arm64-opt-remarks-lazy-bfi.ll | 24 ++++--------------- .../AArch64/sls-stackprotector-outliner.ll | 12 ++++------ 6 files changed, 16 insertions(+), 36 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64SLSHardening.cpp b/llvm/lib/Target/AArch64/AArch64SLSHardening.cpp index 518ca336b5cde..ff56259eb34a7 100644 --- a/llvm/lib/Target/AArch64/AArch64SLSHardening.cpp +++ b/llvm/lib/Target/AArch64/AArch64SLSHardening.cpp @@ -222,8 +222,7 @@ void SLSBLRThunkInserter::populateThunk(MachineFunction &MF) { const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); - assert (MF.size() == 0); - MF.push_back(MF.CreateMachineBasicBlock()); + assert (MF.size() == 1); MachineBasicBlock *Entry = &MF.front(); Entry->clear(); diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index 02670fff35cce..3d818c76bd4b7 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -796,6 +796,9 @@ void AArch64PassConfig::addPreSched2() { // info. addPass(createAArch64SpeculationHardeningPass()); + addPass(createAArch64IndirectThunks()); + addPass(createAArch64SLSHardeningPass()); + if (TM->getOptLevel() != CodeGenOptLevel::None) { if (EnableFalkorHWPFFix) addPass(createFalkorHWPFFixPass()); @@ -828,8 +831,6 @@ void AArch64PassConfig::addPreEmitPass() { } void AArch64PassConfig::addPostBBSections() { - addPass(createAArch64IndirectThunks()); - addPass(createAArch64SLSHardeningPass()); addPass(createAArch64PointerAuthPass()); if (EnableBranchTargets) addPass(createAArch64BranchTargetsPass()); diff --git a/llvm/test/CodeGen/AArch64/O0-pipeline.ll b/llvm/test/CodeGen/AArch64/O0-pipeline.ll index d1e38b85fa9c3..4f87bb2a3ee81 100644 --- a/llvm/test/CodeGen/AArch64/O0-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O0-pipeline.ll @@ -64,6 +64,8 @@ ; CHECK-NEXT: AArch64 pseudo instruction expansion pass ; CHECK-NEXT: Insert KCFI indirect call checks ; CHECK-NEXT: AArch64 speculation hardening pass +; CHECK-NEXT: AArch64 Indirect Thunks +; CHECK-NEXT: AArch64 sls hardening pass ; CHECK-NEXT: Analyze Machine Code For Garbage Collection ; CHECK-NEXT: Insert fentry calls ; CHECK-NEXT: Insert XRay ops @@ -73,8 +75,6 @@ ; CHECK-NEXT: StackMap Liveness Analysis ; CHECK-NEXT: Live DEBUG_VALUE analysis ; CHECK-NEXT: Machine Sanitizer Binary Metadata -; CHECK-NEXT: AArch64 Indirect Thunks -; CHECK-NEXT: AArch64 sls hardening pass ; CHECK-NEXT: AArch64 Pointer Authentication ; CHECK-NEXT: AArch64 Branch Targets ; CHECK-NEXT: Branch relaxation pass diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll index 85a9eb49a9a26..f5c1c3c291cb5 100644 --- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll @@ -201,6 +201,8 @@ ; CHECK-NEXT: AArch64 load / store optimization pass ; CHECK-NEXT: Insert KCFI indirect call checks ; CHECK-NEXT: AArch64 speculation hardening pass +; CHECK-NEXT: AArch64 Indirect Thunks +; CHECK-NEXT: AArch64 sls hardening pass ; CHECK-NEXT: MachineDominator Tree Construction ; CHECK-NEXT: Machine Natural Loop Construction ; CHECK-NEXT: Falkor HW Prefetch Fix Late Phase @@ -221,8 +223,6 @@ ; CHECK-NEXT: Machine Sanitizer Binary Metadata ; CHECK-NEXT: Machine Outliner ; CHECK-NEXT: FunctionPass Manager 
-; CHECK-NEXT: AArch64 Indirect Thunks -; CHECK-NEXT: AArch64 sls hardening pass ; CHECK-NEXT: AArch64 Pointer Authentication ; CHECK-NEXT: AArch64 Branch Targets ; CHECK-NEXT: Branch relaxation pass diff --git a/llvm/test/CodeGen/AArch64/arm64-opt-remarks-lazy-bfi.ll b/llvm/test/CodeGen/AArch64/arm64-opt-remarks-lazy-bfi.ll index 3ffaf962425b3..580886520789e 100644 --- a/llvm/test/CodeGen/AArch64/arm64-opt-remarks-lazy-bfi.ll +++ b/llvm/test/CodeGen/AArch64/arm64-opt-remarks-lazy-bfi.ll @@ -32,16 +32,8 @@ ; HOTNESS: Freeing Pass 'Machine Outliner' ; HOTNESS-NEXT: Executing Pass 'Function Pass Manager' -; HOTNESS-NEXT: Executing Pass 'Verify generated machine code' on Function 'empty_func'... -; HOTNESS-NEXT: Freeing Pass 'Verify generated machine code' on Function 'empty_func'... -; HOTNESS-NEXT: Executing Pass 'AArch64 Indirect Thunks' on Function 'empty_func'... -; HOTNESS-NEXT: Freeing Pass 'AArch64 Indirect Thunks' on Function 'empty_func'... -; HOTNESS-NEXT: Executing Pass 'Verify generated machine code' on Function 'empty_func'... -; HOTNESS-NEXT: Freeing Pass 'Verify generated machine code' on Function 'empty_func'... -; HOTNESS-NEXT: Executing Pass 'AArch64 sls hardening pass' on Function 'empty_func'... -; HOTNESS-NEXT: Freeing Pass 'AArch64 sls hardening pass' on Function 'empty_func'... -; HOTNESS-NEXT: Executing Pass 'Verify generated machine code' on Function 'empty_func'... -; HOTNESS-NEXT: Freeing Pass 'Verify generated machine code' on Function 'empty_func'... +; HOTNESS-NEXT: Executing Pass 'Verify generated machine code' +; HOTNESS-NEXT: Freeing Pass 'Verify generated machine code' ; HOTNESS-NEXT: Executing Pass 'AArch64 Pointer Authentication' on Function 'empty_func'... ; HOTNESS-NEXT: Freeing Pass 'AArch64 Pointer Authentication' on Function 'empty_func'... ; HOTNESS-NEXT: Executing Pass 'Verify generated machine code' on Function 'empty_func'... @@ -81,16 +73,8 @@ ; NO_HOTNESS: Freeing Pass 'Machine Outliner' ; NO_HOTNESS-NEXT: Executing Pass 'Function Pass Manager' -; NO_HOTNESS-NEXT: Executing Pass 'Verify generated machine code' on Function 'empty_func'... -; NO_HOTNESS-NEXT: Freeing Pass 'Verify generated machine code' on Function 'empty_func'... -; NO_HOTNESS-NEXT: Executing Pass 'AArch64 Indirect Thunks' on Function 'empty_func'... -; NO_HOTNESS-NEXT: Freeing Pass 'AArch64 Indirect Thunks' on Function 'empty_func'... -; NO_HOTNESS-NEXT: Executing Pass 'Verify generated machine code' on Function 'empty_func'... -; NO_HOTNESS-NEXT: Freeing Pass 'Verify generated machine code' on Function 'empty_func'... -; NO_HOTNESS-NEXT: Executing Pass 'AArch64 sls hardening pass' on Function 'empty_func'... -; NO_HOTNESS-NEXT: Freeing Pass 'AArch64 sls hardening pass' on Function 'empty_func'... -; NO_HOTNESS-NEXT: Executing Pass 'Verify generated machine code' on Function 'empty_func'... -; NO_HOTNESS-NEXT: Freeing Pass 'Verify generated machine code' on Function 'empty_func'... +; NO_HOTNESS-NEXT: Executing Pass 'Verify generated machine code' +; NO_HOTNESS-NEXT: Freeing Pass 'Verify generated machine code' ; NO_HOTNESS-NEXT: Executing Pass 'AArch64 Pointer Authentication' on Function 'empty_func'... ; NO_HOTNESS-NEXT: Freeing Pass 'AArch64 Pointer Authentication' on Function 'empty_func'... ; NO_HOTNESS-NEXT: Executing Pass 'Verify generated machine code' on Function 'empty_func'... 
diff --git a/llvm/test/CodeGen/AArch64/sls-stackprotector-outliner.ll b/llvm/test/CodeGen/AArch64/sls-stackprotector-outliner.ll index 809279eac576b..9e5e55512df81 100644 --- a/llvm/test/CodeGen/AArch64/sls-stackprotector-outliner.ll +++ b/llvm/test/CodeGen/AArch64/sls-stackprotector-outliner.ll @@ -18,8 +18,7 @@ define hidden void @_ZTv0_n24_N2C6D1Ev(ptr %this) minsize sspreq "target-feature ; CHECK-NEXT: b.ne .LBB0_2 ; CHECK-NEXT: // %bb.1: // %entry ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: add x0, x0, x8 -; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: bl OUTLINED_FUNCTION_1 ; CHECK-NEXT: b _ZN2C6D1Ev ; CHECK-NEXT: dsb sy ; CHECK-NEXT: isb @@ -46,8 +45,7 @@ define hidden void @_ZTv0_n24_N2C6D0Ev(ptr %this) minsize sspreq "target-feature ; CHECK-NEXT: b.ne .LBB1_2 ; CHECK-NEXT: // %bb.1: // %entry ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: add x0, x0, x8 -; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: bl OUTLINED_FUNCTION_1 ; CHECK-NEXT: b _ZN2C6D0Ev ; CHECK-NEXT: dsb sy ; CHECK-NEXT: isb @@ -73,8 +71,7 @@ define hidden void @_ZTv0_n24_N3C10D1Ev(ptr %this) minsize sspreq "target-featur ; CHECK-NEXT: b.ne .LBB2_2 ; CHECK-NEXT: // %bb.1: // %entry ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: add x0, x0, x8 -; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: bl OUTLINED_FUNCTION_1 ; CHECK-NEXT: b _ZN3C10D1Ev ; CHECK-NEXT: dsb sy ; CHECK-NEXT: isb @@ -100,8 +97,7 @@ define hidden void @_ZTv0_n24_N3C10D0Ev(ptr %this) minsize sspreq "target-featur ; CHECK-NEXT: b.ne .LBB3_2 ; CHECK-NEXT: // %bb.1: // %entry ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: add x0, x0, x8 -; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: bl OUTLINED_FUNCTION_1 ; CHECK-NEXT: b _ZN3C10D0Ev ; CHECK-NEXT: dsb sy ; CHECK-NEXT: isb From cf3ac964dc0e1b967594aee2bdbfb6a4518e4dfe Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 26 Oct 2023 11:02:56 +0200 Subject: [PATCH 031/877] [InstCombine] Add additional demanded bits tests for shifts (NFC) --- llvm/test/Transforms/InstCombine/shift.ll | 72 +++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/shift.ll b/llvm/test/Transforms/InstCombine/shift.ll index bfed2dfe55fd5..7b9626331ff29 100644 --- a/llvm/test/Transforms/InstCombine/shift.ll +++ b/llvm/test/Transforms/InstCombine/shift.ll @@ -2126,3 +2126,75 @@ define <2 x i8> @ashr_vec_or6_fail(<2 x i8> %x, <2 x i8> %c) { %y = ashr <2 x i8> %x, %amt ret <2 x i8> %y } + +define i16 @lshr_and_not_demanded(i8 %x) { +; CHECK-LABEL: @lshr_and_not_demanded( +; CHECK-NEXT: [[Y_EXT:%.*]] = sext i8 [[X:%.*]] to i16 +; CHECK-NEXT: [[SHR:%.*]] = lshr i16 [[Y_EXT]], 1 +; CHECK-NEXT: ret i16 [[SHR]] +; + %y = and i8 %x, -2 + %y.ext = sext i8 %y to i16 + %shr = lshr i16 %y.ext, 1 + ret i16 %shr +} + +define i16 @lshr_exact_and_not_demanded(i8 %x) { +; CHECK-LABEL: @lshr_exact_and_not_demanded( +; CHECK-NEXT: [[Y:%.*]] = and i8 [[X:%.*]], -2 +; CHECK-NEXT: [[Y_EXT:%.*]] = sext i8 [[Y]] to i16 +; CHECK-NEXT: [[SHR:%.*]] = lshr exact i16 [[Y_EXT]], 1 +; CHECK-NEXT: ret i16 [[SHR]] +; + %y = and i8 %x, -2 + %y.ext = sext i8 %y to i16 + %shr = lshr exact i16 %y.ext, 1 + ret i16 %shr +} + +define i16 @lshr_and_demanded(i8 %x) { +; CHECK-LABEL: @lshr_and_demanded( +; CHECK-NEXT: [[Y:%.*]] = and i8 [[X:%.*]], -4 +; CHECK-NEXT: [[Y_EXT:%.*]] = sext i8 [[Y]] to i16 +; CHECK-NEXT: [[SHR:%.*]] = lshr exact i16 [[Y_EXT]], 1 +; CHECK-NEXT: ret i16 [[SHR]] +; + %y = and i8 %x, -4 + %y.ext = sext i8 %y to 
i16
+  %shr = lshr i16 %y.ext, 1
+  ret i16 %shr
+}
+
+define i16 @ashr_umax_not_demanded(i16 %x) {
+; CHECK-LABEL: @ashr_umax_not_demanded(
+; CHECK-NEXT:    [[SHR:%.*]] = ashr i16 [[X:%.*]], 1
+; CHECK-NEXT:    ret i16 [[SHR]]
+;
+  %y = call i16 @llvm.umax.i16(i16 %x, i16 1)
+  %shr = ashr i16 %y, 1
+  ret i16 %shr
+}
+
+define i16 @ashr_exact_umax_not_demanded(i16 %x) {
+; CHECK-LABEL: @ashr_exact_umax_not_demanded(
+; CHECK-NEXT:    [[Y:%.*]] = call i16 @llvm.umax.i16(i16 [[X:%.*]], i16 1)
+; CHECK-NEXT:    [[SHR:%.*]] = ashr exact i16 [[Y]], 1
+; CHECK-NEXT:    ret i16 [[SHR]]
+;
+  %y = call i16 @llvm.umax.i16(i16 %x, i16 1)
+  %shr = ashr exact i16 %y, 1
+  ret i16 %shr
+}
+
+define i16 @ashr_umax_demanded(i16 %x) {
+; CHECK-LABEL: @ashr_umax_demanded(
+; CHECK-NEXT:    [[Y:%.*]] = call i16 @llvm.umax.i16(i16 [[X:%.*]], i16 2)
+; CHECK-NEXT:    [[SHR:%.*]] = ashr i16 [[Y]], 1
+; CHECK-NEXT:    ret i16 [[SHR]]
+;
+  %y = call i16 @llvm.umax.i16(i16 %x, i16 2)
+  %shr = ashr i16 %y, 1
+  ret i16 %shr
+}
+
+declare i16 @llvm.umax.i16(i16, i16)

From 605fadf0ca21ceba3331c1a39230fc7a45ea3134 Mon Sep 17 00:00:00 2001
From: Dmitry Vyukov
Date: Thu, 26 Oct 2023 11:06:15 +0200
Subject: [PATCH 032/877] [libc] Add --sweep-min-size flag for benchmarks
 (#70302)

We have --sweep-max-size; it's reasonable to have --sweep-min-size as
well. It can be used when working on the logic for larger sizes, or to
collect a profile for larger sizes only.

---
 libc/benchmarks/LibcMemoryBenchmarkMain.cpp | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/libc/benchmarks/LibcMemoryBenchmarkMain.cpp b/libc/benchmarks/LibcMemoryBenchmarkMain.cpp
index acd7c30717597..bc6fd8b38cb6d 100644
--- a/libc/benchmarks/LibcMemoryBenchmarkMain.cpp
+++ b/libc/benchmarks/LibcMemoryBenchmarkMain.cpp
@@ -42,9 +42,15 @@ static cl::opt<std::string>
     SizeDistributionName("size-distribution-name",
                          cl::desc("The name of the distribution to use"));
 
-static cl::opt<bool>
-    SweepMode("sweep-mode",
-              cl::desc("If set, benchmark all sizes from 0 to sweep-max-size"));
+static cl::opt<bool> SweepMode(
+    "sweep-mode",
+    cl::desc(
+        "If set, benchmark all sizes from sweep-min-size to sweep-max-size"));
+
+static cl::opt<uint32_t>
+    SweepMinSize("sweep-min-size",
+                 cl::desc("The minimum size to use in sweep-mode"),
+                 cl::init(0));
 
 static cl::opt<uint32_t>
     SweepMaxSize("sweep-max-size",
@@ -185,7 +191,7 @@ struct MemfunctionBenchmarkSweep final : public MemfunctionBenchmarkBase {
     BO.InitialIterations = 100;
     auto &Measurements = Study.Measurements;
     Measurements.reserve(NumTrials * SweepMaxSize);
-    for (size_t Size = 0; Size <= SweepMaxSize; ++Size) {
+    for (size_t Size = SweepMinSize; Size <= SweepMaxSize; ++Size) {
       CurrentSweepSize = Size;
       runTrials(BO, Measurements);
     }

From 274ce8895b29e8be047a68629efda2e9d3ad1f01 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell
Date: Thu, 26 Oct 2023 10:07:23 +0100
Subject: [PATCH 033/877] [mlir] Remove `printCString()` from RunnerUtils
 (#70197)

This is now unused and can be replaced with `printString()` from
CRunnerUtils or `vector.print str`.
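For code migrating off the removed entry point, a minimal C++ sketch of the
replacement path follows. The `printString` signature is assumed from
CRunnerUtils of the same era (this patch only confirms the removal of
`printCString`), and the local definition mirroring the deleted `fputs`
wrapper is a stand-in so the sketch runs without linking the runner utils
library.

    #include <cstdio>

    // Stand-in for the CRunnerUtils export `printString` (assumed signature).
    // The removed printCString was a thin wrapper over fputs, and the
    // replacement behaves the same way for a plain C string.
    extern "C" void printString(char const *str) { std::fputs(str, stdout); }

    int main() {
      printString("String to print\n"); // the output the deleted test checked
      return 0;
    }

When linking against the real runner utils library, the local definition
would be dropped in favor of the exported symbol, or the MLIR-level
`vector.print str` form can be used instead.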
---
 .../mlir/ExecutionEngine/RunnerUtils.h        |  1 -
 mlir/lib/ExecutionEngine/RunnerUtils.cpp      |  3 ---
 mlir/test/mlir-cpu-runner/print.mlir          | 19 -------------------
 3 files changed, 23 deletions(-)
 delete mode 100644 mlir/test/mlir-cpu-runner/print.mlir

diff --git a/mlir/include/mlir/ExecutionEngine/RunnerUtils.h b/mlir/include/mlir/ExecutionEngine/RunnerUtils.h
index 56c0983e5f15b..f998ed53b3403 100644
--- a/mlir/include/mlir/ExecutionEngine/RunnerUtils.h
+++ b/mlir/include/mlir/ExecutionEngine/RunnerUtils.h
@@ -392,7 +392,6 @@ extern "C" MLIR_RUNNERUTILS_EXPORT void printMemrefF64(int64_t rank, void *ptr);
 extern "C" MLIR_RUNNERUTILS_EXPORT void printMemrefInd(int64_t rank, void *ptr);
 extern "C" MLIR_RUNNERUTILS_EXPORT void printMemrefC32(int64_t rank, void *ptr);
 extern "C" MLIR_RUNNERUTILS_EXPORT void printMemrefC64(int64_t rank, void *ptr);
-extern "C" MLIR_RUNNERUTILS_EXPORT void printCString(char *str);
 
 extern "C" MLIR_RUNNERUTILS_EXPORT void
 _mlir_ciface_printMemref0dF32(StridedMemRefType<float, 0> *m);
diff --git a/mlir/lib/ExecutionEngine/RunnerUtils.cpp b/mlir/lib/ExecutionEngine/RunnerUtils.cpp
index 4618866f68a44..bbfd3a6b11c2a 100644
--- a/mlir/lib/ExecutionEngine/RunnerUtils.cpp
+++ b/mlir/lib/ExecutionEngine/RunnerUtils.cpp
@@ -158,9 +158,6 @@ extern "C" void printMemrefC64(int64_t rank, void *ptr) {
   _mlir_ciface_printMemrefC64(&descriptor);
 }
 
-/// Deprecated. This should be unified with printString from CRunnerUtils.
-extern "C" void printCString(char *str) { fputs(str, stdout); }
-
 extern "C" void _mlir_ciface_printMemref0dF32(StridedMemRefType<float, 0> *M) {
   impl::printMemRef(*M);
 }
diff --git a/mlir/test/mlir-cpu-runner/print.mlir b/mlir/test/mlir-cpu-runner/print.mlir
deleted file mode 100644
index e36c7154f03a7..0000000000000
--- a/mlir/test/mlir-cpu-runner/print.mlir
+++ /dev/null
@@ -1,19 +0,0 @@
-// RUN: mlir-opt %s -pass-pipeline="builtin.module(convert-func-to-llvm,reconcile-unrealized-casts)" \
-// RUN:   | mlir-cpu-runner -e main -entry-point-result=void \
-// RUN:     -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils \
-// RUN:   | FileCheck %s
-
-
-llvm.mlir.global internal constant @str_global("String to print\0A")
-llvm.func @printCString(!llvm.ptr<i8>)
-
-func.func @main() {
-  %0 = llvm.mlir.addressof @str_global : !llvm.ptr<array<16 x i8>>
-  %1 = llvm.mlir.constant(0 : index) : i64
-  %2 = llvm.getelementptr %0[%1, %1]
-    : (!llvm.ptr<array<16 x i8>>, i64, i64) -> !llvm.ptr<i8>
-  llvm.call @printCString(%2) : (!llvm.ptr<i8>) -> ()
-  return
-}
-
-// CHECK: String to print

From ba3d6e0499231c5f425b92cc126116b0e184dbf1 Mon Sep 17 00:00:00 2001
From: Piotr Sobczak
Date: Thu, 26 Oct 2023 11:34:33 +0200
Subject: [PATCH 034/877] [AMDGPU] Rematerialize scalar loads (#68778)

Extend the list of instructions that can be rematerialized in
SIInstrInfo::isReallyTriviallyReMaterializable() to support scalar
loads.

Try shrinking instructions to rematerialize only the part needed in the
current context.

Add the SIInstrInfo::reMaterialize target hook, and handle shrinking of
S_LOAD_DWORDX16_IMM to S_LOAD_DWORDX8_IMM as a proof of concept.
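A compressed, hedged sketch of the shrinking rule described above: the bit
width of the single used subregister selects a narrower opcode, and anything
else keeps the wide load. Only the opcode names are taken from the patch; the
rest is illustrative C++, not the in-tree implementation.

    #include <optional>

    enum class Opcode { S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX8_IMM };

    // Map the size (in bits) of the subregister the sole user reads to a
    // narrower scalar-load opcode; std::nullopt means "do not shrink".
    std::optional<Opcode> shrinkTo(unsigned subregSizeBits) {
      switch (subregSizeBits) {
      case 256: return Opcode::S_LOAD_DWORDX8_IMM; // reads 8 dwords
      case 128: return Opcode::S_LOAD_DWORDX4_IMM; // reads 4 dwords
      default:  return std::nullopt;               // keep the wide load
      }
    }

The subregister's byte offset is then folded into the narrow load's immediate
offset, which corresponds to the `Offset / 8` adjustment in the implementation
below.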
--- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 106 +++++++++++++++- llvm/lib/Target/AMDGPU/SIInstrInfo.h | 5 + .../hsa-metadata-kernel-code-props-v3.ll | 4 +- llvm/test/CodeGen/AMDGPU/remat-smrd.mir | 116 +++++------------- .../AMDGPU/snippet-copy-bundle-regression.mir | 42 +------ 5 files changed, 146 insertions(+), 127 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 827c2c1566384..7046c37ef6efd 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -106,9 +106,27 @@ static bool nodesHaveSameOperandValue(SDNode *N0, SDNode* N1, unsigned OpName) { return N0->getOperand(Op0Idx) == N1->getOperand(Op1Idx); } +static bool canRemat(const MachineInstr &MI) { + + if (SIInstrInfo::isVOP1(MI) || SIInstrInfo::isVOP2(MI) || + SIInstrInfo::isVOP3(MI) || SIInstrInfo::isSDWA(MI) || + SIInstrInfo::isSALU(MI)) + return true; + + if (SIInstrInfo::isSMRD(MI)) { + return !MI.memoperands_empty() && + llvm::all_of(MI.memoperands(), [](const MachineMemOperand *MMO) { + return MMO->isLoad() && MMO->isInvariant(); + }); + } + + return false; +} + bool SIInstrInfo::isReallyTriviallyReMaterializable( const MachineInstr &MI) const { - if (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isSDWA(MI) || isSALU(MI)) { + + if (canRemat(MI)) { // Normally VALU use of exec would block the rematerialization, but that // is OK in this case to have an implicit exec read as all VALU do. // We really want all of the generic logic for this except for this. @@ -2434,6 +2452,92 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { return true; } +void SIInstrInfo::reMaterialize(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, Register DestReg, + unsigned SubIdx, const MachineInstr &Orig, + const TargetRegisterInfo &RI) const { + + // Try shrinking the instruction to remat only the part needed for current + // context. + // TODO: Handle more cases. + unsigned Opcode = Orig.getOpcode(); + switch (Opcode) { + case AMDGPU::S_LOAD_DWORDX16_IMM: + case AMDGPU::S_LOAD_DWORDX8_IMM: { + if (SubIdx != 0) + break; + + if (I == MBB.end()) + break; + + if (I->isBundled()) + break; + + // Look for a single use of the register that is also a subreg. + Register RegToFind = Orig.getOperand(0).getReg(); + MachineOperand *UseMO = nullptr; + for (auto &CandMO : I->operands()) { + if (!CandMO.isReg() || CandMO.getReg() != RegToFind || CandMO.isDef()) + continue; + if (UseMO) { + UseMO = nullptr; + break; + } + UseMO = &CandMO; + } + if (!UseMO || UseMO->getSubReg() == AMDGPU::NoSubRegister) + break; + + unsigned Offset = RI.getSubRegIdxOffset(UseMO->getSubReg()); + unsigned SubregSize = RI.getSubRegIdxSize(UseMO->getSubReg()); + + MachineFunction *MF = MBB.getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + assert(MRI.use_nodbg_empty(DestReg) && "DestReg should have no users yet."); + + unsigned NewOpcode = -1; + if (SubregSize == 256) + NewOpcode = AMDGPU::S_LOAD_DWORDX8_IMM; + else if (SubregSize == 128) + NewOpcode = AMDGPU::S_LOAD_DWORDX4_IMM; + else + break; + + const MCInstrDesc &TID = get(NewOpcode); + const TargetRegisterClass *NewRC = + RI.getAllocatableClass(getRegClass(TID, 0, &RI, *MF)); + MRI.setRegClass(DestReg, NewRC); + + UseMO->setReg(DestReg); + UseMO->setSubReg(AMDGPU::NoSubRegister); + + // Use a smaller load with the desired size, possibly with updated offset. 
+ MachineInstr *MI = MF->CloneMachineInstr(&Orig); + MI->setDesc(TID); + MI->getOperand(0).setReg(DestReg); + MI->getOperand(0).setSubReg(AMDGPU::NoSubRegister); + if (Offset) { + MachineOperand *OffsetMO = getNamedOperand(*MI, AMDGPU::OpName::offset); + int64_t FinalOffset = OffsetMO->getImm() + Offset / 8; + OffsetMO->setImm(FinalOffset); + } + SmallVector NewMMOs; + for (const MachineMemOperand *MemOp : Orig.memoperands()) + NewMMOs.push_back(MF->getMachineMemOperand(MemOp, MemOp->getPointerInfo(), + SubregSize / 8)); + MI->setMemRefs(*MF, NewMMOs); + + MBB.insert(I, MI); + return; + } + + default: + break; + } + + TargetInstrInfo::reMaterialize(MBB, I, DestReg, SubIdx, Orig, RI); +} + std::pair SIInstrInfo::expandMovDPP64(MachineInstr &MI) const { assert (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 5ef17c44f7de3..a64cf0244e4c0 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -275,6 +275,11 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { bool expandPostRAPseudo(MachineInstr &MI) const override; + void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + Register DestReg, unsigned SubIdx, + const MachineInstr &Orig, + const TargetRegisterInfo &TRI) const override; + // Splits a V_MOV_B64_DPP_PSEUDO opcode into a pair of v_mov_b32_dpp // instructions. Returns a pair of generated instructions. // Can split either post-RA with physical registers or pre-RA with diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll index d6f7a92af9dcb..1999c7b065e68 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll @@ -47,8 +47,8 @@ entry: } ; CHECK: .name: num_spilled_sgprs -; GFX700: .sgpr_spill_count: 38 -; GFX803: .sgpr_spill_count: 22 +; GFX700: .sgpr_spill_count: 12 +; GFX803: .sgpr_spill_count: 12 ; GFX900: .sgpr_spill_count: 48 ; GFX1010: .sgpr_spill_count: 48 ; CHECK: .symbol: num_spilled_sgprs.kd diff --git a/llvm/test/CodeGen/AMDGPU/remat-smrd.mir b/llvm/test/CodeGen/AMDGPU/remat-smrd.mir index 753cb135fdae9..95eac12a65389 100644 --- a/llvm/test/CodeGen/AMDGPU/remat-smrd.mir +++ b/llvm/test/CodeGen/AMDGPU/remat-smrd.mir @@ -12,15 +12,11 @@ body: | ; GCN: liveins: $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $sgpr2_sgpr3 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: renamable $sgpr0 = S_LOAD_DWORD_IMM renamable $sgpr2_sgpr3, 0, 0 :: (dereferenceable invariant load (s32), addrspace 4) - ; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr0, %stack.0, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.0, addrspace 5) ; GCN-NEXT: renamable $sgpr1 = S_LOAD_DWORD_IMM renamable $sgpr2_sgpr3, 4, 0 :: (dereferenceable invariant load (s32), addrspace 4) - ; GCN-NEXT: renamable $sgpr0 = S_LOAD_DWORD_IMM renamable $sgpr2_sgpr3, 8, 0 :: (dereferenceable invariant load (s32), addrspace 4) - ; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr0, %stack.1, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.1, addrspace 5) - ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.0, addrspace 5) + ; GCN-NEXT: renamable $sgpr0 = S_LOAD_DWORD_IMM renamable $sgpr2_sgpr3, 0, 0 :: (dereferenceable invariant load (s32), addrspace 4) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr0 ; 
GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr1 - ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.1, addrspace 5) + ; GCN-NEXT: renamable $sgpr0 = S_LOAD_DWORD_IMM renamable $sgpr2_sgpr3, 8, 0 :: (dereferenceable invariant load (s32), addrspace 4) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr0 ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $sgpr2_sgpr3 %0:sreg_64_xexec = COPY $sgpr8_sgpr9 @@ -44,16 +40,10 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $sgpr0_sgpr1 = COPY $sgpr8_sgpr9 ; GCN-NEXT: renamable $sgpr2_sgpr3 = S_LOAD_DWORDX2_IMM renamable $sgpr0_sgpr1, 0, 0 :: (dereferenceable invariant load (s64), addrspace 4) - ; GCN-NEXT: SI_SPILL_S64_SAVE killed renamable $sgpr2_sgpr3, %stack.1, implicit $exec, implicit $sp_reg :: (store (s64) into %stack.1, align 4, addrspace 5) - ; GCN-NEXT: renamable $sgpr2_sgpr3 = S_LOAD_DWORDX2_IMM renamable $sgpr0_sgpr1, 4, 0 :: (dereferenceable invariant load (s64), addrspace 4) - ; GCN-NEXT: SI_SPILL_S64_SAVE killed renamable $sgpr2_sgpr3, %stack.0, implicit $exec, implicit $sp_reg :: (store (s64) into %stack.0, align 4, addrspace 5) - ; GCN-NEXT: renamable $sgpr2_sgpr3 = S_LOAD_DWORDX2_IMM renamable $sgpr0_sgpr1, 8, 0 :: (dereferenceable invariant load (s64), addrspace 4) - ; GCN-NEXT: SI_SPILL_S64_SAVE killed renamable $sgpr2_sgpr3, %stack.2, implicit $exec, implicit $sp_reg :: (store (s64) into %stack.2, align 4, addrspace 5) - ; GCN-NEXT: renamable $sgpr2_sgpr3 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sp_reg :: (load (s64) from %stack.1, align 4, addrspace 5) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr2_sgpr3 - ; GCN-NEXT: renamable $sgpr2_sgpr3 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s64) from %stack.0, align 4, addrspace 5) + ; GCN-NEXT: renamable $sgpr2_sgpr3 = S_LOAD_DWORDX2_IMM renamable $sgpr0_sgpr1, 4, 0 :: (dereferenceable invariant load (s64), addrspace 4) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr2_sgpr3 - ; GCN-NEXT: renamable $sgpr2_sgpr3 = SI_SPILL_S64_RESTORE %stack.2, implicit $exec, implicit $sp_reg :: (load (s64) from %stack.2, align 4, addrspace 5) + ; GCN-NEXT: renamable $sgpr2_sgpr3 = S_LOAD_DWORDX2_IMM renamable $sgpr0_sgpr1, 8, 0 :: (dereferenceable invariant load (s64), addrspace 4) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr2_sgpr3 ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $sgpr0_sgpr1 %0:sreg_64_xexec = COPY $sgpr8_sgpr9 @@ -77,16 +67,10 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $sgpr0_sgpr1 = COPY $sgpr8_sgpr9 ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0 :: (dereferenceable invariant load (s128), addrspace 4) - ; GCN-NEXT: SI_SPILL_S128_SAVE killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, %stack.1, implicit $exec, implicit $sp_reg :: (store (s128) into %stack.1, align 4, addrspace 5) - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 4, 0 :: (dereferenceable invariant load (s128), addrspace 4) - ; GCN-NEXT: SI_SPILL_S128_SAVE killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, %stack.0, implicit $exec, implicit $sp_reg :: (store (s128) into %stack.0, align 4, addrspace 5) - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 8, 0 :: (dereferenceable invariant load (s128), addrspace 4) - ; GCN-NEXT: SI_SPILL_S128_SAVE killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, %stack.2, implicit $exec, implicit $sp_reg :: (store 
(s128) into %stack.2, align 4, addrspace 5) - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = SI_SPILL_S128_RESTORE %stack.1, implicit $exec, implicit $sp_reg :: (load (s128) from %stack.1, align 4, addrspace 5) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7 - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = SI_SPILL_S128_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s128) from %stack.0, align 4, addrspace 5) + ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 4, 0 :: (dereferenceable invariant load (s128), addrspace 4) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7 - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = SI_SPILL_S128_RESTORE %stack.2, implicit $exec, implicit $sp_reg :: (load (s128) from %stack.2, align 4, addrspace 5) + ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 8, 0 :: (dereferenceable invariant load (s128), addrspace 4) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7 ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $sgpr0_sgpr1 %0:sreg_64_xexec = COPY $sgpr8_sgpr9 @@ -110,16 +94,10 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $sgpr0_sgpr1 = COPY $sgpr8_sgpr9 ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX8_IMM renamable $sgpr0_sgpr1, 0, 0 :: (dereferenceable invariant load (s256), addrspace 4) - ; GCN-NEXT: SI_SPILL_S256_SAVE killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, %stack.1, implicit $exec, implicit $sp_reg :: (store (s256) into %stack.1, align 4, addrspace 5) - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX8_IMM renamable $sgpr0_sgpr1, 4, 0 :: (dereferenceable invariant load (s256), addrspace 4) - ; GCN-NEXT: SI_SPILL_S256_SAVE killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, %stack.0, implicit $exec, implicit $sp_reg :: (store (s256) into %stack.0, align 4, addrspace 5) - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX8_IMM renamable $sgpr0_sgpr1, 8, 0 :: (dereferenceable invariant load (s256), addrspace 4) - ; GCN-NEXT: SI_SPILL_S256_SAVE killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, %stack.2, implicit $exec, implicit $sp_reg :: (store (s256) into %stack.2, align 4, addrspace 5) - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = SI_SPILL_S256_RESTORE %stack.1, implicit $exec, implicit $sp_reg :: (load (s256) from %stack.1, align 4, addrspace 5) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = SI_SPILL_S256_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s256) from %stack.0, align 4, addrspace 5) + ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX8_IMM renamable $sgpr0_sgpr1, 4, 0 :: (dereferenceable invariant load (s256), addrspace 4) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = SI_SPILL_S256_RESTORE %stack.2, implicit $exec, implicit $sp_reg :: (load (s256) from %stack.2, align 4, addrspace 5) + ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX8_IMM renamable $sgpr0_sgpr1, 8, 0 :: (dereferenceable invariant load (s256), 
addrspace 4) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $sgpr0_sgpr1 %0:sreg_64_xexec = COPY $sgpr8_sgpr9 @@ -143,16 +121,10 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $sgpr0_sgpr1 = COPY $sgpr8_sgpr9 ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM renamable $sgpr0_sgpr1, 0, 0 :: (dereferenceable invariant load (s512), addrspace 4) - ; GCN-NEXT: SI_SPILL_S512_SAVE killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19, %stack.1, implicit $exec, implicit $sp_reg :: (store (s512) into %stack.1, align 4, addrspace 5) - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM renamable $sgpr0_sgpr1, 4, 0 :: (dereferenceable invariant load (s512), addrspace 4) - ; GCN-NEXT: SI_SPILL_S512_SAVE killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19, %stack.0, implicit $exec, implicit $sp_reg :: (store (s512) into %stack.0, align 4, addrspace 5) - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM renamable $sgpr0_sgpr1, 8, 0 :: (dereferenceable invariant load (s512), addrspace 4) - ; GCN-NEXT: SI_SPILL_S512_SAVE killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19, %stack.2, implicit $exec, implicit $sp_reg :: (store (s512) into %stack.2, align 4, addrspace 5) - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = SI_SPILL_S512_RESTORE %stack.1, implicit $exec, implicit $sp_reg :: (load (s512) from %stack.1, align 4, addrspace 5) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = SI_SPILL_S512_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s512) from %stack.0, align 4, addrspace 5) + ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM renamable $sgpr0_sgpr1, 4, 0 :: (dereferenceable invariant load (s512), addrspace 4) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = SI_SPILL_S512_RESTORE %stack.2, implicit $exec, implicit $sp_reg :: (load (s512) from %stack.2, align 4, addrspace 5) + ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM renamable $sgpr0_sgpr1, 8, 0 :: (dereferenceable invariant load (s512), addrspace 4) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $sgpr0_sgpr1 
%0:sreg_64_xexec = COPY $sgpr8_sgpr9 @@ -175,13 +147,12 @@ body: | ; GCN: liveins: $sgpr8_sgpr9, $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $sgpr0_sgpr1 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM renamable $sgpr0_sgpr1, 0, 0 :: (dereferenceable invariant load (s512), align 4, addrspace 4) - ; GCN-NEXT: SI_SPILL_S512_SAVE killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19, %stack.0, implicit $exec, implicit $sp_reg :: (store (s512) into %stack.0, align 4, addrspace 5) ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM renamable $sgpr0_sgpr1, 128, 0 :: (dereferenceable invariant load (s512), align 4, addrspace 4) - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = SI_SPILL_S512_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s512) from %stack.0, align 4, addrspace 5) + ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX8_IMM renamable $sgpr0_sgpr1, 32, 0 :: (dereferenceable invariant load (s256), align 4, addrspace 4) + ; GCN-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = COPY killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM renamable $sgpr0_sgpr1, 128, 0 :: (dereferenceable invariant load (s512), align 4, addrspace 4) - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = SI_SPILL_S512_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s512) from %stack.0, align 4, addrspace 5) + ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX8_IMM renamable $sgpr0_sgpr1, 0, 0 :: (dereferenceable invariant load (s256), align 4, addrspace 4) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $sgpr0_sgpr1 %0:sreg_64_xexec = COPY $sgpr8_sgpr9 @@ -206,14 +177,12 @@ body: | ; GCN: liveins: $sgpr8_sgpr9, $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $sgpr0_sgpr1 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM renamable $sgpr0_sgpr1, 0, 0 :: (dereferenceable invariant load (s512), align 4, addrspace 4) - ; GCN-NEXT: SI_SPILL_S512_SAVE killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19, %stack.0, implicit $exec, implicit $sp_reg :: (store (s512) into %stack.0, align 4, addrspace 5) ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM renamable $sgpr0_sgpr1, 128, 0 :: (dereferenceable invariant load (s512), align 4, addrspace 4) - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = 
SI_SPILL_S512_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s512) from %stack.0, align 4, addrspace 5) + ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM renamable $sgpr0_sgpr1, 0, 0 :: (dereferenceable invariant load (s512), align 4, addrspace 4) ; GCN-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = KILL killed renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19, implicit renamable $sgpr4_sgpr5_sgpr6_sgpr7 ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM renamable $sgpr0_sgpr1, 128, 0 :: (dereferenceable invariant load (s512), align 4, addrspace 4) - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = SI_SPILL_S512_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s512) from %stack.0, align 4, addrspace 5) + ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX8_IMM renamable $sgpr0_sgpr1, 0, 0 :: (dereferenceable invariant load (s256), align 4, addrspace 4) ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = KILL killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, implicit renamable $sgpr0_sgpr1 ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $sgpr0_sgpr1 @@ -238,10 +207,8 @@ body: | ; GCN: liveins: $sgpr8_sgpr9, $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $sgpr0_sgpr1 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM renamable $sgpr0_sgpr1, 0, 0 :: (dereferenceable invariant load (s512), align 4, addrspace 4) - ; GCN-NEXT: SI_SPILL_S512_SAVE killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19, %stack.0, implicit $exec, implicit $sp_reg :: (store (s512) into %stack.0, align 4, addrspace 5) ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM renamable $sgpr0_sgpr1, 128, 0 :: (dereferenceable invariant load (s512), align 4, addrspace 4) - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = SI_SPILL_S512_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s512) from %stack.0, align 4, addrspace 5) + ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM renamable $sgpr0_sgpr1, 0, 0 :: (dereferenceable invariant load (s512), align 4, addrspace 4) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $sgpr0_sgpr1 %0:sreg_64_xexec = COPY $sgpr8_sgpr9 @@ -265,13 +232,12 @@ body: | ; GCN: liveins: $sgpr8_sgpr9, $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $sgpr0_sgpr1 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX8_IMM renamable 
$sgpr0_sgpr1, 0, 0 :: (dereferenceable invariant load (s256), align 4, addrspace 4) - ; GCN-NEXT: SI_SPILL_S256_SAVE killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, %stack.0, implicit $exec, implicit $sp_reg :: (store (s256) into %stack.0, align 4, addrspace 5) ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX8_IMM renamable $sgpr0_sgpr1, 128, 0 :: (dereferenceable invariant load (s256), align 4, addrspace 4) - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = SI_SPILL_S256_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s256) from %stack.0, align 4, addrspace 5) + ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 16, 0 :: (dereferenceable invariant load (s128), align 4, addrspace 4) + ; GCN-NEXT: renamable $sgpr8_sgpr9_sgpr10_sgpr11 = COPY killed renamable $sgpr4_sgpr5_sgpr6_sgpr7 ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr8_sgpr9_sgpr10_sgpr11 ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX8_IMM renamable $sgpr0_sgpr1, 128, 0 :: (dereferenceable invariant load (s256), align 4, addrspace 4) - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = SI_SPILL_S256_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s256) from %stack.0, align 4, addrspace 5) + ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0 :: (dereferenceable invariant load (s128), align 4, addrspace 4) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7 ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $sgpr0_sgpr1 %0:sreg_64_xexec = COPY $sgpr8_sgpr9 @@ -331,16 +297,10 @@ body: | ; GCN-NEXT: renamable $sgpr2_sgpr3 = COPY $sgpr8_sgpr9 ; GCN-NEXT: renamable $sgpr0 = COPY $sgpr10 ; GCN-NEXT: renamable $sgpr1 = S_LOAD_DWORD_SGPR renamable $sgpr2_sgpr3, renamable $sgpr0, 0 :: (dereferenceable invariant load (s32), addrspace 4) - ; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr1, %stack.1, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.1, addrspace 5) - ; GCN-NEXT: renamable $sgpr1 = S_LOAD_DWORD_SGPR renamable $sgpr2_sgpr3, renamable $sgpr0, 0 :: (dereferenceable invariant load (s32), addrspace 4) - ; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr1, %stack.0, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.0, addrspace 5) - ; GCN-NEXT: renamable $sgpr1 = S_LOAD_DWORD_SGPR renamable $sgpr2_sgpr3, renamable $sgpr0, 0 :: (dereferenceable invariant load (s32), addrspace 4) - ; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr1, %stack.2, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.2, addrspace 5) - ; GCN-NEXT: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.1, addrspace 5) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr1 - ; GCN-NEXT: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.0, addrspace 5) + ; GCN-NEXT: renamable $sgpr1 = S_LOAD_DWORD_SGPR renamable $sgpr2_sgpr3, renamable $sgpr0, 0 :: (dereferenceable invariant load (s32), addrspace 4) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr1 - ; GCN-NEXT: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.2, addrspace 5) + ; GCN-NEXT: renamable $sgpr1 = S_LOAD_DWORD_SGPR renamable $sgpr2_sgpr3, renamable $sgpr0, 0 :: (dereferenceable invariant load (s32), 
addrspace 4) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr1 ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $sgpr2_sgpr3, implicit killed renamable $sgpr0 %0:sreg_64_xexec = COPY $sgpr8_sgpr9 @@ -366,16 +326,10 @@ body: | ; GCN-NEXT: renamable $sgpr2_sgpr3 = COPY $sgpr8_sgpr9 ; GCN-NEXT: renamable $sgpr0 = COPY $sgpr10 ; GCN-NEXT: renamable $sgpr1 = S_LOAD_DWORD_SGPR_IMM renamable $sgpr2_sgpr3, renamable $sgpr0, 0, 0 :: (dereferenceable invariant load (s32), addrspace 4) - ; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr1, %stack.1, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.1, addrspace 5) - ; GCN-NEXT: renamable $sgpr1 = S_LOAD_DWORD_SGPR_IMM renamable $sgpr2_sgpr3, renamable $sgpr0, 4, 0 :: (dereferenceable invariant load (s32), addrspace 4) - ; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr1, %stack.0, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.0, addrspace 5) - ; GCN-NEXT: renamable $sgpr1 = S_LOAD_DWORD_SGPR_IMM renamable $sgpr2_sgpr3, renamable $sgpr0, 8, 0 :: (dereferenceable invariant load (s32), addrspace 4) - ; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr1, %stack.2, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.2, addrspace 5) - ; GCN-NEXT: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.1, addrspace 5) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr1 - ; GCN-NEXT: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.0, addrspace 5) + ; GCN-NEXT: renamable $sgpr1 = S_LOAD_DWORD_SGPR_IMM renamable $sgpr2_sgpr3, renamable $sgpr0, 4, 0 :: (dereferenceable invariant load (s32), addrspace 4) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr1 - ; GCN-NEXT: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.2, addrspace 5) + ; GCN-NEXT: renamable $sgpr1 = S_LOAD_DWORD_SGPR_IMM renamable $sgpr2_sgpr3, renamable $sgpr0, 8, 0 :: (dereferenceable invariant load (s32), addrspace 4) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr1 ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $sgpr2_sgpr3, implicit killed renamable $sgpr0 %0:sreg_64_xexec = COPY $sgpr8_sgpr9 @@ -430,15 +384,11 @@ body: | ; GCN: liveins: $sgpr8_sgpr9_sgpr10_sgpr11 ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = COPY $sgpr8_sgpr9_sgpr10_sgpr11 - ; GCN-NEXT: renamable $sgpr0 = S_BUFFER_LOAD_DWORD_IMM renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0 :: (dereferenceable invariant load (s32), addrspace 4) - ; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr0, %stack.0, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.0, addrspace 5) ; GCN-NEXT: renamable $sgpr1 = S_BUFFER_LOAD_DWORD_IMM renamable $sgpr4_sgpr5_sgpr6_sgpr7, 4, 0 :: (dereferenceable invariant load (s32), addrspace 4) - ; GCN-NEXT: renamable $sgpr0 = S_BUFFER_LOAD_DWORD_IMM renamable $sgpr4_sgpr5_sgpr6_sgpr7, 8, 0 :: (dereferenceable invariant load (s32), addrspace 4) - ; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr0, %stack.1, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.1, addrspace 5) - ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.0, addrspace 5) + ; GCN-NEXT: renamable $sgpr0 = S_BUFFER_LOAD_DWORD_IMM renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0 :: (dereferenceable invariant load (s32), addrspace 4) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr0 ; GCN-NEXT: S_NOP 0, 
implicit killed renamable $sgpr1 - ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.1, addrspace 5) + ; GCN-NEXT: renamable $sgpr0 = S_BUFFER_LOAD_DWORD_IMM renamable $sgpr4_sgpr5_sgpr6_sgpr7, 8, 0 :: (dereferenceable invariant load (s32), addrspace 4) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr0 ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7 %0:sgpr_128 = COPY $sgpr8_sgpr9_sgpr10_sgpr11 @@ -461,15 +411,11 @@ body: | ; GCN: liveins: $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $sgpr2_sgpr3 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: renamable $sgpr0 = S_SCRATCH_LOAD_DWORD_IMM renamable $sgpr2_sgpr3, 0, 0, implicit $flat_scr :: (dereferenceable invariant load (s32), addrspace 4) - ; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr0, %stack.0, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.0, addrspace 5) ; GCN-NEXT: renamable $sgpr1 = S_SCRATCH_LOAD_DWORD_IMM renamable $sgpr2_sgpr3, 4, 0, implicit $flat_scr :: (dereferenceable invariant load (s32), addrspace 4) - ; GCN-NEXT: renamable $sgpr0 = S_SCRATCH_LOAD_DWORD_IMM renamable $sgpr2_sgpr3, 8, 0, implicit $flat_scr :: (dereferenceable invariant load (s32), addrspace 4) - ; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr0, %stack.1, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.1, addrspace 5) - ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.0, addrspace 5) + ; GCN-NEXT: renamable $sgpr0 = S_SCRATCH_LOAD_DWORD_IMM renamable $sgpr2_sgpr3, 0, 0, implicit $flat_scr :: (dereferenceable invariant load (s32), addrspace 4) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr0 ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr1 - ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.1, addrspace 5) + ; GCN-NEXT: renamable $sgpr0 = S_SCRATCH_LOAD_DWORD_IMM renamable $sgpr2_sgpr3, 8, 0, implicit $flat_scr :: (dereferenceable invariant load (s32), addrspace 4) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr0 ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $sgpr2_sgpr3 %0:sreg_64_xexec = COPY $sgpr8_sgpr9 @@ -492,15 +438,11 @@ body: | ; GCN: liveins: $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $sgpr2_sgpr3 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: renamable $sgpr0 = S_LOAD_DWORD_IMM renamable $sgpr2_sgpr3, 0, 0 :: (invariant load (s32), addrspace 4) - ; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr0, %stack.0, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.0, addrspace 5) ; GCN-NEXT: renamable $sgpr1 = S_LOAD_DWORD_IMM renamable $sgpr2_sgpr3, 4, 0 :: (invariant load (s32), addrspace 4) - ; GCN-NEXT: renamable $sgpr0 = S_LOAD_DWORD_IMM renamable $sgpr2_sgpr3, 8, 0 :: (invariant load (s32), addrspace 4) - ; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr0, %stack.1, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.1, addrspace 5) - ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.0, addrspace 5) + ; GCN-NEXT: renamable $sgpr0 = S_LOAD_DWORD_IMM renamable $sgpr2_sgpr3, 0, 0 :: (invariant load (s32), addrspace 4) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr0 ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr1 - ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.1, addrspace 5) + ; 
GCN-NEXT: renamable $sgpr0 = S_LOAD_DWORD_IMM renamable $sgpr2_sgpr3, 8, 0 :: (invariant load (s32), addrspace 4) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr0 ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $sgpr2_sgpr3 %0:sreg_64_xexec = COPY $sgpr8_sgpr9 diff --git a/llvm/test/CodeGen/AMDGPU/snippet-copy-bundle-regression.mir b/llvm/test/CodeGen/AMDGPU/snippet-copy-bundle-regression.mir index 55639a27fd5c7..355829825146d 100644 --- a/llvm/test/CodeGen/AMDGPU/snippet-copy-bundle-regression.mir +++ b/llvm/test/CodeGen/AMDGPU/snippet-copy-bundle-regression.mir @@ -33,7 +33,6 @@ body: | ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $vgpr1 = IMPLICIT_DEF ; CHECK-NEXT: renamable $sgpr34_sgpr35 = IMPLICIT_DEF ; CHECK-NEXT: dead renamable $vgpr0 = IMPLICIT_DEF ; CHECK-NEXT: renamable $sgpr41 = IMPLICIT_DEF @@ -41,16 +40,6 @@ body: | ; CHECK-NEXT: renamable $sgpr36_sgpr37 = IMPLICIT_DEF ; CHECK-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX8_IMM renamable $sgpr38_sgpr39, 0, 0 :: (dereferenceable invariant load (s256), align 16, addrspace 4) ; CHECK-NEXT: dead renamable $sgpr4 = S_LOAD_DWORD_IMM renamable $sgpr38_sgpr39, 48, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4) - ; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX8_IMM renamable $sgpr38_sgpr39, 56, 0 :: (dereferenceable invariant load (s256), align 8, addrspace 4) - ; CHECK-NEXT: renamable $vgpr1 = V_WRITELANE_B32 $sgpr4, 0, killed $vgpr1, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 - ; CHECK-NEXT: renamable $vgpr1 = V_WRITELANE_B32 $sgpr5, 1, killed $vgpr1 - ; CHECK-NEXT: renamable $vgpr1 = V_WRITELANE_B32 $sgpr6, 2, killed $vgpr1 - ; CHECK-NEXT: renamable $vgpr1 = V_WRITELANE_B32 $sgpr7, 3, killed $vgpr1 - ; CHECK-NEXT: renamable $vgpr1 = V_WRITELANE_B32 $sgpr8, 4, killed $vgpr1 - ; CHECK-NEXT: renamable $vgpr1 = V_WRITELANE_B32 $sgpr9, 5, killed $vgpr1 - ; CHECK-NEXT: renamable $vgpr1 = V_WRITELANE_B32 $sgpr10, 6, killed $vgpr1 - ; CHECK-NEXT: renamable $vgpr1 = V_WRITELANE_B32 killed $sgpr11, 7, killed $vgpr1, implicit killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 - ; CHECK-NEXT: SI_SPILL_WWM_V32_SAVE killed $vgpr1, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) ; CHECK-NEXT: dead renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM renamable $sgpr44_sgpr45, 0, 0 :: (invariant load (s64), align 16, addrspace 4) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; CHECK-NEXT: $vgpr1 = COPY renamable $sgpr51 @@ -63,50 +52,30 @@ body: | ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000FC00 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $vgpr1 = SI_SPILL_WWM_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) - ; CHECK-NEXT: $sgpr4 = V_READLANE_B32 $vgpr1, 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 - ; CHECK-NEXT: $sgpr5 = V_READLANE_B32 $vgpr1, 1 - ; CHECK-NEXT: $sgpr6 = V_READLANE_B32 $vgpr1, 2 - ; CHECK-NEXT: $sgpr7 = V_READLANE_B32 $vgpr1, 3 - ; CHECK-NEXT: $sgpr8 = 
V_READLANE_B32 $vgpr1, 4 - ; CHECK-NEXT: $sgpr9 = V_READLANE_B32 $vgpr1, 5 - ; CHECK-NEXT: $sgpr10 = V_READLANE_B32 $vgpr1, 6 - ; CHECK-NEXT: $sgpr11 = V_READLANE_B32 $vgpr1, 7 - ; CHECK-NEXT: $noreg = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; CHECK-NEXT: $exec = S_MOV_B64 killed $noreg + ; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX8_IMM renamable $sgpr38_sgpr39, 56, 0 :: (dereferenceable invariant load (s256), align 8, addrspace 4) ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000FC00 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $vgpr1 = SI_SPILL_WWM_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) - ; CHECK-NEXT: $sgpr4 = V_READLANE_B32 $vgpr1, 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 - ; CHECK-NEXT: $sgpr5 = V_READLANE_B32 $vgpr1, 1 - ; CHECK-NEXT: $sgpr6 = V_READLANE_B32 $vgpr1, 2 - ; CHECK-NEXT: $sgpr7 = V_READLANE_B32 $vgpr1, 3 - ; CHECK-NEXT: $sgpr8 = V_READLANE_B32 $vgpr1, 4 - ; CHECK-NEXT: $sgpr9 = V_READLANE_B32 $vgpr1, 5 - ; CHECK-NEXT: $sgpr10 = V_READLANE_B32 $vgpr1, 6 - ; CHECK-NEXT: $sgpr11 = V_READLANE_B32 $vgpr1, 7 + ; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX8_IMM renamable $sgpr38_sgpr39, 56, 0 :: (dereferenceable invariant load (s256), align 8, addrspace 4) ; CHECK-NEXT: S_CMP_LG_U64 renamable $sgpr4_sgpr5, 0, implicit-def $scc - ; CHECK-NEXT: $noreg = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; CHECK-NEXT: $exec = S_MOV_B64 killed $noreg ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) - ; CHECK-NEXT: liveins: $vgpr1, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11:0x00000000000003F0, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000FC00 + ; CHECK-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11:0x00000000000003F0, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000FC00 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: S_CBRANCH_VCCZ %bb.5, implicit undef $vcc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) - ; CHECK-NEXT: liveins: $vgpr1, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11:0x00000000000003F0, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000FC00 + ; CHECK-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11:0x00000000000003F0, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000FC00 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: S_CMP_EQ_U32 renamable $sgpr8, 0, implicit-def $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: liveins: $vgpr1, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11:0x00000000000000F0, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000FC00 + ; CHECK-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11:0x00000000000000F0, 
$sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000FC00
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   dead renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr38_sgpr39, 40, 0 :: (dereferenceable invariant load (s64), addrspace 4)
  ; CHECK-NEXT:   GLOBAL_STORE_DWORD_SADDR undef renamable $vgpr0, undef renamable $vgpr0, killed renamable $sgpr6_sgpr7, 0, 0, implicit $exec :: (store (s32), addrspace 1)
@@ -116,7 +85,6 @@ body: |
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY killed renamable $sgpr36_sgpr37
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY killed renamable $sgpr34_sgpr35
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
-  ; CHECK-NEXT:   KILL killed renamable $vgpr1
  ; CHECK-NEXT:   S_ENDPGM 0
  bb.0:
    liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16

From 12dfcc02388810e113cca359b2b529c98d02307c Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Thu, 26 Oct 2023 10:09:31 +0100
Subject: [PATCH 035/877] [DAG] Update test case for Issue #69965

The previous reduced test case just showed a minor codegen regression;
this test now shows the actual miscompilation.

---
 llvm/test/CodeGen/X86/pr69965.ll | 74 ++++++++++++++++++--------------
 1 file changed, 42 insertions(+), 32 deletions(-)

diff --git a/llvm/test/CodeGen/X86/pr69965.ll b/llvm/test/CodeGen/X86/pr69965.ll
index fc805e5097c0b..47bec4d7b1ab5 100644
--- a/llvm/test/CodeGen/X86/pr69965.ll
+++ b/llvm/test/CodeGen/X86/pr69965.ll
@@ -2,40 +2,50 @@
 ; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefix=X86
 ; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefix=X64
 
-define i16 @test(i8 %_in) {
-; X86-LABEL: test:
-; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    notb %al
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    orb $-128, %cl
-; X86-NEXT:    movzbl %cl, %ecx
-; X86-NEXT:    shll $8, %ecx
-; X86-NEXT:    addb %al, %al
-; X86-NEXT:    movzbl %al, %eax
-; X86-NEXT:    orl %ecx, %eax
-; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+define i64 @PR69965(ptr %input_ptrs, ptr %output_ptrs) {
+; X86-LABEL: PR69965:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl (%eax), %eax
+; X86-NEXT:    movzbl (%eax), %eax
+; X86-NEXT:    notl %eax
+; X86-NEXT:    movzbl %al, %edx
+; X86-NEXT:    shll $8, %eax
+; X86-NEXT:    movl (%ecx), %ecx
+; X86-NEXT:    leal (%eax,%edx,2), %eax
+; X86-NEXT:    orl $32768, %eax # imm = 0x8000
+; X86-NEXT:    movw %ax, (%ecx)
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    xorl %edx, %edx
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test:
-; X64:       # %bb.0:
-; X64-NEXT:    notb %dil
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    orb $-128, %al
+; X64-LABEL: PR69965:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    movq (%rdi), %rax
+; X64-NEXT:    movzbl (%rax), %eax
+; X64-NEXT:    notl %eax
 ; X64-NEXT:    movzbl %al, %ecx
-; X64-NEXT:    shll $8, %ecx
-; X64-NEXT:    addb %dil, %dil
-; X64-NEXT:    movzbl %dil, %eax
-; X64-NEXT:    orl %ecx, %eax
-; X64-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-NEXT:    # kill: def $eax killed $eax def $rax
+; X64-NEXT:    shll $8, %eax
+; X64-NEXT:    movq (%rsi), %rdx
+; X64-NEXT:    leal (%rax,%rcx,2), %eax
+; X64-NEXT:    orl $32768, %eax # imm = 0x8000
+; X64-NEXT:    movw %ax, (%rdx)
+; X64-NEXT:    xorl %eax, %eax
 ; X64-NEXT:    retq
-  %_1 = and i8 %_in, 127
-  %_2 = xor i8 %_1, 127
-  %_3 = or i8 %_2, -128
-  %_4 = zext i8 %_3 to i16
-  %_6 = shl nuw i16 %_4, 8
-  %_7 = shl nuw i8 %_2, 1
-  %_8 = zext i8 %_7 to i16
-  %_9 = or i16 %_6, %_8
-  ret i16 %_9
+entry:
+  %0 = 
load ptr, ptr %input_ptrs, align 8 + %.val.i = load i8, ptr %0, align 1 + %1 = and i8 %.val.i, 127 + %2 = xor i8 %1, 127 + %3 = or i8 %2, -128 + %4 = zext i8 %3 to i16 + %5 = load ptr, ptr %output_ptrs, align 8 + %6 = shl nuw i16 %4, 8 + %7 = shl nuw i8 %2, 1 + %8 = zext i8 %7 to i16 + %9 = or i16 %6, %8 + store i16 %9, ptr %5, align 2 + ret i64 0 } From 547dc461225ba65b6d5dac34efd31d2c8c5b1049 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 26 Oct 2023 10:34:06 +0100 Subject: [PATCH 036/877] [DAG] SimplifyDemandedBits - ensure we drop NSW/NUW flags when we simplify a SHL node's input We already do this for variable shifts, but we missed it for constant shifts Fixes #69965 --- .../CodeGen/SelectionDAG/TargetLowering.cpp | 11 ++++++++++- llvm/test/CodeGen/X86/pr69965.ll | 19 +++++++++++-------- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 8b4f315949912..c139850eb9963 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1786,8 +1786,17 @@ bool TargetLowering::SimplifyDemandedBits( APInt InDemandedMask = DemandedBits.lshr(ShAmt); if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO, - Depth + 1)) + Depth + 1)) { + SDNodeFlags Flags = Op.getNode()->getFlags(); + if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) { + // Disable the nsw and nuw flags. We can no longer guarantee that we + // won't wrap after simplification. + Flags.setNoSignedWrap(false); + Flags.setNoUnsignedWrap(false); + Op->setFlags(Flags); + } return true; + } assert(!Known.hasConflict() && "Bits known to be one AND zero?"); Known.Zero <<= ShAmt; Known.One <<= ShAmt; diff --git a/llvm/test/CodeGen/X86/pr69965.ll b/llvm/test/CodeGen/X86/pr69965.ll index 47bec4d7b1ab5..33bea976c7896 100644 --- a/llvm/test/CodeGen/X86/pr69965.ll +++ b/llvm/test/CodeGen/X86/pr69965.ll @@ -10,10 +10,12 @@ define i64 @PR69965(ptr %input_ptrs, ptr %output_ptrs) { ; X86-NEXT: movl (%eax), %eax ; X86-NEXT: movzbl (%eax), %eax ; X86-NEXT: notl %eax -; X86-NEXT: movzbl %al, %edx -; X86-NEXT: shll $8, %eax +; X86-NEXT: movl %eax, %edx +; X86-NEXT: shll $8, %edx ; X86-NEXT: movl (%ecx), %ecx -; X86-NEXT: leal (%eax,%edx,2), %eax +; X86-NEXT: addb %al, %al +; X86-NEXT: movzbl %al, %eax +; X86-NEXT: orl %edx, %eax ; X86-NEXT: orl $32768, %eax # imm = 0x8000 ; X86-NEXT: movw %ax, (%ecx) ; X86-NEXT: xorl %eax, %eax @@ -25,13 +27,14 @@ define i64 @PR69965(ptr %input_ptrs, ptr %output_ptrs) { ; X64-NEXT: movq (%rdi), %rax ; X64-NEXT: movzbl (%rax), %eax ; X64-NEXT: notl %eax -; X64-NEXT: movzbl %al, %ecx -; X64-NEXT: # kill: def $eax killed $eax def $rax +; X64-NEXT: leal (%rax,%rax), %ecx +; X64-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NEXT: shll $8, %eax ; X64-NEXT: movq (%rsi), %rdx -; X64-NEXT: leal (%rax,%rcx,2), %eax -; X64-NEXT: orl $32768, %eax # imm = 0x8000 -; X64-NEXT: movw %ax, (%rdx) +; X64-NEXT: movzbl %cl, %ecx +; X64-NEXT: orl %eax, %ecx +; X64-NEXT: orl $32768, %ecx # imm = 0x8000 +; X64-NEXT: movw %cx, (%rdx) ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: retq entry: From 1217a54be9a194137c776c692a2fad2ba80a8ee8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= Date: Thu, 26 Oct 2023 11:23:14 +0200 Subject: [PATCH 037/877] [clang][Interp][NFC] Make InlineDescriptor::Desc const --- clang/lib/AST/Interp/Descriptor.cpp | 2 +- clang/lib/AST/Interp/Descriptor.h | 6 +++--- clang/lib/AST/Interp/Pointer.cpp | 2 +- 
clang/lib/AST/Interp/Pointer.h | 7 ++++--- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/clang/lib/AST/Interp/Descriptor.cpp b/clang/lib/AST/Interp/Descriptor.cpp index abd75a8d67bbd..2a21f60588d46 100644 --- a/clang/lib/AST/Interp/Descriptor.cpp +++ b/clang/lib/AST/Interp/Descriptor.cpp @@ -262,7 +262,7 @@ Descriptor::Descriptor(const DeclTy &D, PrimType Type, bool IsTemporary, } /// Arrays of composite elements. -Descriptor::Descriptor(const DeclTy &D, Descriptor *Elem, MetadataSize MD, +Descriptor::Descriptor(const DeclTy &D, const Descriptor *Elem, MetadataSize MD, unsigned NumElems, bool IsConst, bool IsTemporary, bool IsMutable) : Source(D), ElemSize(Elem->getAllocSize() + sizeof(InlineDescriptor)), diff --git a/clang/lib/AST/Interp/Descriptor.h b/clang/lib/AST/Interp/Descriptor.h index be9a380138a7b..2fd4e92082645 100644 --- a/clang/lib/AST/Interp/Descriptor.h +++ b/clang/lib/AST/Interp/Descriptor.h @@ -72,7 +72,7 @@ struct InlineDescriptor { /// Flag indicating if the field is mutable (if in a record). unsigned IsFieldMutable : 1; - Descriptor *Desc; + const Descriptor *Desc; }; /// Describes a memory block created by an allocation site. @@ -102,7 +102,7 @@ struct Descriptor final { /// Pointer to the record, if block contains records. Record *const ElemRecord = nullptr; /// Descriptor of the array element. - Descriptor *const ElemDesc = nullptr; + const Descriptor *const ElemDesc = nullptr; /// Flag indicating if the block is mutable. const bool IsConst = false; /// Flag indicating if a field is mutable. @@ -129,7 +129,7 @@ struct Descriptor final { Descriptor(const DeclTy &D, PrimType Type, bool IsTemporary, UnknownSize); /// Allocates a descriptor for an array of composites. - Descriptor(const DeclTy &D, Descriptor *Elem, MetadataSize MD, + Descriptor(const DeclTy &D, const Descriptor *Elem, MetadataSize MD, unsigned NumElems, bool IsConst, bool IsTemporary, bool IsMutable); /// Allocates a descriptor for an array of composites of unknown size. diff --git a/clang/lib/AST/Interp/Pointer.cpp b/clang/lib/AST/Interp/Pointer.cpp index d1af58203bec6..e979b99b0fdd0 100644 --- a/clang/lib/AST/Interp/Pointer.cpp +++ b/clang/lib/AST/Interp/Pointer.cpp @@ -94,7 +94,7 @@ APValue Pointer::toAPValue() const { Offset = CharUnits::Zero(); } else { // Build the lvalue base from the block. - Descriptor *Desc = getDeclDesc(); + const Descriptor *Desc = getDeclDesc(); if (auto *VD = Desc->asValueDecl()) Base = VD; else if (auto *E = Desc->asExpr()) diff --git a/clang/lib/AST/Interp/Pointer.h b/clang/lib/AST/Interp/Pointer.h index 3b21290332a9d..65d710077fd1c 100644 --- a/clang/lib/AST/Interp/Pointer.h +++ b/clang/lib/AST/Interp/Pointer.h @@ -184,7 +184,8 @@ class Pointer { // Step into the containing array, if inside one. unsigned Next = Base - getInlineDesc()->Offset; - Descriptor *Desc = Next == 0 ? getDeclDesc() : getDescriptor(Next)->Desc; + const Descriptor *Desc = + Next == 0 ? getDeclDesc() : getDescriptor(Next)->Desc; if (!Desc->IsArray) return *this; return Pointer(Pointee, Next, Offset); @@ -198,7 +199,7 @@ class Pointer { bool isField() const { return Base != 0 && Base != RootPtrMark; } /// Accessor for information about the declaration site. - Descriptor *getDeclDesc() const { return Pointee->Desc; } + const Descriptor *getDeclDesc() const { return Pointee->Desc; } SourceLocation getDeclLoc() const { return getDeclDesc()->getLocation(); } /// Returns a pointer to the object of which this pointer is a field. 
@@ -222,7 +223,7 @@ class Pointer { } /// Accessors for information about the innermost field. - Descriptor *getFieldDesc() const { + const Descriptor *getFieldDesc() const { if (Base == 0 || Base == RootPtrMark) return getDeclDesc(); return getInlineDesc()->Desc; From f8b7506e2db2a162deec8343d6942258484be233 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= Date: Thu, 26 Oct 2023 11:40:43 +0200 Subject: [PATCH 038/877] [clang][NFC] Move a variable into the closest scope AllocType is not used anywhere else. --- clang/lib/AST/ExprConstant.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index eea0827d6f7a8..320e2ef12c38d 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -6867,8 +6867,8 @@ static std::optional CheckDeleteKind(EvalInfo &Info, const Expr *E, return std::nullopt; } - QualType AllocType = Pointer.Base.getDynamicAllocType(); if (DeallocKind != (*Alloc)->getKind()) { + QualType AllocType = Pointer.Base.getDynamicAllocType(); Info.FFDiag(E, diag::note_constexpr_new_delete_mismatch) << DeallocKind << (*Alloc)->getKind() << AllocType; NoteLValueLocation(Info, Pointer.Base); From 737d2acd6863ca7012a75575c1a52fd807ed44ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= Date: Thu, 26 Oct 2023 11:47:26 +0200 Subject: [PATCH 039/877] [clang][Interp][NFC] Make Block::Desc const --- clang/lib/AST/Interp/InterpBlock.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/lib/AST/Interp/InterpBlock.h b/clang/lib/AST/Interp/InterpBlock.h index 89f937b588fb6..4ab67ebb9eaaf 100644 --- a/clang/lib/AST/Interp/InterpBlock.h +++ b/clang/lib/AST/Interp/InterpBlock.h @@ -53,7 +53,7 @@ class Block final { bool IsStatic = false, bool IsExtern = false) : DeclID(DeclID), IsStatic(IsStatic), IsExtern(IsExtern), Desc(Desc) {} - Block(Descriptor *Desc, bool IsStatic = false, bool IsExtern = false) + Block(const Descriptor *Desc, bool IsStatic = false, bool IsExtern = false) : DeclID((unsigned)-1), IsStatic(IsStatic), IsExtern(IsExtern), Desc(Desc) {} @@ -120,8 +120,8 @@ class Block final { friend class DeadBlock; friend class InterpState; - Block(Descriptor *Desc, bool IsExtern, bool IsStatic, bool IsDead) - : IsStatic(IsStatic), IsExtern(IsExtern), IsDead(true), Desc(Desc) {} + Block(const Descriptor *Desc, bool IsExtern, bool IsStatic, bool IsDead) + : IsStatic(IsStatic), IsExtern(IsExtern), IsDead(true), Desc(Desc) {} /// Deletes a dead block at the end of its lifetime. void cleanup(); @@ -149,7 +149,7 @@ class Block final { /// via invokeCtor. bool IsInitialized = false; /// Pointer to the stack slot descriptor. - Descriptor *Desc; + const Descriptor *Desc; }; /// Descriptor for a dead block. 
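Both Interp const-correctness patches above ([PATCH 037/877] and [PATCH 039/877]) apply the same pattern: a Descriptor is treated as immutable once created, so the Desc pointers held by InlineDescriptor, Pointer, and Block can be const-qualified, and the qualifier simply propagates through the accessors. A minimal standalone sketch of why such a change is NFC follows; the names mirror the patches, but the types are simplified illustrative stand-ins, not the actual clang/AST/Interp sources:

```
// Minimal sketch of the const-propagation pattern from PATCH 037/039.
// Assumption (matching the patches): a Descriptor is never mutated after
// construction, which is what makes adding the const qualifier safe.
#include <cstddef>

struct Descriptor {
  const size_t AllocSize; // immutable after construction
  explicit Descriptor(size_t Size) : AllocSize(Size) {}
};

class Block {
public:
  // Taking 'const Descriptor *' documents that Block only reads the
  // descriptor; it never writes through the pointer.
  explicit Block(const Descriptor *Desc) : Desc(Desc) {}

  // The const qualifier propagates to callers through the accessors.
  const Descriptor *getDescriptor() const { return Desc; }
  size_t getSize() const { return Desc->AllocSize; }

private:
  const Descriptor *Desc; // pointer to shared, immutable metadata
};

int main() {
  Descriptor D(16);
  Block B(&D);
  return B.getSize() == 16 ? 0 : 1; // exits 0 when the accessor works
}
```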
From 24865a6423bbdb0286a649c5a953af0e817bfee8 Mon Sep 17 00:00:00 2001 From: Piotr Sobczak Date: Thu, 26 Oct 2023 11:46:14 +0200 Subject: [PATCH 040/877] [Inline Spiller] Pre-commit test --- llvm/test/CodeGen/AMDGPU/dead_bundle.mir | 42 ++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/dead_bundle.mir diff --git a/llvm/test/CodeGen/AMDGPU/dead_bundle.mir b/llvm/test/CodeGen/AMDGPU/dead_bundle.mir new file mode 100644 index 0000000000000..d302cb8051c1a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/dead_bundle.mir @@ -0,0 +1,42 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn--amdpal -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs=0 -start-before=greedy,0 -stop-after=virtregrewriter,0 -stress-regalloc=5 %s -o - | FileCheck %s + +# This test currently fails with verify-machineinstrs=1 due to dead bundle mishandling: "Live range continues after dead def flag". +--- +name: psmain +tracksRegLiveness: true +machineFunctionInfo: + stackPtrOffsetReg: '$sgpr32' + psInputAddr: 7 + psInputEnable: 7 +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2 + + ; CHECK-LABEL: name: psmain + ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead renamable $sgpr3 = IMPLICIT_DEF + ; CHECK-NEXT: renamable $sgpr1 = KILL undef $sgpr1 + ; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_BUFFER_LOAD_DWORDX8_IMM undef renamable $sgpr0_sgpr1_sgpr2_sgpr3, 416, 0 :: (dereferenceable invariant load (s256), align 4) + ; CHECK-NEXT: dead [[V_CVT_U32_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_U32_F32_e64 0, $sgpr4, 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: SI_SPILL_S256_SAVE renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s256) into %stack.0, align 4, addrspace 5) + ; CHECK-NEXT: dead renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF + ; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_BUFFER_LOAD_DWORDX8_IMM undef renamable $sgpr0_sgpr1_sgpr2_sgpr3, 416, 0 :: (dereferenceable invariant load (s256), align 4) + ; CHECK-NEXT: renamable $sgpr3 = COPY killed renamable $sgpr7 + ; CHECK-NEXT: renamable $sgpr5 = COPY renamable $sgpr9 + ; CHECK-NEXT: dead undef %4.sub0:vreg_64 = COPY renamable $sgpr3 + ; CHECK-NEXT: dead undef %7.sub1:vreg_64 = COPY killed renamable $sgpr5 + ; CHECK-NEXT: dead [[IMAGE_SAMPLE_V1_V2_gfx11_:%[0-9]+]]:vgpr_32 = IMAGE_SAMPLE_V1_V2_gfx11 undef %4, undef renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8) + ; CHECK-NEXT: S_ENDPGM 0 + undef %8.sub3:sgpr_128 = IMPLICIT_DEF + undef %8.sub1:sgpr_128 = COPY undef $sgpr1 + %346:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM undef %8, 416, 0 :: (dereferenceable invariant load (s256), align 4) + %60:vgpr_32 = V_CVT_U32_F32_e64 0, %346.sub0, 0, 0, implicit $mode, implicit $exec + %127:sgpr_512 = IMPLICIT_DEF + undef %283.sub0:vreg_64 = COPY %346.sub3 + undef %283.sub1:vreg_64 = COPY %346.sub5 + %282:vgpr_32 = IMAGE_SAMPLE_V1_V2_gfx11 undef %283, undef %127.sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15, %8, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8) + S_ENDPGM 0 +... 
+

From 80abbeca8e52174157654ee33151da507d098ced Mon Sep 17 00:00:00 2001
From: Piotr Sobczak <piotr.sobczak@amd.com>
Date: Thu, 26 Oct 2023 11:51:26 +0200
Subject: [PATCH 041/877] [Inline Spiller] Consider bundles when marking defs
 as dead

Fix a bug where the code expects just a single MI, but a series of
bundled MIs needs to be handled instead. The semi-formed bundles are
created by SplitKit for the case where not all lanes are live
(buildSingleSubRegCopy).

Then the remat kicks in, and since the values that are copied in the
bundle do not need to be preserved due to the remat (dead defs), all
instructions in the bundle should be marked as dead. However, only the
first one gets marked as dead, which causes the verifier to complain
later with the error: "Live range continues after dead def flag".

Differential Revision: https://reviews.llvm.org/D156999
---
 llvm/lib/CodeGen/InlineSpiller.cpp       | 29 ++++++++++++++++++++++++
 llvm/test/CodeGen/AMDGPU/dead_bundle.mir |  5 ++--
 2 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp
index 46fcc62e09e8a..71d58b2e9e18d 100644
--- a/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -753,6 +753,35 @@ void InlineSpiller::reMaterializeAll() {
       continue;
     LLVM_DEBUG(dbgs() << "All defs dead: " << *MI);
     DeadDefs.push_back(MI);
+    // If MI is a bundle header, also try removing copies inside the bundle,
+    // otherwise the verifier would complain "live range continues after dead
+    // def flag".
+    if (MI->isBundledWithSucc() && !MI->isBundledWithPred()) {
+      MachineBasicBlock::instr_iterator BeginIt = MI->getIterator(),
+                                        EndIt = MI->getParent()->instr_end();
+      ++BeginIt; // Skip MI that was already handled.
+
+      bool OnlyDeadCopies = true;
+      for (MachineBasicBlock::instr_iterator It = BeginIt;
+           It != EndIt && It->isBundledWithPred(); ++It) {
+
+        auto DestSrc = TII.isCopyInstr(*It);
+        bool IsCopyToDeadReg =
+            DestSrc && DestSrc->Destination->getReg() == Reg;
+        if (!IsCopyToDeadReg) {
+          OnlyDeadCopies = false;
+          break;
+        }
+      }
+      if (OnlyDeadCopies) {
+        for (MachineBasicBlock::instr_iterator It = BeginIt;
+             It != EndIt && It->isBundledWithPred(); ++It) {
+          It->addRegisterDead(Reg, &TRI);
+          LLVM_DEBUG(dbgs() << "All defs dead: " << *It);
+          DeadDefs.push_back(&*It);
+        }
+      }
+    }
   }
 }
diff --git a/llvm/test/CodeGen/AMDGPU/dead_bundle.mir b/llvm/test/CodeGen/AMDGPU/dead_bundle.mir
index d302cb8051c1a..7970650f43d1b 100644
--- a/llvm/test/CodeGen/AMDGPU/dead_bundle.mir
+++ b/llvm/test/CodeGen/AMDGPU/dead_bundle.mir
@@ -1,7 +1,7 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn--amdpal -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs=0 -start-before=greedy,0 -stop-after=virtregrewriter,0 -stress-regalloc=5 %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn--amdpal -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs=1 -start-before=greedy,0 -stop-after=virtregrewriter,0 -stress-regalloc=5 %s -o - | FileCheck %s
 
-# This test currently fails with verify-machineinstrs=1 due to dead bundle mishandling: "Live range continues after dead def flag".
+# This test checks that dead bundles are handled correctly. 
--- name: psmain tracksRegLiveness: true @@ -20,7 +20,6 @@ body: | ; CHECK-NEXT: renamable $sgpr1 = KILL undef $sgpr1 ; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_BUFFER_LOAD_DWORDX8_IMM undef renamable $sgpr0_sgpr1_sgpr2_sgpr3, 416, 0 :: (dereferenceable invariant load (s256), align 4) ; CHECK-NEXT: dead [[V_CVT_U32_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_U32_F32_e64 0, $sgpr4, 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: SI_SPILL_S256_SAVE renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s256) into %stack.0, align 4, addrspace 5) ; CHECK-NEXT: dead renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF ; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_BUFFER_LOAD_DWORDX8_IMM undef renamable $sgpr0_sgpr1_sgpr2_sgpr3, 416, 0 :: (dereferenceable invariant load (s256), align 4) ; CHECK-NEXT: renamable $sgpr3 = COPY killed renamable $sgpr7 From 85f6b2fac9a367337e43ca288c45ea783981cc16 Mon Sep 17 00:00:00 2001 From: Sunil Kuravinakop Date: Thu, 26 Oct 2023 05:01:02 -0500 Subject: [PATCH 042/877] [OpenMP] Patch for Support to loop bind clause : Checking Parent Region Differential revision: https://reviews.llvm.org/D158266 --- clang/include/clang/Sema/Sema.h | 8 +- clang/lib/Sema/SemaOpenMP.cpp | 64 +++++--- clang/test/OpenMP/loop_bind_messages.cpp | 180 +++++++++++++++++++++-- clang/test/PCH/pragma-loop.cpp | 8 +- 4 files changed, 227 insertions(+), 33 deletions(-) diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 1e9752345ffd1..18ac85011aa75 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -11307,6 +11307,7 @@ class Sema final { /// on the parameter of the bind clause. In the methods for the /// mapped directives, check the parameters of the lastprivate clause. bool checkLastPrivateForMappedDirectives(ArrayRef Clauses); + /// Depending on the bind clause of OMPD_loop map the directive to new /// directives. /// 1) loop bind(parallel) --> OMPD_for @@ -11316,9 +11317,12 @@ class Sema final { /// rigorous semantic checking in the new mapped directives. bool mapLoopConstruct(llvm::SmallVector &ClausesWithoutBind, ArrayRef Clauses, - OpenMPBindClauseKind BindKind, + OpenMPBindClauseKind &BindKind, OpenMPDirectiveKind &Kind, - OpenMPDirectiveKind &PrevMappedDirective); + OpenMPDirectiveKind &PrevMappedDirective, + SourceLocation StartLoc, SourceLocation EndLoc, + const DeclarationNameInfo &DirName, + OpenMPDirectiveKind CancelRegion); public: /// The declarator \p D defines a function in the scope \p S which is nested diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 75f9e152dca92..f28e0f2693080 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -5062,6 +5062,18 @@ static bool checkNestingOfRegions(Sema &SemaRef, const DSAStackTy *Stack, CurrentRegion != OMPD_cancellation_point && CurrentRegion != OMPD_cancel && CurrentRegion != OMPD_scan) return false; + // Checks needed for mapping "loop" construct. Please check mapLoopConstruct + // for a detailed explanation + if (SemaRef.LangOpts.OpenMP >= 50 && CurrentRegion == OMPD_loop && + ((BindKind == OMPC_BIND_parallel) || (BindKind == OMPC_BIND_teams)) && + (isOpenMPWorksharingDirective(ParentRegion) || + ParentRegion == OMPD_loop)) { + int ErrorMsgNumber = (BindKind == OMPC_BIND_parallel) ? 
1 : 4; + SemaRef.Diag(StartLoc, diag::err_omp_prohibited_region) + << true << getOpenMPDirectiveName(ParentRegion) << ErrorMsgNumber + << getOpenMPDirectiveName(CurrentRegion); + return true; + } if (CurrentRegion == OMPD_cancellation_point || CurrentRegion == OMPD_cancel) { // OpenMP [2.16, Nesting of Regions] @@ -6114,35 +6126,40 @@ processImplicitMapsWithDefaultMappers(Sema &S, DSAStackTy *Stack, bool Sema::mapLoopConstruct(llvm::SmallVector &ClausesWithoutBind, ArrayRef Clauses, - OpenMPBindClauseKind BindKind, + OpenMPBindClauseKind &BindKind, OpenMPDirectiveKind &Kind, - OpenMPDirectiveKind &PrevMappedDirective) { + OpenMPDirectiveKind &PrevMappedDirective, + SourceLocation StartLoc, SourceLocation EndLoc, + const DeclarationNameInfo &DirName, + OpenMPDirectiveKind CancelRegion) { bool UseClausesWithoutBind = false; // Restricting to "#pragma omp loop bind" if (getLangOpts().OpenMP >= 50 && Kind == OMPD_loop) { + + const OpenMPDirectiveKind ParentDirective = DSAStack->getParentDirective(); + if (BindKind == OMPC_BIND_unknown) { // Setting the enclosing teams or parallel construct for the loop // directive without bind clause. BindKind = OMPC_BIND_thread; // Default bind(thread) if binding is unknown - const OpenMPDirectiveKind ParentDirective = - DSAStack->getParentDirective(); if (ParentDirective == OMPD_unknown) { Diag(DSAStack->getDefaultDSALocation(), diag::err_omp_bind_required_on_loop); - } else if (ParentDirective == OMPD_parallel || - ParentDirective == OMPD_target_parallel) { + } else if (isOpenMPParallelDirective(ParentDirective) && + !isOpenMPTeamsDirective(ParentDirective)) { BindKind = OMPC_BIND_parallel; - } else if (ParentDirective == OMPD_teams || - ParentDirective == OMPD_target_teams) { + } else if (isOpenMPNestingTeamsDirective(ParentDirective) || + (ParentDirective == OMPD_target_teams)) { BindKind = OMPC_BIND_teams; } } else { - // bind clause is present, so we should set flag indicating to only - // use the clauses that aren't the bind clause for the new directive that - // loop is lowered to. + // bind clause is present in loop directive. When the loop directive is + // changed to a new directive the bind clause is not used. So, we should + // set flag indicating to only use the clauses that aren't the + // bind clause. UseClausesWithoutBind = true; } @@ -6203,26 +6220,35 @@ StmtResult Sema::ActOnOpenMPExecutableDirective( OpenMPDirectiveKind PrevMappedDirective) { StmtResult Res = StmtError(); OpenMPBindClauseKind BindKind = OMPC_BIND_unknown; + llvm::SmallVector ClausesWithoutBind; + bool UseClausesWithoutBind = false; + if (const OMPBindClause *BC = OMPExecutableDirective::getSingleClause(Clauses)) BindKind = BC->getBindKind(); + + // Variable used to note down the DirectiveKind because mapLoopConstruct may + // change "Kind" variable, due to mapping of "omp loop" to other directives. + OpenMPDirectiveKind DK = Kind; + if ((Kind == OMPD_loop) || (PrevMappedDirective == OMPD_loop)) { + UseClausesWithoutBind = mapLoopConstruct( + ClausesWithoutBind, Clauses, BindKind, Kind, PrevMappedDirective, + StartLoc, EndLoc, DirName, CancelRegion); + DK = OMPD_loop; + } + // First check CancelRegion which is then used in checkNestingOfRegions. 
if (checkCancelRegion(*this, Kind, CancelRegion, StartLoc) || - checkNestingOfRegions(*this, DSAStack, Kind, DirName, CancelRegion, - BindKind, StartLoc)) + checkNestingOfRegions(*this, DSAStack, DK, DirName, CancelRegion, + BindKind, StartLoc)) { return StmtError(); + } // Report affected OpenMP target offloading behavior when in HIP lang-mode. if (getLangOpts().HIP && (isOpenMPTargetExecutionDirective(Kind) || isOpenMPTargetDataManagementDirective(Kind))) Diag(StartLoc, diag::warn_hip_omp_target_directives); - llvm::SmallVector ClausesWithoutBind; - bool UseClausesWithoutBind = false; - - UseClausesWithoutBind = mapLoopConstruct(ClausesWithoutBind, Clauses, - BindKind, Kind, PrevMappedDirective); - llvm::SmallVector ClausesWithImplicit; VarsWithInheritedDSAType VarsWithInheritedDSA; bool ErrorFound = false; diff --git a/clang/test/OpenMP/loop_bind_messages.cpp b/clang/test/OpenMP/loop_bind_messages.cpp index f7fdf28971432..186a990fd7c52 100644 --- a/clang/test/OpenMP/loop_bind_messages.cpp +++ b/clang/test/OpenMP/loop_bind_messages.cpp @@ -4,6 +4,7 @@ #define NNN 50 int aaa[NNN]; +int aaa2[NNN][NNN]; void parallel_loop() { #pragma omp parallel @@ -15,6 +16,91 @@ void parallel_loop() { } } +void parallel_for_AND_loop_bind() { + #pragma omp parallel for + for (int i = 0 ; i < NNN ; i++) { + #pragma omp loop bind(parallel) // expected-error{{region cannot be closely nested inside 'parallel for' region; perhaps you forget to enclose 'omp loop' directive into a parallel region?}} + for (int j = 0 ; j < NNN ; j++) { + aaa2[i][j] = i+j; + } + } +} + +void parallel_nowait() { + #pragma omp parallel + #pragma omp for nowait + for (int i = 0 ; i < NNN ; i++) { + #pragma omp loop bind(parallel) // expected-error{{region cannot be closely nested inside 'for' region; perhaps you forget to enclose 'omp loop' directive into a parallel region?}} + for (int j = 0 ; j < NNN ; j++) { + aaa2[i][j] = i+j; + } + } +} + +void parallel_for_with_nothing() { + #pragma omp parallel for + for (int i = 0 ; i < NNN ; i++) { + #pragma omp nothing + #pragma omp loop // expected-error{{region cannot be closely nested inside 'parallel for' region; perhaps you forget to enclose 'omp loop' directive into a parallel region?}} + for (int j = 0 ; j < NNN ; j++) { + aaa2[i][j] = i+j; + } + } +} + +void parallel_targetfor_with_loop_bind() { + #pragma omp target teams distribute parallel for + for (int i = 0 ; i < NNN ; i++) { + #pragma omp loop bind(parallel) // expected-error{{region cannot be closely nested inside 'target teams distribute parallel for' region; perhaps you forget to enclose 'omp loop' directive into a parallel region?}} + for (int j = 0 ; j < NNN ; j++) { + aaa2[i][j] = i+j; + } + } +} + +void parallel_targetparallel_with_loop() { + #pragma omp target parallel + for (int i = 0 ; i < NNN ; i++) { + #pragma omp loop bind(parallel) + for (int j = 0 ; j < NNN ; j++) { + aaa2[i][j] = i+j; + } + } +} + +void loop_bind_AND_loop_bind() { + #pragma omp parallel for + for (int i = 0; i < 100; ++i) { + #pragma omp loop bind(parallel) // expected-error{{region cannot be closely nested inside 'parallel for' region; perhaps you forget to enclose 'omp loop' directive into a parallel region?}} + for (int i = 0 ; i < NNN ; i++) { + #pragma omp loop bind(parallel) // expected-error{{region cannot be closely nested inside 'loop' region; perhaps you forget to enclose 'omp loop' directive into a parallel region?}} + for (int j = 0 ; j < NNN ; j++) { + aaa[j] = j*NNN; + } + } + } +} + +void parallel_with_sections_loop() { + #pragma 
omp parallel + { + #pragma omp sections + { + for (int i = 0 ; i < NNN ; i++) { + #pragma omp loop bind(parallel) // expected-error{{region cannot be closely nested inside 'sections' region; perhaps you forget to enclose 'omp loop' directive into a parallel region?}} + for (int j = 0 ; j < NNN ; j++) { + aaa2[i][j] = i+j; + } + } + + #pragma omp section + { + aaa[NNN-1] = NNN; + } + } + } +} + void teams_loop() { int var1, var2; @@ -34,17 +120,23 @@ void teams_loop() { } } -void orphan_loop_with_bind() { - #pragma omp loop bind(parallel) - for (int j = 0 ; j < NNN ; j++) { - aaa[j] = j*NNN; +void teams_targetteams_with_loop() { + #pragma omp target teams + for (int i = 0 ; i < NNN ; i++) { + #pragma omp loop bind(teams) + for (int j = 0 ; j < NNN ; j++) { + aaa2[i][j] = i+j; + } } } -void orphan_loop_no_bind() { - #pragma omp loop // expected-error{{expected 'bind' clause for 'loop' construct without an enclosing OpenMP construct}} - for (int j = 0 ; j < NNN ; j++) { - aaa[j] = j*NNN; +void teams_targetfor_with_loop_bind() { + #pragma omp target teams distribute parallel for + for (int i = 0 ; i < NNN ; i++) { + #pragma omp loop bind(teams) // expected-error{{region cannot be closely nested inside 'target teams distribute parallel for' region; perhaps you forget to enclose 'omp loop' directive into a teams region?}} + for (int j = 0 ; j < NNN ; j++) { + aaa2[i][j] = i+j; + } } } @@ -65,12 +157,80 @@ void teams_loop_reduction() { } } +void teams_loop_distribute() { + int total = 0; + + #pragma omp teams num_teams(8) thread_limit(256) + #pragma omp distribute parallel for dist_schedule(static, 1024) \ + schedule(static, 64) + for (int i = 0; i < NNN; i++) { + #pragma omp loop bind(teams) // expected-error{{'distribute parallel for' region; perhaps you forget to enclose 'omp loop' directive into a teams region?}} + for (int j = 0; j < NNN; j++) { + aaa2[i][j] = i+j; + } + } +} + +void parallel_for_with_loop_teams_bind(){ + #pragma omp parallel for + for (int i = 0; i < NNN; i++) { + #pragma omp loop bind(teams) // expected-error{{region cannot be closely nested inside 'parallel for' region; perhaps you forget to enclose 'omp loop' directive into a teams region?}} + for (int j = 0 ; j < NNN ; j++) { + aaa[i] = i+i*NNN; + } + } +} + +void teams_with_loop_thread_bind(){ + #pragma omp teams + for (int i = 0; i < NNN; i++) { + #pragma omp loop bind(thread) + for (int j = 0 ; j < NNN ; j++) { + aaa[i] = i+i*NNN; + } + } +} + +void orphan_loop_no_bind() { + #pragma omp loop // expected-error{{expected 'bind' clause for 'loop' construct without an enclosing OpenMP construct}} + for (int j = 0 ; j < NNN ; j++) { + aaa[j] = j*NNN; + } +} + +void orphan_loop_parallel_bind() { + #pragma omp loop bind(parallel) + for (int j = 0 ; j < NNN ; j++) { + aaa[j] = j*NNN; + } +} + +void orphan_loop_teams_bind(){ + #pragma omp loop bind(teams) + for (int i = 0; i < NNN; i++) { + aaa[i] = i+i*NNN; + } +} + int main(int argc, char *argv[]) { parallel_loop(); + parallel_for_AND_loop_bind(); + parallel_nowait(); + parallel_for_with_nothing(); + parallel_targetfor_with_loop_bind(); + parallel_targetparallel_with_loop(); + loop_bind_AND_loop_bind(); + parallel_with_sections_loop(); teams_loop(); - orphan_loop_with_bind(); - orphan_loop_no_bind(); + teams_targetteams_with_loop(); + teams_targetfor_with_loop_bind(); teams_loop_reduction(); + teams_loop_distribute(); + parallel_for_with_loop_teams_bind(); + teams_with_loop_thread_bind(); + orphan_loop_no_bind(); + orphan_loop_parallel_bind(); + orphan_loop_teams_bind(); } 
#endif diff --git a/clang/test/PCH/pragma-loop.cpp b/clang/test/PCH/pragma-loop.cpp index f5de630ffc912..a3c6871041c0e 100644 --- a/clang/test/PCH/pragma-loop.cpp +++ b/clang/test/PCH/pragma-loop.cpp @@ -116,9 +116,13 @@ class pragma_test { inline void run10(int *List, int Length) { int i = 0; -#pragma omp loop bind(teams) + int j = 0; + #pragma omp teams for (int i = 0; i < Length; i++) { - List[i] = i; + #pragma omp loop bind(teams) + for (int j = 0; j < Length; j++) { + List[i] = i+j; + } } } From 925f4622dcd09e0d70c6d30779e0c119fb12ce00 Mon Sep 17 00:00:00 2001 From: zhongyunde 00443407 Date: Sat, 21 Oct 2023 13:07:26 +0800 Subject: [PATCH 043/877] [SimplifyCFG] Precommit tests for PR65835 --- .../Transforms/SimplifyCFG/switch_mask.ll | 109 ++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100644 llvm/test/Transforms/SimplifyCFG/switch_mask.ll diff --git a/llvm/test/Transforms/SimplifyCFG/switch_mask.ll b/llvm/test/Transforms/SimplifyCFG/switch_mask.ll new file mode 100644 index 0000000000000..8c97a0660d070 --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/switch_mask.ll @@ -0,0 +1,109 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=simplifycfg --switch-to-lookup -S < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; https://alive2.llvm.org/ce/z/tuxLhJ +define i1 @switch_lookup_with_small_i1(i64 %x) { +; CHECK-LABEL: @switch_lookup_with_small_i1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[AND:%.*]] = and i64 [[X:%.*]], 15 +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i64 [[AND]], 11 +; CHECK-NEXT: [[SWITCH_CAST:%.*]] = trunc i64 [[AND]] to i11 +; CHECK-NEXT: [[SWITCH_SHIFTAMT:%.*]] = mul nuw nsw i11 [[SWITCH_CAST]], 1 +; CHECK-NEXT: [[SWITCH_DOWNSHIFT:%.*]] = lshr i11 -1018, [[SWITCH_SHIFTAMT]] +; CHECK-NEXT: [[SWITCH_MASKED:%.*]] = trunc i11 [[SWITCH_DOWNSHIFT]] to i1 +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i1 [[SWITCH_MASKED]], i1 false +; CHECK-NEXT: ret i1 [[TMP1]] +; +entry: + %and = and i64 %x, 15 + switch i64 %and, label %default [ + i64 10, label %lor.end + i64 1, label %lor.end + i64 2, label %lor.end + ] + +default: ; preds = %entry + br label %lor.end + +lor.end: ; preds = %entry, %entry, %entry, %default + %0 = phi i1 [ true, %entry ], [ false, %default ], [ true, %entry ], [ true, %entry ] + ret i1 %0 +} + +; https://godbolt.org/z/sjbjorKon +define i8 @switch_lookup_with_small_i8(i64 %x) { +; CHECK-LABEL: @switch_lookup_with_small_i8( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[REM:%.*]] = urem i64 [[X:%.*]], 5 +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i64 [[REM]], 3 +; CHECK-NEXT: [[SWITCH_CAST:%.*]] = trunc i64 [[REM]] to i24 +; CHECK-NEXT: [[SWITCH_SHIFTAMT:%.*]] = mul nuw nsw i24 [[SWITCH_CAST]], 8 +; CHECK-NEXT: [[SWITCH_DOWNSHIFT:%.*]] = lshr i24 460303, [[SWITCH_SHIFTAMT]] +; CHECK-NEXT: [[SWITCH_MASKED:%.*]] = trunc i24 [[SWITCH_DOWNSHIFT]] to i8 +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i8 [[SWITCH_MASKED]], i8 0 +; CHECK-NEXT: ret i8 [[TMP1]] +; +entry: + %rem = urem i64 %x, 5 + switch i64 %rem, label %default [ + i64 0, label %sw.bb0 + i64 1, label %sw.bb1 + i64 2, label %sw.bb2 + ] + +sw.bb0: ; preds = %entry + br label %lor.end + +sw.bb1: ; preds = %entry + br label %lor.end + +sw.bb2: ; preds = %entry + br label %lor.end + +default: ; preds = %entry + br label %lor.end + +lor.end: + %0 = phi i8 [ 15, %sw.bb0 ], [ 6, %sw.bb1 ], [ 7, %sw.bb2 ], [ 0, %default ] + ret i8 %0 +} + +; Negative test: Table size would not fit the register. 
+define i8 @switch_lookup_with_small_i8_negative(i64 %x) {
+; CHECK-LABEL: @switch_lookup_with_small_i8_negative(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[REM:%.*]] = urem i64 [[X:%.*]], 9
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i64 [[REM]], 3
+; CHECK-NEXT:    [[SWITCH_CAST:%.*]] = trunc i64 [[REM]] to i24
+; CHECK-NEXT:    [[SWITCH_SHIFTAMT:%.*]] = mul nuw nsw i24 [[SWITCH_CAST]], 8
+; CHECK-NEXT:    [[SWITCH_DOWNSHIFT:%.*]] = lshr i24 460303, [[SWITCH_SHIFTAMT]]
+; CHECK-NEXT:    [[SWITCH_MASKED:%.*]] = trunc i24 [[SWITCH_DOWNSHIFT]] to i8
+; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[TMP0]], i8 [[SWITCH_MASKED]], i8 0
+; CHECK-NEXT:    ret i8 [[TMP1]]
+;
+entry:
+  %rem = urem i64 %x, 9 ; 9 * 8 = 72 > 64, does not fit in a register
+  switch i64 %rem, label %default [
+  i64 0, label %sw.bb0
+  i64 1, label %sw.bb1
+  i64 2, label %sw.bb2
+  ]
+
+sw.bb0: ; preds = %entry
+  br label %lor.end
+
+sw.bb1: ; preds = %entry
+  br label %lor.end
+
+sw.bb2: ; preds = %entry
+  br label %lor.end
+
+default: ; preds = %entry
+  br label %lor.end
+
+lor.end:
+  %0 = phi i8 [ 15, %sw.bb0 ], [ 6, %sw.bb1 ], [ 7, %sw.bb2 ], [ 0, %default ]
+  ret i8 %0
+}

From 5e07481d4240b5e8fd85f9b92df30849606c2af0 Mon Sep 17 00:00:00 2001
From: zhongyunde 00443407
Date: Fri, 1 Sep 2023 23:05:45 -0400
Subject: [PATCH 044/877] [SimplifyCFG] Delete the unnecessary range check for
 small mask operation

When the small mask value is less than 64, we can eliminate the check for
the upper limit of the range by enlarging the lookup table size to the
maximum index value. (The final table size then grows to the next
power-of-two value.)

```
bool f(unsigned x) {
  switch (x % 8) {
    case 0: return 1;
    case 1: return 0;
    case 2: return 0;
    case 3: return 1;
    case 4: return 1;
    case 5: return 0;
    case 6: return 1;
    // This would remove the range check:
    case 7: return 0;
  }
  return 0;
}
```

Use WouldFitInRegister instead of fitsInLegalInteger to support more
result types besides bool.

Fixes https://github.com/llvm/llvm-project/issues/65120

Reviewed By: zmodem, nikic, RKSimon
---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp     | 22 +++++++++++++++--
 .../Transforms/SimplifyCFG/switch_mask.ll     | 24 ++++++++-----------
 2 files changed, 30 insertions(+), 16 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 68b5b1a78a346..18187bcdedf09 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -6598,9 +6598,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
   // If the default destination is unreachable, or if the lookup table covers
   // all values of the conditional variable, branch directly to the lookup table
   // BB. Otherwise, check that the condition is within the case range.
-  const bool DefaultIsReachable =
+  bool DefaultIsReachable =
       !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
-  const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);

   // Create the BB that does the lookups.
   Module &Mod = *CommonDest->getParent()->getParent();
@@ -6631,6 +6630,25 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,

   BranchInst *RangeCheckBranch = nullptr;

+  // Grow the table to cover all possible index values to avoid the range check.
+  if (UseSwitchConditionAsTableIndex) {
+    ConstantRange CR = computeConstantRange(TableIndex, /* ForSigned */ false);
+    // Growing the table shouldn't have any size impact given the
+    // WouldFitInRegister check.
+    // TODO: Consider growing the table also when it doesn't fit in a register
+    // if no optsize is specified.
+ if (all_of(ResultTypes, [&](const auto &KV) { + return SwitchLookupTable::WouldFitInRegister( + DL, CR.getUpper().getLimitedValue(), KV.second /* ResultType */); + })) { + // The default branch is unreachable when we enlarge the lookup table. + // Adjust DefaultIsReachable to reuse code path. + TableSize = CR.getUpper().getZExtValue(); + DefaultIsReachable = false; + } + } + + const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize); if (!DefaultIsReachable || GeneratingCoveredLookupTable) { Builder.CreateBr(LookupBB); if (DTU) diff --git a/llvm/test/Transforms/SimplifyCFG/switch_mask.ll b/llvm/test/Transforms/SimplifyCFG/switch_mask.ll index 8c97a0660d070..123519bc69211 100644 --- a/llvm/test/Transforms/SimplifyCFG/switch_mask.ll +++ b/llvm/test/Transforms/SimplifyCFG/switch_mask.ll @@ -8,13 +8,11 @@ define i1 @switch_lookup_with_small_i1(i64 %x) { ; CHECK-LABEL: @switch_lookup_with_small_i1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[AND:%.*]] = and i64 [[X:%.*]], 15 -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i64 [[AND]], 11 -; CHECK-NEXT: [[SWITCH_CAST:%.*]] = trunc i64 [[AND]] to i11 -; CHECK-NEXT: [[SWITCH_SHIFTAMT:%.*]] = mul nuw nsw i11 [[SWITCH_CAST]], 1 -; CHECK-NEXT: [[SWITCH_DOWNSHIFT:%.*]] = lshr i11 -1018, [[SWITCH_SHIFTAMT]] -; CHECK-NEXT: [[SWITCH_MASKED:%.*]] = trunc i11 [[SWITCH_DOWNSHIFT]] to i1 -; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i1 [[SWITCH_MASKED]], i1 false -; CHECK-NEXT: ret i1 [[TMP1]] +; CHECK-NEXT: [[SWITCH_CAST:%.*]] = trunc i64 [[AND]] to i16 +; CHECK-NEXT: [[SWITCH_SHIFTAMT:%.*]] = mul nuw nsw i16 [[SWITCH_CAST]], 1 +; CHECK-NEXT: [[SWITCH_DOWNSHIFT:%.*]] = lshr i16 1030, [[SWITCH_SHIFTAMT]] +; CHECK-NEXT: [[SWITCH_MASKED:%.*]] = trunc i16 [[SWITCH_DOWNSHIFT]] to i1 +; CHECK-NEXT: ret i1 [[SWITCH_MASKED]] ; entry: %and = and i64 %x, 15 @@ -37,13 +35,11 @@ define i8 @switch_lookup_with_small_i8(i64 %x) { ; CHECK-LABEL: @switch_lookup_with_small_i8( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[REM:%.*]] = urem i64 [[X:%.*]], 5 -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i64 [[REM]], 3 -; CHECK-NEXT: [[SWITCH_CAST:%.*]] = trunc i64 [[REM]] to i24 -; CHECK-NEXT: [[SWITCH_SHIFTAMT:%.*]] = mul nuw nsw i24 [[SWITCH_CAST]], 8 -; CHECK-NEXT: [[SWITCH_DOWNSHIFT:%.*]] = lshr i24 460303, [[SWITCH_SHIFTAMT]] -; CHECK-NEXT: [[SWITCH_MASKED:%.*]] = trunc i24 [[SWITCH_DOWNSHIFT]] to i8 -; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i8 [[SWITCH_MASKED]], i8 0 -; CHECK-NEXT: ret i8 [[TMP1]] +; CHECK-NEXT: [[SWITCH_CAST:%.*]] = trunc i64 [[REM]] to i40 +; CHECK-NEXT: [[SWITCH_SHIFTAMT:%.*]] = mul nuw nsw i40 [[SWITCH_CAST]], 8 +; CHECK-NEXT: [[SWITCH_DOWNSHIFT:%.*]] = lshr i40 460303, [[SWITCH_SHIFTAMT]] +; CHECK-NEXT: [[SWITCH_MASKED:%.*]] = trunc i40 [[SWITCH_DOWNSHIFT]] to i8 +; CHECK-NEXT: ret i8 [[SWITCH_MASKED]] ; entry: %rem = urem i64 %x, 5 From b47ff361345a02e783452fdfb03eab3a7718758e Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 26 Oct 2023 13:12:30 +0200 Subject: [PATCH 045/877] [InstCombine] Drop exact flag instead of increasing demanded bits (#70311) Demanded bit simplification for lshr/ashr will currently demand the low bits if the exact flag is set. This is because these bits must be zero to satisfy the flag. However, this means that our demanded bits simplification is worse for lshr/ashr exact than it is for plain lshr/ashr, which is generally not desirable. Instead, drop the exact flag if a demanded bits simplification of the operand succeeds, which may no longer satisfy the exact flag. 
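For example, in the `lshr_exact_and_not_demanded` case from the updated
shift.ll test below, bit 0 of the shift operand is no longer demanded, so
the `and` can be removed once the `exact` flag is dropped:

```
  %y = and i8 %x, -2
  %y.ext = sext i8 %y to i16
  %shr = lshr exact i16 %y.ext, 1
```
becomes:
```
  %y.ext = sext i8 %x to i16
  %shr = lshr i16 %y.ext, 1
```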
This matches what we do for the exact flag on udiv, as well as the
nuw/nsw flags on add/sub/mul.
---
 .../InstCombineSimplifyDemanded.cpp           | 21 +++++++------------
 llvm/test/Transforms/InstCombine/cast.ll      |  4 ++--
 llvm/test/Transforms/InstCombine/select-2.ll  |  2 +-
 llvm/test/Transforms/InstCombine/shift.ll     |  8 +++----
 4 files changed, 14 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index be005e61a8d2d..cd6b017874e8d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -698,14 +698,11 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     // Unsigned shift right.
     APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
-
-    // If the shift is exact, then it does demand the low bits (and knows that
-    // they are zero).
-    if (cast<BinaryOperator>(I)->isExact())
-      DemandedMaskIn.setLowBits(ShiftAmt);
-
-    if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1))
+    if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1)) {
+      // exact flag may no longer hold.
+      I->dropPoisonGeneratingFlags();
       return I;
+    }
     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
     Known.Zero.lshrInPlace(ShiftAmt);
     Known.One.lshrInPlace(ShiftAmt);
@@ -747,13 +744,11 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     if (DemandedMask.countl_zero() <= ShiftAmt)
       DemandedMaskIn.setSignBit();

-    // If the shift is exact, then it does demand the low bits (and knows that
-    // they are zero).
-    if (cast<BinaryOperator>(I)->isExact())
-      DemandedMaskIn.setLowBits(ShiftAmt);
-
-    if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1))
+    if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1)) {
+      // exact flag may no longer hold.
+      I->dropPoisonGeneratingFlags();
       return I;
+    }
     assert(!Known.hasConflict() && "Bits known to be one AND zero?");

     // Compute the new bits that are at the top now plus sign bits.
diff --git a/llvm/test/Transforms/InstCombine/cast.ll b/llvm/test/Transforms/InstCombine/cast.ll index 5b480b1157936..59e488f3f23d5 100644 --- a/llvm/test/Transforms/InstCombine/cast.ll +++ b/llvm/test/Transforms/InstCombine/cast.ll @@ -1318,7 +1318,7 @@ define i64 @test83(i16 %a, i64 %k) { define i8 @test84(i32 %a) { ; ALL-LABEL: @test84( ; ALL-NEXT: [[ADD:%.*]] = add i32 [[A:%.*]], 2130706432 -; ALL-NEXT: [[SHR:%.*]] = lshr exact i32 [[ADD]], 23 +; ALL-NEXT: [[SHR:%.*]] = lshr i32 [[ADD]], 23 ; ALL-NEXT: [[TRUNC:%.*]] = trunc i32 [[SHR]] to i8 ; ALL-NEXT: ret i8 [[TRUNC]] ; @@ -1331,7 +1331,7 @@ define i8 @test84(i32 %a) { define i8 @test85(i32 %a) { ; ALL-LABEL: @test85( ; ALL-NEXT: [[ADD:%.*]] = add i32 [[A:%.*]], 2130706432 -; ALL-NEXT: [[SHR:%.*]] = lshr exact i32 [[ADD]], 23 +; ALL-NEXT: [[SHR:%.*]] = lshr i32 [[ADD]], 23 ; ALL-NEXT: [[TRUNC:%.*]] = trunc i32 [[SHR]] to i8 ; ALL-NEXT: ret i8 [[TRUNC]] ; diff --git a/llvm/test/Transforms/InstCombine/select-2.ll b/llvm/test/Transforms/InstCombine/select-2.ll index 2e4161f5d80aa..148b0dcf10259 100644 --- a/llvm/test/Transforms/InstCombine/select-2.ll +++ b/llvm/test/Transforms/InstCombine/select-2.ll @@ -45,7 +45,7 @@ define float @t3(float %x, float %y) { define i8 @ashr_exact_poison_constant_fold(i1 %b, i8 %x) { ; CHECK-LABEL: @ashr_exact_poison_constant_fold( -; CHECK-NEXT: [[TMP1:%.*]] = ashr exact i8 [[X:%.*]], 3 +; CHECK-NEXT: [[TMP1:%.*]] = ashr i8 [[X:%.*]], 3 ; CHECK-NEXT: [[R:%.*]] = select i1 [[B:%.*]], i8 [[TMP1]], i8 5 ; CHECK-NEXT: ret i8 [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/shift.ll b/llvm/test/Transforms/InstCombine/shift.ll index 7b9626331ff29..913ef2a74aebd 100644 --- a/llvm/test/Transforms/InstCombine/shift.ll +++ b/llvm/test/Transforms/InstCombine/shift.ll @@ -2141,9 +2141,8 @@ define i16 @lshr_and_not_demanded(i8 %x) { define i16 @lshr_exact_and_not_demanded(i8 %x) { ; CHECK-LABEL: @lshr_exact_and_not_demanded( -; CHECK-NEXT: [[Y:%.*]] = and i8 [[X:%.*]], -2 -; CHECK-NEXT: [[Y_EXT:%.*]] = sext i8 [[Y]] to i16 -; CHECK-NEXT: [[SHR:%.*]] = lshr exact i16 [[Y_EXT]], 1 +; CHECK-NEXT: [[Y_EXT:%.*]] = sext i8 [[X:%.*]] to i16 +; CHECK-NEXT: [[SHR:%.*]] = lshr i16 [[Y_EXT]], 1 ; CHECK-NEXT: ret i16 [[SHR]] ; %y = and i8 %x, -2 @@ -2177,8 +2176,7 @@ define i16 @ashr_umax_not_demanded(i16 %x) { define i16 @ashr_exact_umax_not_demanded(i16 %x) { ; CHECK-LABEL: @ashr_exact_umax_not_demanded( -; CHECK-NEXT: [[Y:%.*]] = call i16 @llvm.umax.i16(i16 [[X:%.*]], i16 1) -; CHECK-NEXT: [[SHR:%.*]] = ashr exact i16 [[Y]], 1 +; CHECK-NEXT: [[SHR:%.*]] = ashr i16 [[X:%.*]], 1 ; CHECK-NEXT: ret i16 [[SHR]] ; %y = call i16 @llvm.umax.i16(i16 %x, i16 1) From eb737d6a7644d66ae57b0b4b8a30c6d2d2d961dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= Date: Thu, 26 Oct 2023 13:08:24 +0200 Subject: [PATCH 046/877] [clang][Interp][NFC] Make another Descriptor param const --- clang/lib/AST/Interp/InterpBlock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/AST/Interp/InterpBlock.h b/clang/lib/AST/Interp/InterpBlock.h index 4ab67ebb9eaaf..9d0c4859fd06c 100644 --- a/clang/lib/AST/Interp/InterpBlock.h +++ b/clang/lib/AST/Interp/InterpBlock.h @@ -49,7 +49,7 @@ enum PrimType : unsigned; class Block final { public: /// Creates a new block. 
-  Block(const std::optional<unsigned> &DeclID, Descriptor *Desc,
+  Block(const std::optional<unsigned> &DeclID, const Descriptor *Desc,
         bool IsStatic = false, bool IsExtern = false)
       : DeclID(DeclID), IsStatic(IsStatic), IsExtern(IsExtern), Desc(Desc) {}

From 96e040acee7c1728506ec49a5a229bfecd49f7db Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell
Date: Thu, 26 Oct 2023 12:18:58 +0100
Subject: [PATCH 047/877] [mlir][ArmSVE] Add `-arm-sve-legalize-vector-storage`
 pass (#68794)

This patch adds a pass that ensures that loads, stores, and allocations of
SVE vector types will be legal in the LLVM backend. It does this at the
memref level, so this pass must be applied before lowering all the way to
LLVM.

This pass currently fixes two issues.

## Loading and storing predicate types

It is only legal to load/store predicate types equal to (or greater than) a
full predicate register, which in MLIR is `vector<[16]xi1>`. Smaller
predicate types (`vector<[1|2|4|8]xi1>`) must be converted to/from a full
predicate type (referred to as a `svbool`) before and after storing and
loading respectively. This pass does this by widening allocations and
inserting conversion intrinsics. For example:

```mlir
%alloca = memref.alloca() : memref<vector<[4]xi1>>
%mask = vector.constant_mask [4] : vector<[4]xi1>
memref.store %mask, %alloca[] : memref<vector<[4]xi1>>
%reload = memref.load %alloca[] : memref<vector<[4]xi1>>
```
Becomes:
```mlir
%alloca = memref.alloca() {alignment = 1 : i64} : memref<vector<[16]xi1>>
%mask = vector.constant_mask [4] : vector<[4]xi1>
%svbool = arm_sve.convert_to_svbool %mask : vector<[4]xi1>
memref.store %svbool, %alloca[] : memref<vector<[16]xi1>>
%reload_svbool = memref.load %alloca[] : memref<vector<[16]xi1>>
%reload = arm_sve.convert_from_svbool %reload_svbool : vector<[4]xi1>
```

## Relax alignments for SVE vector allocas

The storage for SVE vector types only needs to have an alignment that
matches the element type (for example 4 byte alignment for `f32`s). However,
the LLVM backend currently defaults to aligning to `base size x element
size` bytes. For non-legal vector types like `vector<[8]xf32>` this results
in 8 x 4 = 32-byte alignment, but the backend only supports up to 16-byte
alignment for SVE vectors on the stack. Explicitly setting a smaller
alignment prevents this issue.
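For example, the pass rewrites an alloca like the following to carry an
explicit 16-byte alignment (an illustrative before/after, mirroring the
tests added below):

```mlir
// Before: the backend would pick 8 x 4 = 32-byte alignment.
%alloca = memref.alloca() : memref<vector<[8]xf32>>
// After: the alignment is explicitly capped at 16 bytes.
%alloca = memref.alloca() {alignment = 16 : i64} : memref<vector<[8]xf32>>
```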
Depends on: #68586 and #68695 (for testing)
---
 .../mlir/Dialect/ArmSVE/CMakeLists.txt        |   1 +
 .../Dialect/ArmSVE/Transforms/CMakeLists.txt  |   5 +
 .../mlir/Dialect/ArmSVE/Transforms/Passes.h   |  36 ++
 .../mlir/Dialect/ArmSVE/Transforms/Passes.td  |  68 ++++
 mlir/include/mlir/InitAllPasses.h             |   2 +
 .../Dialect/ArmSVE/Transforms/CMakeLists.txt  |   2 +
 .../Transforms/LegalizeVectorStorage.cpp      | 338 ++++++++++++++++++
 .../ArmSVE/legalize-vector-storage.mlir       | 203 +++++++++++
 .../ArmSVE/arrays-of-scalable-vectors.mlir    | 117 ++++++
 9 files changed, 772 insertions(+)
 create mode 100644 mlir/include/mlir/Dialect/ArmSVE/Transforms/CMakeLists.txt
 create mode 100644 mlir/include/mlir/Dialect/ArmSVE/Transforms/Passes.h
 create mode 100644 mlir/include/mlir/Dialect/ArmSVE/Transforms/Passes.td
 create mode 100644 mlir/lib/Dialect/ArmSVE/Transforms/LegalizeVectorStorage.cpp
 create mode 100644 mlir/test/Dialect/ArmSVE/legalize-vector-storage.mlir
 create mode 100644 mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/arrays-of-scalable-vectors.mlir

diff --git a/mlir/include/mlir/Dialect/ArmSVE/CMakeLists.txt b/mlir/include/mlir/Dialect/ArmSVE/CMakeLists.txt
index f33061b2d87cf..9f57627c321fb 100644
--- a/mlir/include/mlir/Dialect/ArmSVE/CMakeLists.txt
+++ b/mlir/include/mlir/Dialect/ArmSVE/CMakeLists.txt
@@ -1 +1,2 @@
 add_subdirectory(IR)
+add_subdirectory(Transforms)
diff --git a/mlir/include/mlir/Dialect/ArmSVE/Transforms/CMakeLists.txt b/mlir/include/mlir/Dialect/ArmSVE/Transforms/CMakeLists.txt
new file mode 100644
index 0000000000000..7226642daf861
--- /dev/null
+++ b/mlir/include/mlir/Dialect/ArmSVE/Transforms/CMakeLists.txt
@@ -0,0 +1,5 @@
+set(LLVM_TARGET_DEFINITIONS Passes.td)
+mlir_tablegen(Passes.h.inc -gen-pass-decls -name ArmSVE)
+add_public_tablegen_target(MLIRArmSVEPassIncGen)
+
+add_mlir_doc(Passes ArmSVEPasses ./ -gen-pass-doc)
diff --git a/mlir/include/mlir/Dialect/ArmSVE/Transforms/Passes.h b/mlir/include/mlir/Dialect/ArmSVE/Transforms/Passes.h
new file mode 100644
index 0000000000000..66f30a67cb05b
--- /dev/null
+++ b/mlir/include/mlir/Dialect/ArmSVE/Transforms/Passes.h
@@ -0,0 +1,36 @@
+//===- Passes.h - Pass Entrypoints ------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_ARMSVE_TRANSFORMS_PASSES_H
+#define MLIR_DIALECT_ARMSVE_TRANSFORMS_PASSES_H
+
+#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
+#include "mlir/Pass/Pass.h"
+
+namespace mlir::arm_sve {
+
+#define GEN_PASS_DECL
+#include "mlir/Dialect/ArmSVE/Transforms/Passes.h.inc"
+
+/// Pass to legalize Arm SVE vector storage.
+std::unique_ptr<Pass> createLegalizeVectorStoragePass();
+
+/// Collect a set of patterns to legalize Arm SVE vector storage.
+void populateLegalizeVectorStoragePatterns(RewritePatternSet &patterns);
+
+//===----------------------------------------------------------------------===//
+// Registration
+//===----------------------------------------------------------------------===//
+
+/// Generate the code for registering passes.
+#define GEN_PASS_REGISTRATION
+#include "mlir/Dialect/ArmSVE/Transforms/Passes.h.inc"
+
+} // namespace mlir::arm_sve
+
+#endif // MLIR_DIALECT_ARMSVE_TRANSFORMS_PASSES_H
diff --git a/mlir/include/mlir/Dialect/ArmSVE/Transforms/Passes.td b/mlir/include/mlir/Dialect/ArmSVE/Transforms/Passes.td
new file mode 100644
index 0000000000000..d7cb309db5253
--- /dev/null
+++ b/mlir/include/mlir/Dialect/ArmSVE/Transforms/Passes.td
@@ -0,0 +1,68 @@
+//===-- Passes.td - ArmSVE pass definition file ------------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_ARMSVE_TRANSFORMS_PASSES_TD
+#define MLIR_DIALECT_ARMSVE_TRANSFORMS_PASSES_TD
+
+include "mlir/Pass/PassBase.td"
+
+def LegalizeVectorStorage
+    : Pass<"arm-sve-legalize-vector-storage", "mlir::func::FuncOp"> {
+  let summary = "Ensures stores of SVE vector types will be legal";
+  let description = [{
+    This pass ensures that loads, stores, and allocations of SVE vector types
+    will be legal in the LLVM backend. It does this at the memref level, so this
+    pass must be applied before lowering all the way to LLVM.
+
+    This pass currently addresses two issues.
+
+    ## Loading and storing predicate types
+
+    It is only legal to load/store predicate types equal to (or greater than) a
+    full predicate register, which in MLIR is `vector<[16]xi1>`. Smaller
+    predicate types (`vector<[1|2|4|8]xi1>`) must be converted to/from a full
+    predicate type (referred to as a `svbool`) before and after storing and
+    loading respectively. This pass does this by widening allocations and
+    inserting conversion intrinsics. Note: Non-powers-of-two masks (e.g.
+    `vector<[7]xi1>`), which are not SVE predicates, are ignored.
+
+    For example:
+
+    ```mlir
+    %alloca = memref.alloca() : memref<vector<[4]xi1>>
+    %mask = vector.constant_mask [4] : vector<[4]xi1>
+    memref.store %mask, %alloca[] : memref<vector<[4]xi1>>
+    %reload = memref.load %alloca[] : memref<vector<[4]xi1>>
+    ```
+    Becomes:
+    ```mlir
+    %alloca = memref.alloca() {alignment = 1 : i64} : memref<vector<[16]xi1>>
+    %mask = vector.constant_mask [4] : vector<[4]xi1>
+    %svbool = arm_sve.convert_to_svbool %mask : vector<[4]xi1>
+    memref.store %svbool, %alloca[] : memref<vector<[16]xi1>>
+    %reload_svbool = memref.load %alloca[] : memref<vector<[16]xi1>>
+    %reload = arm_sve.convert_from_svbool %reload_svbool : vector<[4]xi1>
+    ```
+
+    ## Relax alignments for SVE vector allocas
+
+    The storage for SVE vector types only needs to have an alignment that
+    matches the element type (for example 4 byte alignment for `f32`s). However,
+    the LLVM backend currently defaults to aligning to `base size` x
+    `element size` bytes. For non-legal vector types like `vector<[8]xf32>` this
+    results in 8 x 4 = 32-byte alignment, but the backend only supports up to
+    16-byte alignment for SVE vectors on the stack. Explicitly setting a smaller
+    alignment prevents this issue.
+ }]; + let constructor = "mlir::arm_sve::createLegalizeVectorStoragePass()"; + let dependentDialects = ["func::FuncDialect", + "memref::MemRefDialect", "vector::VectorDialect", + "arm_sve::ArmSVEDialect"]; +} + +#endif // MLIR_DIALECT_ARMSVE_TRANSFORMS_PASSES_TD diff --git a/mlir/include/mlir/InitAllPasses.h b/mlir/include/mlir/InitAllPasses.h index 5489a13a8040b..7301905954f56 100644 --- a/mlir/include/mlir/InitAllPasses.h +++ b/mlir/include/mlir/InitAllPasses.h @@ -19,6 +19,7 @@ #include "mlir/Dialect/Affine/Passes.h" #include "mlir/Dialect/Arith/Transforms/Passes.h" #include "mlir/Dialect/ArmSME/Transforms/Passes.h" +#include "mlir/Dialect/ArmSVE/Transforms/Passes.h" #include "mlir/Dialect/Async/Passes.h" #include "mlir/Dialect/Bufferization/Pipelines/Passes.h" #include "mlir/Dialect/Bufferization/Transforms/Passes.h" @@ -82,6 +83,7 @@ inline void registerAllPasses() { transform::registerTransformPasses(); vector::registerVectorPasses(); arm_sme::registerArmSMEPasses(); + arm_sve::registerArmSVEPasses(); // Dialect pipelines bufferization::registerBufferizationPipelines(); diff --git a/mlir/lib/Dialect/ArmSVE/Transforms/CMakeLists.txt b/mlir/lib/Dialect/ArmSVE/Transforms/CMakeLists.txt index 2f1c43fae240d..a70c489a51fea 100644 --- a/mlir/lib/Dialect/ArmSVE/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/ArmSVE/Transforms/CMakeLists.txt @@ -1,8 +1,10 @@ add_mlir_dialect_library(MLIRArmSVETransforms LegalizeForLLVMExport.cpp + LegalizeVectorStorage.cpp DEPENDS MLIRArmSVEConversionsIncGen + MLIRArmSVEPassIncGen LINK_LIBS PUBLIC MLIRArmSVEDialect diff --git a/mlir/lib/Dialect/ArmSVE/Transforms/LegalizeVectorStorage.cpp b/mlir/lib/Dialect/ArmSVE/Transforms/LegalizeVectorStorage.cpp new file mode 100644 index 0000000000000..bee1f3659753b --- /dev/null +++ b/mlir/lib/Dialect/ArmSVE/Transforms/LegalizeVectorStorage.cpp @@ -0,0 +1,338 @@ +//===- LegalizeVectorStorage.cpp - Ensures SVE loads/stores are legal -----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/ArmSVE/IR/ArmSVEDialect.h" +#include "mlir/Dialect/ArmSVE/Transforms/Passes.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/Dialect/Vector/IR/VectorOps.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" + +namespace mlir::arm_sve { +#define GEN_PASS_DEF_LEGALIZEVECTORSTORAGE +#include "mlir/Dialect/ArmSVE/Transforms/Passes.h.inc" +} // namespace mlir::arm_sve + +using namespace mlir; +using namespace mlir::arm_sve; + +// A tag to mark unrealized_conversions produced by this pass. This is used to +// detect IR this pass failed to completely legalize, and report an error. +// If everything was successfully legalized, no tagged ops will remain after +// this pass. +constexpr StringLiteral kSVELegalizerTag("__arm_sve_legalize_vector_storage__"); + +/// Definitions: +/// +/// [1] svbool = vector<...x[16]xi1>, which maps to some multiple of full SVE +/// predicate registers. A full predicate is the smallest quantity that can be +/// loaded/stored. +/// +/// [2] SVE mask = hardware-sized SVE predicate mask, i.e. its trailing +/// dimension matches the size of a legal SVE vector size (such as +/// vector<[4]xi1>), but is too small to be stored to memory (i.e smaller than +/// a svbool). 
+
+namespace {
+
+/// Checks if a vector type is an SVE mask [2].
+bool isSVEMaskType(VectorType type) {
+  return type.getRank() > 0 && type.getElementType().isInteger(1) &&
+         type.getScalableDims().back() && type.getShape().back() < 16 &&
+         llvm::isPowerOf2_32(type.getShape().back()) &&
+         !llvm::is_contained(type.getScalableDims().drop_back(), true);
+}
+
+VectorType widenScalableMaskTypeToSvbool(VectorType type) {
+  assert(isSVEMaskType(type));
+  return VectorType::Builder(type).setDim(type.getRank() - 1, 16);
+}
+
+/// A helper for cloning an op and replacing it with a new version, updated by a
+/// callback.
+template <typename TOp, typename TLegalizerCallback>
+void replaceOpWithLegalizedOp(PatternRewriter &rewriter, TOp op,
+                              TLegalizerCallback callback) {
+  // Clone the previous op to preserve any properties/attributes.
+  auto newOp = op.clone();
+  rewriter.insert(newOp);
+  rewriter.replaceOp(op, callback(newOp));
+}
+
+/// A helper for cloning an op and replacing it with a new version, updated by a
+/// callback, and an unrealized conversion back to the type of the replaced op.
+template <typename TOp, typename TLegalizerCallback>
+void replaceOpWithUnrealizedConversion(PatternRewriter &rewriter, TOp op,
+                                       TLegalizerCallback callback) {
+  replaceOpWithLegalizedOp(rewriter, op, [&](TOp newOp) {
+    // Mark our `unrealized_conversion_casts` with a pass label.
+    return rewriter.create<UnrealizedConversionCastOp>(
+        op.getLoc(), TypeRange{op.getResult().getType()},
+        ValueRange{callback(newOp)},
+        NamedAttribute(rewriter.getStringAttr(kSVELegalizerTag),
+                       rewriter.getUnitAttr()));
+  });
+}
+
+/// Extracts the widened SVE memref value (that's legal to store/load) from the
+/// `unrealized_conversion_cast`s added by this pass.
+static FailureOr<Value> getSVELegalizedMemref(Value illegalMemref) {
+  Operation *definingOp = illegalMemref.getDefiningOp();
+  if (!definingOp || !definingOp->hasAttr(kSVELegalizerTag))
+    return failure();
+  auto unrealizedConversion =
+      llvm::cast<UnrealizedConversionCastOp>(definingOp);
+  return unrealizedConversion.getOperand(0);
+}
+
+/// The default alignment of an alloca in LLVM may request overaligned sizes for
+/// SVE types, which will fail during stack frame allocation. This rewrite
+/// explicitly adds a reasonable alignment to allocas of scalable types.
+struct RelaxScalableVectorAllocaAlignment
+    : public OpRewritePattern<memref::AllocaOp> {
+  using OpRewritePattern::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(memref::AllocaOp allocaOp,
+                                PatternRewriter &rewriter) const override {
+    auto memrefElementType = allocaOp.getType().getElementType();
+    auto vectorType = llvm::dyn_cast<VectorType>(memrefElementType);
+    if (!vectorType || !vectorType.isScalable() || allocaOp.getAlignment())
+      return failure();
+
+    // Set alignment based on the defaults for SVE vectors and predicates.
+    unsigned alignment = vectorType.getElementType().isInteger(1) ? 2 : 16;
+    allocaOp.setAlignment(alignment);
+
+    return success();
+  }
+};
+
+/// Replaces allocations of SVE predicates smaller than an svbool [1] (_illegal_
+/// to load/store) with a wider allocation of svbool (_legal_ to load/store)
+/// followed by a tagged unrealized conversion to the original type.
+///
+/// Example
+/// ```
+/// %alloca = memref.alloca() : memref<vector<[8]xi1>>
+/// ```
+/// is rewritten into:
+/// ```
+/// %widened = memref.alloca() {alignment = 1 : i64} : memref<vector<[16]xi1>>
+/// %alloca = builtin.unrealized_conversion_cast %widened
+///             : memref<vector<[16]xi1>> to memref<vector<[8]xi1>>
+///             {__arm_sve_legalize_vector_storage__}
+/// ```
+template <typename AllocLikeOp>
+struct LegalizeSVEMaskAllocation : public OpRewritePattern<AllocLikeOp> {
+  using OpRewritePattern<AllocLikeOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(AllocLikeOp allocLikeOp,
+                                PatternRewriter &rewriter) const override {
+    auto vectorType =
+        llvm::dyn_cast<VectorType>(allocLikeOp.getType().getElementType());
+
+    if (!vectorType || !isSVEMaskType(vectorType))
+      return failure();
+
+    // Replace this alloc-like op of an SVE mask [2] with one of a (storable)
+    // svbool mask [1]. A temporary unrealized_conversion_cast is added to the
+    // old type to allow local rewrites.
+    replaceOpWithUnrealizedConversion(
+        rewriter, allocLikeOp, [&](AllocLikeOp newAllocLikeOp) {
+          newAllocLikeOp.getResult().setType(
+              llvm::cast<MemRefType>(newAllocLikeOp.getType().cloneWith(
+                  {}, widenScalableMaskTypeToSvbool(vectorType))));
+          return newAllocLikeOp;
+        });
+
+    return success();
+  }
+};
+
+/// Replaces vector.type_casts of unrealized conversions to SVE predicate memref
+/// types that are _illegal_ to load/store from (!= svbool [1]), with type casts
+/// of memref types that are _legal_ to load/store, followed by unrealized
+/// conversions.
+///
+/// Example:
+/// ```
+/// %alloca = builtin.unrealized_conversion_cast %widened
+///             : memref<vector<3x[16]xi1>> to memref<vector<3x[8]xi1>>
+///             {__arm_sve_legalize_vector_storage__}
+/// %cast = vector.type_cast %alloca
+///             : memref<vector<3x[8]xi1>> to memref<3xvector<[8]xi1>>
+/// ```
+/// is rewritten into:
+/// ```
+/// %widened_cast = vector.type_cast %widened
+///             : memref<vector<3x[16]xi1>> to memref<3xvector<[16]xi1>>
+/// %cast = builtin.unrealized_conversion_cast %widened_cast
+///             : memref<3xvector<[16]xi1>> to memref<3xvector<[8]xi1>>
+///             {__arm_sve_legalize_vector_storage__}
+/// ```
+struct LegalizeSVEMaskTypeCastConversion
+    : public OpRewritePattern<vector::TypeCastOp> {
+  using OpRewritePattern::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(vector::TypeCastOp typeCastOp,
+                                PatternRewriter &rewriter) const override {
+    auto resultType = typeCastOp.getResultMemRefType();
+    auto vectorType = llvm::dyn_cast<VectorType>(resultType.getElementType());
+
+    if (!vectorType || !isSVEMaskType(vectorType))
+      return failure();
+
+    auto legalMemref = getSVELegalizedMemref(typeCastOp.getMemref());
+    if (failed(legalMemref))
+      return failure();
+
+    // Replace this vector.type_cast with one of a (storable) svbool mask [1].
+    replaceOpWithUnrealizedConversion(
+        rewriter, typeCastOp, [&](vector::TypeCastOp newTypeCast) {
+          newTypeCast.setOperand(*legalMemref);
+          newTypeCast.getResult().setType(
+              llvm::cast<MemRefType>(newTypeCast.getType().cloneWith(
+                  {}, widenScalableMaskTypeToSvbool(vectorType))));
+          return newTypeCast;
+        });
+
+    return success();
+  }
+};
+
+/// Replaces stores to unrealized conversions to SVE predicate memref types that
+/// are _illegal_ to load/store from (!= svbool [1]), with
+/// `arm_sve.convert_to_svbool`s followed by (legal) wider stores.
+///
+/// Example:
+/// ```
+/// memref.store %mask, %alloca[] : memref<vector<[8]xi1>>
+/// ```
+/// is rewritten into:
+/// ```
+/// %svbool = arm_sve.convert_to_svbool %mask : vector<[8]xi1>
+/// memref.store %svbool, %widened[] : memref<vector<[16]xi1>>
+/// ```
+struct LegalizeSVEMaskStoreConversion
+    : public OpRewritePattern<memref::StoreOp> {
+  using OpRewritePattern::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(memref::StoreOp storeOp,
+                                PatternRewriter &rewriter) const override {
+    auto loc = storeOp.getLoc();
+
+    Value valueToStore = storeOp.getValueToStore();
+    auto vectorType = llvm::dyn_cast<VectorType>(valueToStore.getType());
+
+    if (!vectorType || !isSVEMaskType(vectorType))
+      return failure();
+
+    auto legalMemref = getSVELegalizedMemref(storeOp.getMemref());
+    if (failed(legalMemref))
+      return failure();
+
+    auto legalMaskType = widenScalableMaskTypeToSvbool(
+        llvm::cast<VectorType>(valueToStore.getType()));
+    auto convertToSvbool = rewriter.create<ConvertToSvboolOp>(
+        loc, legalMaskType, valueToStore);
+    // Replace this store with a conversion to a storable svbool mask [1],
+    // followed by a wider store.
+    replaceOpWithLegalizedOp(rewriter, storeOp,
+                             [&](memref::StoreOp newStoreOp) {
+                               newStoreOp.setOperand(0, convertToSvbool);
+                               newStoreOp.setOperand(1, *legalMemref);
+                               return newStoreOp;
+                             });
+
+    return success();
+  }
+};
+
+/// Replaces loads from unrealized conversions to SVE predicate memref types
+/// that are _illegal_ to load/store from (!= svbool [1]), with (legal)
+/// wider loads, followed by `arm_sve.convert_from_svbool`s.
+///
+/// Example:
+/// ```
+/// %reload = memref.load %alloca[] : memref<vector<[4]xi1>>
+/// ```
+/// is rewritten into:
+/// ```
+/// %svbool = memref.load %widened[] : memref<vector<[16]xi1>>
+/// %reload = arm_sve.convert_from_svbool %svbool : vector<[4]xi1>
+/// ```
+struct LegalizeSVEMaskLoadConversion : public OpRewritePattern<memref::LoadOp> {
+  using OpRewritePattern::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(memref::LoadOp loadOp,
+                                PatternRewriter &rewriter) const override {
+    auto loc = loadOp.getLoc();
+
+    Value loadedMask = loadOp.getResult();
+    auto vectorType = llvm::dyn_cast<VectorType>(loadedMask.getType());
+
+    if (!vectorType || !isSVEMaskType(vectorType))
+      return failure();
+
+    auto legalMemref = getSVELegalizedMemref(loadOp.getMemref());
+    if (failed(legalMemref))
+      return failure();
+
+    auto legalMaskType = widenScalableMaskTypeToSvbool(vectorType);
+    // Replace this load with a legal load of an svbool type, followed by a
+    // conversion back to the original type.
+    replaceOpWithLegalizedOp(rewriter, loadOp, [&](memref::LoadOp newLoadOp) {
+      newLoadOp.setMemRef(*legalMemref);
+      newLoadOp.getResult().setType(legalMaskType);
+      return rewriter.create<ConvertFromSvboolOp>(
+          loc, loadedMask.getType(), newLoadOp);
+    });
+
+    return success();
+  }
+};
+
+} // namespace
+
+void mlir::arm_sve::populateLegalizeVectorStoragePatterns(
+    RewritePatternSet &patterns) {
+  patterns.add<RelaxScalableVectorAllocaAlignment,
+               LegalizeSVEMaskAllocation<memref::AllocaOp>,
+               LegalizeSVEMaskAllocation<memref::AllocOp>,
+               LegalizeSVEMaskTypeCastConversion,
+               LegalizeSVEMaskStoreConversion, LegalizeSVEMaskLoadConversion>(
+      patterns.getContext());
+}
+
+namespace {
+struct LegalizeVectorStorage
+    : public arm_sve::impl::LegalizeVectorStorageBase<LegalizeVectorStorage> {
+
+  void runOnOperation() override {
+    RewritePatternSet patterns(&getContext());
+    populateLegalizeVectorStoragePatterns(patterns);
+    if (failed(applyPatternsAndFoldGreedily(getOperation(),
+                                            std::move(patterns)))) {
+      signalPassFailure();
+    }
+    ConversionTarget target(getContext());
+    target.addDynamicallyLegalOp<UnrealizedConversionCastOp>(
+        [](UnrealizedConversionCastOp unrealizedConversion) {
+          return !unrealizedConversion->hasAttr(kSVELegalizerTag);
+        });
+    // This detects if we failed to completely legalize the IR.
+    if (failed(applyPartialConversion(getOperation(), target, {})))
+      signalPassFailure();
+  }
+};
+
+} // namespace
+
+std::unique_ptr<Pass> mlir::arm_sve::createLegalizeVectorStoragePass() {
+  return std::make_unique<LegalizeVectorStorage>();
+}
diff --git a/mlir/test/Dialect/ArmSVE/legalize-vector-storage.mlir b/mlir/test/Dialect/ArmSVE/legalize-vector-storage.mlir
new file mode 100644
index 0000000000000..9a3df8376f121
--- /dev/null
+++ b/mlir/test/Dialect/ArmSVE/legalize-vector-storage.mlir
@@ -0,0 +1,203 @@
+// RUN: mlir-opt %s -allow-unregistered-dialect -arm-sve-legalize-vector-storage -split-input-file -verify-diagnostics | FileCheck %s

+/// This tests the basic functionality of the -arm-sve-legalize-vector-storage pass.
+
+// -----
+
+// CHECK-LABEL: @store_and_reload_sve_predicate_nxv1i1(
+// CHECK-SAME: %[[MASK:.*]]: vector<[1]xi1>)
+func.func @store_and_reload_sve_predicate_nxv1i1(%mask: vector<[1]xi1>) -> vector<[1]xi1> {
+  // CHECK-NEXT: %[[ALLOCA:.*]] = memref.alloca() {alignment = 2 : i64} : memref<vector<[16]xi1>>
+  %alloca = memref.alloca() : memref<vector<[1]xi1>>
+  // CHECK-NEXT: %[[SVBOOL:.*]] = arm_sve.convert_to_svbool %[[MASK]] : vector<[1]xi1>
+  // CHECK-NEXT: memref.store %[[SVBOOL]], %[[ALLOCA]][] : memref<vector<[16]xi1>>
+  memref.store %mask, %alloca[] : memref<vector<[1]xi1>>
+  // CHECK-NEXT: %[[RELOAD:.*]] = memref.load %[[ALLOCA]][] : memref<vector<[16]xi1>>
+  // CHECK-NEXT: %[[MASK:.*]] = arm_sve.convert_from_svbool %[[RELOAD]] : vector<[1]xi1>
+  %reload = memref.load %alloca[] : memref<vector<[1]xi1>>
+  // CHECK-NEXT: return %[[MASK]] : vector<[1]xi1>
+  return %reload : vector<[1]xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @store_and_reload_sve_predicate_nxv2i1(
+// CHECK-SAME: %[[MASK:.*]]: vector<[2]xi1>)
+func.func @store_and_reload_sve_predicate_nxv2i1(%mask: vector<[2]xi1>) -> vector<[2]xi1> {
+  // CHECK-NEXT: %[[ALLOCA:.*]] = memref.alloca() {alignment = 2 : i64} : memref<vector<[16]xi1>>
+  %alloca = memref.alloca() : memref<vector<[2]xi1>>
+  // CHECK-NEXT: %[[SVBOOL:.*]] = arm_sve.convert_to_svbool %[[MASK]] : vector<[2]xi1>
+  // CHECK-NEXT: memref.store %[[SVBOOL]], %[[ALLOCA]][] : memref<vector<[16]xi1>>
+  memref.store %mask, %alloca[] : memref<vector<[2]xi1>>
+  // CHECK-NEXT: %[[RELOAD:.*]] = memref.load %[[ALLOCA]][] : memref<vector<[16]xi1>>
+  // CHECK-NEXT: %[[MASK:.*]] = arm_sve.convert_from_svbool %[[RELOAD]] : vector<[2]xi1>
+  %reload = memref.load %alloca[] : memref<vector<[2]xi1>>
+  // CHECK-NEXT: return %[[MASK]] : vector<[2]xi1>
+  return %reload : vector<[2]xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @store_and_reload_sve_predicate_nxv4i1(
+// CHECK-SAME: %[[MASK:.*]]: vector<[4]xi1>)
+func.func @store_and_reload_sve_predicate_nxv4i1(%mask: vector<[4]xi1>) -> vector<[4]xi1> {
+  // CHECK-NEXT: %[[ALLOCA:.*]] = memref.alloca() {alignment = 2 : i64} : memref<vector<[16]xi1>>
+  %alloca = memref.alloca() : memref<vector<[4]xi1>>
+  // CHECK-NEXT: %[[SVBOOL:.*]] = arm_sve.convert_to_svbool %[[MASK]] : vector<[4]xi1>
+  // CHECK-NEXT: memref.store %[[SVBOOL]], %[[ALLOCA]][] : memref<vector<[16]xi1>>
+  memref.store %mask, %alloca[] : memref<vector<[4]xi1>>
+  // CHECK-NEXT: %[[RELOAD:.*]] = memref.load %[[ALLOCA]][] : memref<vector<[16]xi1>>
+  // CHECK-NEXT: %[[MASK:.*]] = arm_sve.convert_from_svbool %[[RELOAD]] : vector<[4]xi1>
+  %reload = memref.load %alloca[] : memref<vector<[4]xi1>>
+  // CHECK-NEXT: return %[[MASK]] : vector<[4]xi1>
+  return %reload : vector<[4]xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @store_and_reload_sve_predicate_nxv8i1(
+// CHECK-SAME: %[[MASK:.*]]: vector<[8]xi1>)
+func.func @store_and_reload_sve_predicate_nxv8i1(%mask: vector<[8]xi1>) -> vector<[8]xi1> {
+  // CHECK-NEXT: %[[ALLOCA:.*]] = memref.alloca() {alignment = 2 : i64} : memref<vector<[16]xi1>>
+  %alloca = memref.alloca() : memref<vector<[8]xi1>>
+  // CHECK-NEXT: %[[SVBOOL:.*]] = arm_sve.convert_to_svbool %[[MASK]] : vector<[8]xi1>
+  // CHECK-NEXT: memref.store %[[SVBOOL]], %[[ALLOCA]][] : memref<vector<[16]xi1>>
+  memref.store %mask, %alloca[] : memref<vector<[8]xi1>>
+  // CHECK-NEXT: %[[RELOAD:.*]] = memref.load %[[ALLOCA]][] : memref<vector<[16]xi1>>
+  // CHECK-NEXT: %[[MASK:.*]] = arm_sve.convert_from_svbool %[[RELOAD]] : vector<[8]xi1>
+  %reload = memref.load %alloca[] : memref<vector<[8]xi1>>
+  // CHECK-NEXT: return %[[MASK]] : vector<[8]xi1>
+  return %reload : vector<[8]xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @store_and_reload_sve_predicate_nxv16i1(
+// CHECK-SAME: %[[MASK:.*]]: vector<[16]xi1>)
+func.func @store_and_reload_sve_predicate_nxv16i1(%mask: vector<[16]xi1>) -> vector<[16]xi1> {
+  // CHECK-NEXT: %[[ALLOCA:.*]] = memref.alloca() {alignment = 2 : i64} : memref<vector<[16]xi1>>
+  %alloca = memref.alloca() : memref<vector<[16]xi1>>
+  // CHECK-NEXT: memref.store %[[MASK]], %[[ALLOCA]][] : memref<vector<[16]xi1>>
+  memref.store %mask, %alloca[] : memref<vector<[16]xi1>>
+  // CHECK-NEXT: %[[RELOAD:.*]] = memref.load %[[ALLOCA]][] : memref<vector<[16]xi1>>
+  %reload = memref.load %alloca[] : memref<vector<[16]xi1>>
+  // CHECK-NEXT: return %[[RELOAD]] : vector<[16]xi1>
+  return %reload : vector<[16]xi1>
+}
+
+// -----
+
+/// This is not a valid SVE mask type, so it is ignored by the
+/// `-arm-sve-legalize-vector-storage` pass.
+
+// CHECK-LABEL: @store_and_reload_unsupported_type(
+// CHECK-SAME: %[[MASK:.*]]: vector<[7]xi1>)
+func.func @store_and_reload_unsupported_type(%mask: vector<[7]xi1>) -> vector<[7]xi1> {
+  // CHECK-NEXT: %[[ALLOCA:.*]] = memref.alloca() {alignment = 2 : i64} : memref<vector<[7]xi1>>
+  %alloca = memref.alloca() : memref<vector<[7]xi1>>
+  // CHECK-NEXT: memref.store %[[MASK]], %[[ALLOCA]][] : memref<vector<[7]xi1>>
+  memref.store %mask, %alloca[] : memref<vector<[7]xi1>>
+  // CHECK-NEXT: %[[RELOAD:.*]] = memref.load %[[ALLOCA]][] : memref<vector<[7]xi1>>
+  %reload = memref.load %alloca[] : memref<vector<[7]xi1>>
+  // CHECK-NEXT: return %[[RELOAD]] : vector<[7]xi1>
+  return %reload : vector<[7]xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @store_2d_mask_and_reload_slice(
+// CHECK-SAME: %[[MASK:.*]]: vector<3x[8]xi1>)
+func.func @store_2d_mask_and_reload_slice(%mask: vector<3x[8]xi1>) -> vector<[8]xi1> {
+  // CHECK-NEXT: %[[C0:.*]] = arith.constant 0 : index
+  %c0 = arith.constant 0 : index
+  // CHECK-NEXT: %[[ALLOCA:.*]] = memref.alloca() {alignment = 2 : i64} : memref<vector<3x[16]xi1>>
+  %alloca = memref.alloca() : memref<vector<3x[8]xi1>>
+  // CHECK-NEXT: %[[SVBOOL:.*]] = arm_sve.convert_to_svbool %[[MASK]] : vector<3x[8]xi1>
+  // CHECK-NEXT: memref.store %[[SVBOOL]], %[[ALLOCA]][] : memref<vector<3x[16]xi1>>
+  memref.store %mask, %alloca[] : memref<vector<3x[8]xi1>>
+  // CHECK-NEXT: %[[UNPACK:.*]] = vector.type_cast %[[ALLOCA]] : memref<vector<3x[16]xi1>> to memref<3xvector<[16]xi1>>
+  %unpack = vector.type_cast %alloca : memref<vector<3x[8]xi1>> to memref<3xvector<[8]xi1>>
+  // CHECK-NEXT: %[[RELOAD:.*]] = memref.load %[[UNPACK]][%[[C0]]] : memref<3xvector<[16]xi1>>
+  // CHECK-NEXT: %[[SLICE:.*]] = arm_sve.convert_from_svbool %[[RELOAD]] : vector<[8]xi1>
+  %slice = memref.load %unpack[%c0] : memref<3xvector<[8]xi1>>
+  // CHECK-NEXT: return %[[SLICE]] : vector<[8]xi1>
+  return %slice : vector<[8]xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @set_sve_alloca_alignment
+func.func @set_sve_alloca_alignment() {
+  /// This checks that the alignment of allocas of scalable vectors will be
+  /// something the backend can handle. Currently, the backend sets the
+  /// alignment of scalable vectors to their base size (i.e. their size at
+  /// vscale = 1). This works for hardware-sized types, which always get a
+  /// 16-byte alignment. The problem is that larger types, e.g. vector<[8]xf32>,
+  /// end up with alignments larger than 16 bytes (e.g. 32 bytes here), which
+  /// are unsupported. The `-arm-sve-legalize-vector-storage` pass avoids this
+  /// issue by explicitly setting the alignment to 16 bytes for all scalable
+  /// vectors.
+
+  // CHECK-COUNT-6: alignment = 16
+  %a1 = memref.alloca() : memref>
+  %a2 = memref.alloca() : memref>
+  %a3 = memref.alloca() : memref>
+  %a4 = memref.alloca() : memref>
+  %a5 = memref.alloca() : memref>
+  %a6 = memref.alloca() : memref>
+
+  // CHECK-COUNT-6: alignment = 16
+  %b1 = memref.alloca() : memref>
+  %b2 = memref.alloca() : memref>
+  %b3 = memref.alloca() : memref>
+  %b4 = memref.alloca() : memref>
+  %b5 = memref.alloca() : memref>
+  %b6 = memref.alloca() : memref>
+
+  // CHECK-COUNT-6: alignment = 16
+  %c1 = memref.alloca() : memref>
+  %c2 = memref.alloca() : memref>
+  %c3 = memref.alloca() : memref>
+  %c4 = memref.alloca() : memref>
+  %c5 = memref.alloca() : memref>
+  %c6 = memref.alloca() : memref>
+
+  // CHECK-COUNT-6: alignment = 16
+  %d1 = memref.alloca() : memref>
+  %d2 = memref.alloca() : memref>
+  %d3 = memref.alloca() : memref>
+  %d4 = memref.alloca() : memref>
+  %d5 = memref.alloca() : memref>
+  %d6 = memref.alloca() : memref>
+
+  // CHECK-COUNT-6: alignment = 16
+  %e1 = memref.alloca() : memref>
+  %e2 = memref.alloca() : memref>
+  %e3 = memref.alloca() : memref>
+  %e4 = memref.alloca() : memref>
+  %e5 = memref.alloca() : memref>
+  %e6 = memref.alloca() : memref>
+
+  // CHECK-COUNT-6: alignment = 16
+  %f1 = memref.alloca() : memref>
+  %f2 = memref.alloca() : memref>
+  %f3 = memref.alloca() : memref>
+  %f4 = memref.alloca() : memref>
+  %f5 = memref.alloca() : memref>
+  %f6 = memref.alloca() : memref>
+
+  "prevent.dce"(
+    %a1, %a2, %a3, %a4, %a5, %a6,
+    %b1, %b2, %b3, %b4, %b5, %b6,
+    %c1, %c2, %c3, %c4, %c5, %c6,
+    %d1, %d2, %d3, %d4, %d5, %d6,
+    %e1, %e2, %e3, %e4, %e5, %e6,
+    %f1, %f2, %f3, %f4, %f5, %f6)
+    : (memref>, memref>, memref>, memref>, memref>, memref>,
+       memref>, memref>, memref>, memref>, memref>, memref>,
+       memref>, memref>, memref>, memref>, memref>, memref>,
+       memref>, memref>, memref>, memref>, memref>, memref>,
+       memref>, memref>, memref>, memref>, memref>, memref>,
+       memref>, memref>, memref>, memref>, memref>, memref>) -> ()
+  return
+}
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/arrays-of-scalable-vectors.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/arrays-of-scalable-vectors.mlir
new file mode 100644
index 0000000000000..c486bf0de5d35
--- /dev/null
+++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/arrays-of-scalable-vectors.mlir
@@ -0,0 +1,117 @@
+// RUN: mlir-opt %s -convert-vector-to-scf -arm-sve-legalize-vector-storage -convert-vector-to-llvm="enable-arm-sve" -test-lower-to-llvm | \
+// RUN: %mcr_aarch64_cmd -e=entry -entry-point-result=void --march=aarch64 --mattr="+sve" -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
+// RUN: FileCheck %s
+
+/// This tests basic functionality of arrays of scalable vectors, which in MLIR
+/// are vectors with a single trailing scalable dimension. This test requires
+/// the -arm-sve-legalize-vector-storage pass to ensure the loads/stores done
+/// here are legal for the LLVM backend.
+
+func.func @read_and_print_2d_vector(%memref: memref<3x?xf32>) {
+  %cst = arith.constant 0.000000e+00 : f32
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  %c2 = arith.constant 2 : index
+  %dim = memref.dim %memref, %c1 : memref<3x?xf32>
+  %mask = vector.create_mask %c2, %dim : vector<3x[8]xi1>
+  %vector = vector.transfer_read %memref[%c0,%c0], %cst, %mask {in_bounds = [true, true]} : memref<3x?xf32>, vector<3x[8]xf32>
+
+  /// TODO: Support vector.print for arrays of scalable vectors.
+ %row0 = vector.extract %vector[0] : vector<[8]xf32> from vector<3x[8]xf32> + %row1 = vector.extract %vector[1] : vector<[8]xf32> from vector<3x[8]xf32> + %row2 = vector.extract %vector[2] : vector<[8]xf32> from vector<3x[8]xf32> + + /// Print each of the vectors. + /// vscale is >= 1, so at least 8 elements will be printed. + + vector.print str "read_and_print_2d_vector()" + // CHECK-LABEL: read_and_print_2d_vector() + // CHECK: ( 8, 8, 8, 8, 8, 8, 8, 8 + vector.print %row0 : vector<[8]xf32> + // CHECK: ( 8, 8, 8, 8, 8, 8, 8, 8 + vector.print %row1 : vector<[8]xf32> + /// This last row is all zero due to our mask. + // CHECK: ( 0, 0, 0, 0, 0, 0, 0, 0 + vector.print %row2 : vector<[8]xf32> + + return +} + +func.func @print_1x2xVSCALExf32(%vector: vector<1x2x[4]xf32>) { + /// TODO: Support vector.print for arrays of scalable vectors. + %slice0 = vector.extract %vector[0, 1] : vector<[4]xf32> from vector<1x2x[4]xf32> + %slice1 = vector.extract %vector[0, 1] : vector<[4]xf32> from vector<1x2x[4]xf32> + vector.print %slice0 : vector<[4]xf32> + vector.print %slice1 : vector<[4]xf32> + return +} + +func.func @add_arrays_of_scalable_vectors(%a: memref<1x2x?xf32>, %b: memref<1x2x?xf32>) { + %c0 = arith.constant 0 : index + %c2 = arith.constant 2 : index + %c3 = arith.constant 2 : index + %cst = arith.constant 0.000000e+00 : f32 + %dim_a = memref.dim %a, %c2 : memref<1x2x?xf32> + %dim_b = memref.dim %b, %c2 : memref<1x2x?xf32> + %mask_a = vector.create_mask %c2, %c3, %dim_a : vector<1x2x[4]xi1> + %mask_b = vector.create_mask %c2, %c3, %dim_b : vector<1x2x[4]xi1> + + /// Print each of the vectors. + /// vscale is >= 1, so at least 4 elements will be printed. + + // CHECK-LABEL: Vector A + // CHECK-NEXT: ( 5, 5, 5, 5 + // CHECK-NEXT: ( 5, 5, 5, 5 + vector.print str "\nVector A" + %vector_a = vector.transfer_read %a[%c0, %c0, %c0], %cst, %mask_a {in_bounds = [true, true, true]} : memref<1x2x?xf32>, vector<1x2x[4]xf32> + func.call @print_1x2xVSCALExf32(%vector_a) : (vector<1x2x[4]xf32>) -> () + + // CHECK-LABEL: Vector B + // CHECK-NEXT: ( 4, 4, 4, 4 + // CHECK-NEXT: ( 4, 4, 4, 4 + vector.print str "\nVector B" + %vector_b = vector.transfer_read %b[%c0, %c0, %c0], %cst, %mask_b {in_bounds = [true, true, true]} : memref<1x2x?xf32>, vector<1x2x[4]xf32> + func.call @print_1x2xVSCALExf32(%vector_b) : (vector<1x2x[4]xf32>) -> () + + // CHECK-LABEL: Sum + // CHECK-NEXT: ( 9, 9, 9, 9 + // CHECK-NEXT: ( 9, 9, 9, 9 + vector.print str "\nSum" + %sum = arith.addf %vector_a, %vector_b : vector<1x2x[4]xf32> + func.call @print_1x2xVSCALExf32(%sum) : (vector<1x2x[4]xf32>) -> () + + return +} + +func.func @entry() { + %vscale = vector.vscale + + %c4 = arith.constant 4 : index + %c8 = arith.constant 8 : index + %f32_8 = arith.constant 8.0 : f32 + %f32_5 = arith.constant 5.0 : f32 + %f32_4 = arith.constant 4.0 : f32 + + %test_1_memref_size = arith.muli %vscale, %c8 : index + %test_1_memref = memref.alloca(%test_1_memref_size) : memref<3x?xf32> + + linalg.fill ins(%f32_8 : f32) outs(%test_1_memref :memref<3x?xf32>) + + vector.print str "=> Print and read 2D arrays of scalable vectors:" + func.call @read_and_print_2d_vector(%test_1_memref) : (memref<3x?xf32>) -> () + + vector.print str "\n====================\n" + + %test_2_memref_size = arith.muli %vscale, %c4 : index + %test_2_memref_a = memref.alloca(%test_2_memref_size) : memref<1x2x?xf32> + %test_2_memref_b = memref.alloca(%test_2_memref_size) : memref<1x2x?xf32> + + linalg.fill ins(%f32_5 : f32) outs(%test_2_memref_a :memref<1x2x?xf32>) + linalg.fill ins(%f32_4 : 
f32) outs(%test_2_memref_b :memref<1x2x?xf32>) + + vector.print str "=> Reading and adding two 3D arrays of scalable vectors:" + func.call @add_arrays_of_scalable_vectors( + %test_2_memref_a, %test_2_memref_b) : (memref<1x2x?xf32>, memref<1x2x?xf32>) -> () + + return +} From 16fbc45f48e895cc29b81fb36baa29f010d02881 Mon Sep 17 00:00:00 2001 From: Christudasan Devadasan Date: Thu, 26 Oct 2023 17:04:28 +0530 Subject: [PATCH 048/877] Revert "[AMDGPU] Cleanup hasUnwantedEffectsWhenEXECEmpty function (#70206)" This reverts commit 7ce613fc77af092dd6e9db71ce3747b75bc5616e. --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 3 ++- ...ve-short-exec-branches-special-instructions.mir | 14 ++++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 7046c37ef6efd..ffcd415a66648 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -4010,7 +4010,8 @@ bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const // // However, executing them with EXEC = 0 causes them to operate on undefined // data, which we avoid by returning true here. - if (Opcode == AMDGPU::V_READFIRSTLANE_B32) + if (Opcode == AMDGPU::V_READFIRSTLANE_B32 || + Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32) return true; return false; diff --git a/llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-special-instructions.mir b/llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-special-instructions.mir index 8f3f70ea6287e..fe4aa6a9aea68 100644 --- a/llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-special-instructions.mir +++ b/llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-special-instructions.mir @@ -130,12 +130,13 @@ body: | --- -name: dont_skip_writelane_b32 +name: need_skip_writelane_b32 body: | - ; CHECK-LABEL: name: dont_skip_writelane_b32 + ; CHECK-LABEL: name: need_skip_writelane_b32 ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x40000000) + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) @@ -159,12 +160,13 @@ body: | ... --- -name: dont_skip_readlane_b32 +name: need_skip_readlane_b32 body: | - ; CHECK-LABEL: name: dont_skip_readlane_b32 + ; CHECK-LABEL: name: need_skip_readlane_b32 ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x40000000) + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) From 0e110fb429e85e7dc4a1e2de739c0d6e931204a7 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Thu, 26 Oct 2023 13:40:25 +0200 Subject: [PATCH 049/877] [libc] memmove optimizations (#70043) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. Remove is_disjoint check for smaller sizes and reduce code bloat. inline_memmove may handle some small sizes as efficiently as inline_memcpy. For these sizes we may not do is_disjoint check. This both avoids additional code for the most frequent smaller sizes and removes code bloat (we don't need the memcpy logic for small sizes). 
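A rough, self-contained sketch of the resulting dispatch shape (all names,
the threshold, and the helper bodies below are illustrative placeholders;
the real code composes the shared inline_memmove/inline_memcpy primitives
as described next):

```cpp
#include <cstddef>
#include <cstdint>
#include <cstring>

// Buffers are disjoint if one ends at or before the start of the other.
static bool is_disjoint_sketch(const char *dst, const char *src,
                               size_t count) {
  return reinterpret_cast<uintptr_t>(dst) >=
             reinterpret_cast<uintptr_t>(src) + count ||
         reinterpret_cast<uintptr_t>(src) >=
             reinterpret_cast<uintptr_t>(dst) + count;
}

// Overlap-safe copy: pick the direction that never clobbers unread bytes.
static void overlap_safe_copy(char *dst, const char *src, size_t count) {
  if (dst < src)
    for (size_t i = 0; i < count; ++i) dst[i] = src[i];
  else
    for (size_t i = count; i > 0; --i) dst[i - 1] = src[i - 1];
}

void *memmove_sketch(void *dst_v, const void *src_v, size_t count) {
  char *dst = static_cast<char *>(dst_v);
  const char *src = static_cast<const char *>(src_v);
  // (1) Small sizes: handled directly, with no is_disjoint check, since an
  // overlap-safe copy is as cheap as memcpy here.
  if (count <= 64) { // hypothetical small-size threshold
    overlap_safe_copy(dst, src, count);
    return dst_v;
  }
  // Larger sizes: fast memcpy path when disjoint, overlap-safe otherwise.
  if (is_disjoint_sketch(dst, src, count))
    memcpy(dst, src, count);
  else
    overlap_safe_copy(dst, src, count);
  return dst_v;
}
```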
Here we heavily rely on inlining and dead code elimination: from the first
inline_memmove we should get only the handling of small sizes, and from the
second inline_memmove and inline_memcpy we should get only the handling of
larger sizes.

2. Use the memcpy thresholds for memmove. The memcpy thresholds were more
carefully tuned. This becomes more important now that we always use memmove
for all small sizes.

3. Fix boundary conditions for sizes = 16/32/64. See the added comment for
the explanation.

Memmove function size drops from 885 to 715 bytes due to the removed
duplication.

```
│ baseline │ small-size │
│ sec/op │ sec/op vs base │
memmove/Google_A 3.208n ± 0% 2.911n ± 0% -9.25% (n=100)
memmove/Google_B 4.113n ± 1% 3.428n ± 0% -16.65% (n=100)
memmove/Google_D 5.838n ± 0% 4.158n ± 0% -28.78% (n=100)
memmove/Google_S 4.712n ± 1% 3.899n ± 0% -17.25% (n=100)
memmove/Google_U 3.609n ± 0% 3.247n ± 1% -10.02% (n=100)
memmove/0 2.982n ± 0% 2.169n ± 0% -27.26% (n=50)
memmove/1 3.253n ± 0% 2.168n ± 0% -33.34% (n=50)
memmove/2 3.255n ± 0% 2.169n ± 0% -33.38% (n=50)
memmove/3 3.259n ± 2% 2.175n ± 0% -33.27% (p=0.000 n=50)
memmove/4 3.259n ± 0% 2.168n ± 5% -33.46% (p=0.000 n=50)
memmove/5 2.488n ± 0% 1.926n ± 0% -22.57% (p=0.000 n=50)
memmove/6 2.490n ± 0% 1.928n ± 0% -22.59% (p=0.000 n=50)
memmove/7 2.492n ± 0% 1.927n ± 0% -22.65% (p=0.000 n=50)
memmove/8 2.737n ± 0% 2.711n ± 0% -0.97% (p=0.000 n=50)
memmove/9 2.736n ± 0% 2.711n ± 0% -0.94% (p=0.000 n=50)
memmove/10 2.739n ± 0% 2.711n ± 0% -1.04% (p=0.000 n=50)
memmove/11 2.740n ± 0% 2.711n ± 0% -1.07% (p=0.000 n=50)
memmove/12 2.740n ± 0% 2.711n ± 0% -1.09% (p=0.000 n=50)
memmove/13 2.744n ± 0% 2.711n ± 0% -1.22% (p=0.000 n=50)
memmove/14 2.742n ± 0% 2.711n ± 0% -1.14% (p=0.000 n=50)
memmove/15 2.742n ± 0% 2.711n ± 0% -1.15% (p=0.000 n=50)
memmove/16 2.997n ± 0% 2.981n ± 0% -0.52% (p=0.000 n=50)
memmove/17 2.998n ± 0% 2.981n ± 0% -0.55% (p=0.000 n=50)
memmove/18 2.998n ± 0% 2.981n ± 0% -0.55% (p=0.000 n=50)
memmove/19 2.999n ± 0% 2.982n ± 0% -0.59% (p=0.000 n=50)
memmove/20 2.998n ± 0% 2.981n ± 0% -0.55% (p=0.000 n=50)
memmove/21 3.000n ± 0% 2.981n ± 0% -0.61% (p=0.000 n=50)
memmove/22 3.002n ± 0% 2.981n ± 0% -0.68% (p=0.000 n=50)
memmove/23 3.002n ± 0% 2.981n ± 0% -0.67% (p=0.000 n=50)
memmove/24 3.002n ± 0% 2.981n ± 0% -0.70% (n=50)
memmove/25 3.002n ± 0% 2.981n ± 0% -0.68% (p=0.000 n=50)
memmove/26 3.004n ± 0% 2.982n ± 0% -0.74% (p=0.000 n=50)
memmove/27 3.005n ± 0% 2.981n ± 0% -0.79% (n=50)
memmove/28 3.005n ± 0% 2.982n ± 0% -0.77% (n=50)
memmove/29 3.009n ± 0% 2.981n ± 0% -0.92% (n=50)
memmove/30 3.008n ± 0% 2.981n ± 0% -0.89% (n=50)
memmove/31 3.007n ± 0% 2.982n ± 0% -0.86% (n=50)
memmove/32 3.540n ± 0% 2.998n ± 0% -15.31% (p=0.000 n=50)
memmove/33 3.544n ± 0% 2.997n ± 0% -15.44% (p=0.000 n=50)
memmove/34 3.546n ± 0% 2.999n ± 0% -15.42% (n=50)
memmove/35 3.545n ± 0% 2.999n ± 0% -15.40% (n=50)
memmove/36 3.548n ± 0% 2.998n ± 0% -15.52% (p=0.000 n=50)
memmove/37 3.546n ± 0% 3.000n ± 0% -15.41% (n=50)
memmove/38 3.549n ± 0% 2.999n ± 0% -15.49% (p=0.000 n=50)
memmove/39 3.549n ± 0% 2.999n ± 0% -15.48% (p=0.000 n=50)
memmove/40 3.549n ± 0% 3.000n ± 0% -15.46% (p=0.000 n=50)
memmove/41 3.550n ± 0% 3.001n ± 0% -15.47% (n=50)
memmove/42 3.549n ± 0% 3.001n ± 0% -15.43% (n=50)
memmove/43 3.552n ± 0% 3.001n ± 0% -15.52% (p=0.000 n=50)
memmove/44 3.552n ± 0% 3.001n ± 0% -15.51% (n=50)
memmove/45 3.552n ± 0% 3.002n ± 0% -15.48% (n=50)
memmove/46 3.554n ± 0% 3.001n ± 0% -15.55% (p=0.000 n=50)
memmove/47 3.556n ± 0% 3.002n ± 0% -15.58% (p=0.000 n=50)
memmove/48 3.555n ± 0% 3.003n ± 0% -15.54% (n=50)
memmove/49 3.557n ± 0% 3.002n ± 0% -15.59% (p=0.000 n=50)
memmove/50 3.557n ± 0% 3.004n ± 0% -15.55% (p=0.000 n=50)
memmove/51 3.556n ± 0% 3.004n ± 0% -15.53% (p=0.000 n=50)
memmove/52 3.561n ± 0% 3.004n ± 0% -15.65% (p=0.000 n=50)
memmove/53 3.558n ± 0% 3.004n ± 0% -15.57% (p=0.000 n=50)
memmove/54 3.561n ± 0% 3.005n ± 0% -15.62% (n=50)
memmove/55 3.560n ± 0% 3.006n ± 0% -15.57% (n=50)
memmove/56 3.562n ± 0% 3.006n ± 0% -15.60% (p=0.000 n=50)
memmove/57 3.563n ± 0% 3.006n ± 0% -15.64% (n=50)
memmove/58 3.565n ± 0% 3.007n ± 0% -15.64% (p=0.000 n=50)
memmove/59 3.564n ± 0% 3.006n ± 0% -15.66% (p=0.000 n=50)
memmove/60 3.570n ± 0% 3.008n ± 0% -15.74% (p=0.000 n=50)
memmove/61 3.566n ± 0% 3.009n ± 0% -15.63% (p=0.000 n=50)
memmove/62 3.567n ± 0% 3.007n ± 0% -15.70% (p=0.000 n=50)
memmove/63 3.568n ± 0% 3.008n ± 0% -15.71% (p=0.000 n=50)
memmove/64 4.104n ± 0% 3.008n ± 0% -26.70% (p=0.000 n=50)
memmove/65 4.126n ± 0% 3.662n ± 0% -11.26% (p=0.000 n=50)
memmove/66 4.128n ± 0% 3.662n ± 0% -11.29% (n=50)
memmove/67 4.129n ± 0% 3.662n ± 0% -11.31% (n=50)
memmove/68 4.129n ± 0% 3.661n ± 0% -11.33% (p=0.000 n=50)
memmove/69 4.130n ± 0% 3.662n ± 0% -11.34% (p=0.000 n=50)
memmove/70 4.130n ± 0% 3.662n ± 0% -11.33% (n=50)
memmove/71 4.132n ± 0% 3.662n ± 0% -11.38% (p=0.000 n=50)
memmove/72 4.131n ± 0% 3.661n ± 0% -11.39% (n=50)
memmove/73 4.135n ± 0% 3.661n ± 0% -11.45% (p=0.000 n=50)
memmove/74 4.137n ± 0% 3.662n ± 0% -11.49% (n=50)
memmove/75 4.138n ± 0% 3.662n ± 0% -11.51% (p=0.000 n=50)
memmove/76 4.139n ± 0% 3.661n ± 0% -11.56% (p=0.000 n=50)
memmove/77 4.136n ± 0% 3.662n ± 0% -11.47% (p=0.000 n=50)
memmove/78 4.143n ± 0% 3.661n ± 0% -11.62% (p=0.000 n=50)
memmove/79 4.142n ± 0% 3.661n ± 0% -11.60% (n=50)
memmove/80 4.142n ± 0% 3.661n ± 0% -11.62% (p=0.000 n=50)
memmove/81 4.140n ± 0% 3.661n ± 0% -11.57% (n=50)
memmove/82 4.146n ± 0% 3.661n ± 0% -11.69% (n=50)
memmove/83 4.143n ± 0% 3.661n ± 0% -11.63% (p=0.000 n=50)
memmove/84 4.143n ± 0% 3.661n ± 0% -11.63% (n=50)
memmove/85 4.147n ± 0% 3.661n ± 0% -11.73% (p=0.000 n=50)
memmove/86 4.142n ± 0% 3.661n ± 0% -11.62% (p=0.000 n=50)
memmove/87 4.147n ± 0% 3.661n ± 0% -11.72% (p=0.000 n=50)
memmove/88 4.148n ± 0% 3.661n ± 0% -11.74% (n=50)
memmove/89 4.152n ± 0% 3.661n ± 0% -11.84% (n=50)
memmove/90 4.151n ± 0% 3.661n ± 0% -11.81% (n=50)
memmove/91 4.150n ± 0% 3.661n ± 0% -11.78% (n=50)
memmove/92 4.153n ± 0% 3.661n ± 0% -11.86% (n=50)
memmove/93 4.158n ± 0% 3.661n ± 0% -11.95% (n=50)
memmove/94 4.157n ± 0% 3.661n ± 0% -11.95% (p=0.000 n=50)
memmove/95 4.155n ± 0% 3.661n ± 0% -11.90% (p=0.000 n=50)
memmove/96 4.149n ± 0% 3.660n ± 0% -11.79% (n=50)
memmove/97 4.157n ± 0% 3.661n ± 0% -11.94% (n=50)
memmove/98 4.157n ± 0% 3.661n ± 0% -11.94% (n=50)
memmove/99 4.168n ± 0% 3.661n ± 0% -12.17% (p=0.000 n=50)
memmove/100 4.159n ± 0% 3.660n ± 0% -12.00% (p=0.000 n=50)
memmove/101 4.161n ± 0% 3.660n ± 0% -12.03% (p=0.000 n=50)
memmove/102 4.165n ± 0% 3.660n ± 0% -12.12% (p=0.000 n=50)
memmove/103 4.164n ± 0% 3.661n ± 0% -12.08% (n=50)
memmove/104 4.164n ± 0% 3.660n ± 0% -12.11% (n=50)
memmove/105 4.165n ± 0% 3.660n ± 0% -12.12% (p=0.000 n=50)
memmove/106 4.166n ± 0% 3.660n ± 0% -12.15% (n=50)
memmove/107 4.171n ± 0% 3.660n ± 1% -12.26% (p=0.000 n=50)
memmove/108 4.173n ± 0% 3.660n ± 0% -12.30% (p=0.000 n=50)
memmove/109 4.170n ± 0% 3.660n ± 0% -12.24% (n=50)
memmove/110 4.174n ± 0% 3.660n ± 0% -12.31% (n=50)
memmove/111 4.176n ± 0% 3.660n ± 0% -12.35% (p=0.000 n=50)
memmove/112 4.174n ± 0% 3.659n ± 0% -12.34% (p=0.000 n=50)
memmove/113 4.176n ± 0% 3.660n ± 0% -12.35% (n=50)
memmove/114 4.182n ± 0% 3.660n ± 0% -12.49% (n=50)
memmove/115 4.185n ± 0% 3.660n ± 0% -12.55% (n=50)
memmove/116 4.184n ± 0% 3.659n ± 0% -12.54% (n=50)
memmove/117 4.182n ± 0% 3.660n ± 0% -12.50% (n=50)
memmove/118 4.188n ± 0% 3.660n ± 0% -12.61% (n=50)
memmove/119 4.186n ± 0% 3.660n ± 0% -12.57% (p=0.000 n=50)
memmove/120 4.189n ± 0% 3.659n ± 0% -12.63% (n=50)
memmove/121 4.187n ± 0% 3.660n ± 0% -12.60% (n=50)
memmove/122 4.186n ± 0% 3.660n ± 0% -12.58% (n=50)
memmove/123 4.187n ± 0% 3.660n ± 0% -12.60% (n=50)
memmove/124 4.189n ± 0% 3.659n ± 0% -12.65% (n=50)
memmove/125 4.195n ± 0% 3.659n ± 0% -12.78% (n=50)
memmove/126 4.197n ± 0% 3.659n ± 0% -12.81% (n=50)
memmove/127 4.194n ± 0% 3.659n ± 0% -12.75% (n=50)
memmove/128 5.035n ± 0% 3.659n ± 0% -27.32% (n=50)
memmove/129 5.127n ± 0% 5.164n ± 0% +0.73% (p=0.000 n=50)
memmove/130 5.130n ± 0% 5.176n ± 0% +0.88% (p=0.000 n=50)
memmove/131 5.127n ± 0% 5.180n ± 0% +1.05% (p=0.000 n=50)
memmove/132 5.131n ± 0% 5.169n ± 0% +0.75% (p=0.000 n=50)
memmove/133 5.137n ± 0% 5.179n ± 0% +0.81% (p=0.000 n=50)
memmove/134 5.140n ± 0% 5.178n ± 0% +0.74% (p=0.000 n=50)
memmove/135 5.141n ± 0% 5.187n ± 0% +0.88% (p=0.000 n=50)
memmove/136 5.133n ± 0% 5.184n ± 0% +0.99% (p=0.000 n=50)
memmove/137 5.148n ± 0% 5.186n ± 0% +0.73% (p=0.000 n=50)
memmove/138 5.143n ± 0% 5.189n ± 0% +0.88% (p=0.000 n=50)
memmove/139 5.142n ± 0% 5.192n ± 0% +0.97% (p=0.000 n=50)
memmove/140 5.141n ± 0% 5.192n ± 0% +1.01% (p=0.000 n=50)
memmove/141 5.155n ± 0% 5.188n ± 0% +0.64% (p=0.000 n=50)
memmove/142 5.146n ± 0% 5.192n ± 0% +0.90% (p=0.000 n=50)
memmove/143 5.142n ± 0% 5.203n ± 0% +1.19% (p=0.000 n=50)
memmove/144 5.146n ± 0% 5.197n ± 0% +0.99% (p=0.000 n=50)
memmove/145 5.146n ± 0% 5.196n ± 0% +0.97% (p=0.000 n=50)
memmove/146 5.151n ± 0% 5.207n ± 0% +1.10% (p=0.000 n=50)
memmove/147 5.151n ± 0% 5.205n ± 0% +1.06% (p=0.000 n=50)
memmove/148 5.156n ± 0% 5.190n ± 0% +0.66% (p=0.000 n=50)
memmove/149 5.158n ± 0% 5.212n ± 0% +1.04% (p=0.000 n=50)
memmove/150 5.160n ± 0% 5.203n ± 0% +0.84% (p=0.000 n=50)
memmove/151 5.167n ± 0% 5.210n ± 0% +0.83% (p=0.000 n=50)
memmove/152 5.157n ± 0% 5.206n ± 0% +0.94% (p=0.000 n=50)
memmove/153 5.170n ± 0% 5.211n ± 0% +0.80% (p=0.000 n=50)
memmove/154 5.169n ± 0% 5.222n ± 0% +1.02% (p=0.000 n=50)
memmove/155 5.171n ± 0% 5.215n ± 0% +0.87% (p=0.000 n=50)
memmove/156 5.174n ± 0% 5.214n ± 0% +0.78% (p=0.000 n=50)
memmove/157 5.171n ± 0% 5.218n ± 0% +0.92% (p=0.000 n=50)
memmove/158 5.168n ± 0% 5.224n ± 0% +1.09% (p=0.000 n=50)
memmove/159 5.179n ± 0% 5.218n ± 0% +0.76% (p=0.000 n=50)
memmove/160 5.170n ± 0% 5.219n ± 0% +0.95% (p=0.000 n=50)
memmove/161 5.187n ± 0% 5.220n ± 0% +0.64% (p=0.000 n=50)
memmove/162 5.189n ± 0% 5.234n ± 0% +0.86% (p=0.000 n=50)
memmove/163 5.199n ± 0% 5.250n ± 0% +0.99% (p=0.000 n=50)
memmove/164 5.205n ± 0% 5.260n ± 0% +1.04% (p=0.000 n=50)
memmove/165 5.208n ± 0% 5.261n ± 0% +1.01% (p=0.000 n=50)
memmove/166 5.227n ± 0% 5.275n ± 0% +0.91% (p=0.000 n=50)
memmove/167 5.233n ± 0% 5.281n ± 0% +0.92% (p=0.000 n=50)
memmove/168 5.236n ± 0% 5.295n ± 0% +1.12% (p=0.000 n=50)
memmove/169 5.256n ± 0% 5.297n ± 0% +0.79% (p=0.000 n=50)
memmove/170 5.259n ± 0% 5.302n ± 0% +0.80% (p=0.000 n=50)
memmove/171 5.269n ± 0% 5.321n ± 0% +0.97% (p=0.000 n=50)
memmove/172 5.266n ± 0% 5.318n ± 0% +0.98% (p=0.000 n=50)
memmove/173 5.272n ± 0% 5.330n ± 0% +1.09% (p=0.000 n=50)
memmove/174 5.284n ± 0% 5.331n ± 0% +0.89% (p=0.000 n=50)
memmove/175 5.284n ± 0% 5.322n ± 0% +0.72% (p=0.000 n=50)
memmove/176 5.298n ± 0% 5.337n ± 0% +0.74% (p=0.000 n=50)
memmove/177 5.282n ± 0% 5.338n ± 0% +1.04% (p=0.000 n=50)
memmove/178 5.299n ± 0% 5.337n ± 0% +0.71% (p=0.000 n=50)
memmove/179 5.296n ± 0% 5.343n ± 0% +0.88% (p=0.000 n=50)
memmove/180 5.292n ± 0% 5.343n ± 0% +0.97% (p=0.000 n=50)
memmove/181 5.303n ± 0% 5.335n ± 0% +0.60% (p=0.000 n=50)
memmove/182 5.305n ± 0% 5.338n ± 0% +0.62% (p=0.000 n=50)
memmove/183 5.298n ± 0% 5.329n ± 0% +0.59% (p=0.000 n=50)
memmove/184 5.299n ± 0% 5.333n ± 0% +0.64% (p=0.000 n=50)
memmove/185 5.291n ± 0% 5.330n ± 0% +0.73% (p=0.000 n=50)
memmove/186 5.296n ± 0% 5.332n ± 0% +0.68% (p=0.000 n=50)
memmove/187 5.297n ± 0% 5.320n ± 0% +0.44% (p=0.000 n=50)
memmove/188 5.286n ± 0% 5.314n ± 0% +0.53% (p=0.000 n=50)
memmove/189 5.293n ± 0% 5.318n ± 0% +0.46% (p=0.000 n=50)
memmove/190 5.294n ± 0% 5.318n ± 0% +0.45% (p=0.000 n=50)
memmove/191 5.292n ± 0% 5.314n ± 0% +0.40% (p=0.032 n=50)
memmove/192 5.272n ± 0% 5.304n ± 0% +0.60% (p=0.000 n=50)
memmove/193 5.279n ± 0% 5.310n ± 0% +0.57% (p=0.000 n=50)
memmove/194 5.294n ± 0% 5.308n ± 0% +0.26% (p=0.018 n=50)
memmove/195 5.302n ± 0% 5.311n ± 0% +0.18% (p=0.010 n=50)
memmove/196 5.301n ± 0% 5.316n ± 0% +0.28% (p=0.023 n=50)
memmove/197 5.302n ± 0% 5.327n ± 0% +0.47% (p=0.000 n=50)
memmove/198 5.310n ± 0% 5.326n ± 0% +0.30% (p=0.003 n=50)
memmove/199 5.303n ± 0% 5.319n ± 0% +0.30% (p=0.009 n=50)
memmove/200 5.312n ± 0% 5.330n ± 0% +0.35% (p=0.001 n=50)
memmove/201 5.307n ± 0% 5.333n ± 0% +0.50% (p=0.000 n=50)
memmove/202 5.311n ± 0% 5.334n ± 0% +0.44% (p=0.000 n=50)
memmove/203 5.313n ± 0% 5.335n ± 0% +0.41% (p=0.006 n=50)
memmove/204 5.312n ± 0% 5.332n ± 0% +0.36% (p=0.002 n=50)
memmove/205 5.318n ± 0% 5.345n ± 0% +0.50% (p=0.000 n=50)
memmove/206 5.311n ± 0% 5.333n ± 0% +0.42% (p=0.002 n=50)
memmove/207 5.310n ± 0% 5.338n ± 0% +0.52% (p=0.000 n=50)
memmove/208 5.319n ± 0% 5.341n ± 0% +0.40% (p=0.004 n=50)
memmove/209 5.330n ± 0% 5.346n ± 0% +0.30% (p=0.004 n=50)
memmove/210 5.329n ± 0% 5.349n ± 0% +0.38% (p=0.002 n=50)
memmove/211 5.318n ± 0% 5.340n ± 0% +0.41% (p=0.000 n=50)
memmove/212 5.339n ± 0% 5.343n ± 0% ~ (p=0.396 n=50)
memmove/213 5.329n ± 0% 5.343n ± 0% +0.25% (p=0.017 n=50)
memmove/214 5.339n ± 0% 5.358n ± 0% +0.35% (p=0.035 n=50)
memmove/215 5.342n ± 0% 5.346n ± 0% ~ (p=0.063 n=50)
memmove/216 5.338n ± 0% 5.359n ± 0% +0.39% (p=0.002 n=50)
memmove/217 5.341n ± 0% 5.362n ± 0% +0.39% (p=0.015 n=50)
memmove/218 5.354n ± 0% 5.373n ± 0% +0.36% (p=0.041 n=50)
memmove/219 5.352n ± 0% 5.362n ± 0% ~ (p=0.143 n=50)
memmove/220 5.344n ± 0% 5.370n ± 0% +0.50% (p=0.001 n=50)
memmove/221 5.345n ± 0% 5.373n ± 0% +0.53% (p=0.000 n=50)
memmove/222 5.348n ± 0% 5.360n ± 0% +0.23% (p=0.014 n=50)
memmove/223 5.354n ± 0% 5.377n ± 0% +0.43% (p=0.024 n=50)
memmove/224 5.352n ± 0% 5.363n ± 0% ~ (p=0.052 n=50)
memmove/225 5.372n ± 0% 5.380n ± 0% ~ (p=0.481 n=50)
memmove/226 5.368n ± 0% 5.386n ± 0% +0.34% (p=0.004 n=50)
memmove/227 5.386n ± 0% 5.402n ± 0% +0.29% (p=0.028 n=50)
memmove/228 5.400n ± 0% 5.408n ± 0% ~ (p=0.174 n=50)
memmove/229 5.423n ± 0% 5.427n ± 0% ~ (p=0.444 n=50)
memmove/230 5.411n ± 0% 5.429n ± 0% +0.33% (p=0.020 n=50)
memmove/231 5.420n ± 0% 5.433n ± 0% +0.24% (p=0.034 n=50)
memmove/232 5.435n ± 0% 5.441n ± 0% ~ (p=0.235 n=50)
memmove/233 5.446n ± 0% 5.462n ± 0% ~ (p=0.590 n=50)
memmove/234 5.467n ± 0% 5.461n ± 0% ~ (p=0.921 n=50)
memmove/235 5.472n ± 0% 5.478n ± 0% ~ (p=0.883 n=50)
memmove/236 5.466n ± 0% 5.478n ± 0% ~ (p=0.324 n=50)
memmove/237 5.471n ± 0% 5.489n ± 0% ~ (p=0.132 n=50)
memmove/238 5.485n ± 0% 5.489n ± 0% ~ (p=0.460 n=50)
memmove/239 5.484n ± 0% 5.488n ± 0% ~ (p=0.833 n=50)
memmove/240 5.483n ± 0% 5.495n ± 0% ~ (p=0.095 n=50)
memmove/241 5.498n ± 0% 5.514n ± 0% ~ (p=0.077 n=50)
memmove/242 5.518n ± 0% 5.517n ± 0% ~ (p=0.481 n=50)
memmove/243 5.514n ± 0% 5.511n ± 0% ~ (p=0.503 n=50)
memmove/244 5.510n ± 0% 5.497n ± 0% -0.24% (p=0.038 n=50)
memmove/245 5.516n ± 0% 5.505n ± 0% ~ (p=0.317 n=50)
memmove/246 5.513n ± 1% 5.494n ± 0% ~ (p=0.147 n=50)
memmove/247 5.518n ± 0% 5.499n ± 0% -0.36% (p=0.011 n=50)
memmove/248 5.503n ± 0% 5.492n ± 0% ~ (p=0.267 n=50)
memmove/249 5.498n ± 0% 5.497n ± 0% ~ (p=0.765 n=50)
memmove/250 5.485n ± 0% 5.493n ± 0% ~ (p=0.348 n=50)
memmove/251 5.503n ± 0% 5.482n ± 0% -0.37% (p=0.013 n=50)
memmove/252 5.497n ± 0% 5.485n ± 0% ~ (p=0.077 n=50)
memmove/253 5.489n ± 0% 5.496n ± 0% ~ (p=0.850 n=50)
memmove/254 5.497n ± 0% 5.491n ± 0% ~ (p=0.548 n=50)
memmove/255 5.484n ± 1% 5.494n ± 0% ~ (p=0.888 n=50)
memmove/256 6.952n ± 0% 7.676n ± 0% +10.41% (p=0.000 n=50)
geomean 4.406n 4.127n -6.33%
```
---
 libc/src/string/memmove.cpp                   |  8 +-
 libc/src/string/memory_utils/inline_memmove.h | 44 ++++++++--
 .../memory_utils/x86_64/inline_memmove.h      | 86 +++++++++++++++----
 3 files changed, 114 insertions(+), 24 deletions(-)

diff --git a/libc/src/string/memmove.cpp b/libc/src/string/memmove.cpp
index 7d473afc0b42e..19e38a3c8bdbe 100644
--- a/libc/src/string/memmove.cpp
+++ b/libc/src/string/memmove.cpp
@@ -15,10 +15,16 @@ namespace LIBC_NAMESPACE {
 LLVM_LIBC_FUNCTION(void *, memmove,
                    (void *dst, const void *src, size_t count)) {
+  // Memmove may handle some small sizes as efficiently as inline_memcpy.
+  // For these sizes we do not need the is_disjoint check.
+  // This both avoids additional code for the most frequent smaller sizes
+  // and removes code bloat (we don't need the memcpy logic for small sizes).
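+  // (On x86-64, inline_memmove_small_size currently returns true for all
+  // counts up to 128; targets without a specialized small-size path use
+  // inline_memmove_no_small_size, which always returns false, so every
+  // size falls through to the dispatch below.)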
+ if (inline_memmove_small_size(dst, src, count)) + return dst; if (is_disjoint(dst, src, count)) inline_memcpy(dst, src, count); else - inline_memmove(dst, src, count); + inline_memmove_follow_up(dst, src, count); return dst; } diff --git a/libc/src/string/memory_utils/inline_memmove.h b/libc/src/string/memory_utils/inline_memmove.h index f72ea24ab538d..30c2c3ddbf1bb 100644 --- a/libc/src/string/memory_utils/inline_memmove.h +++ b/libc/src/string/memory_utils/inline_memmove.h @@ -13,28 +13,58 @@ #if defined(LIBC_TARGET_ARCH_IS_X86) #include "src/string/memory_utils/x86_64/inline_memmove.h" -#define LIBC_SRC_STRING_MEMORY_UTILS_MEMMOVE inline_memmove_x86 +#define LIBC_SRC_STRING_MEMORY_UTILS_MEMMOVE_SMALL_SIZE \ + inline_memmove_small_size_x86 +#define LIBC_SRC_STRING_MEMORY_UTILS_MEMMOVE_FOLLOW_UP \ + inline_memmove_follow_up_x86 #elif defined(LIBC_TARGET_ARCH_IS_AARCH64) #include "src/string/memory_utils/aarch64/inline_memmove.h" -#define LIBC_SRC_STRING_MEMORY_UTILS_MEMMOVE inline_memmove_aarch64 +#define LIBC_SRC_STRING_MEMORY_UTILS_MEMMOVE_SMALL_SIZE \ + inline_memmove_no_small_size +#define LIBC_SRC_STRING_MEMORY_UTILS_MEMMOVE_FOLLOW_UP inline_memmove_aarch64 #elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV) #include "src/string/memory_utils/riscv/inline_memmove.h" -#define LIBC_SRC_STRING_MEMORY_UTILS_MEMMOVE inline_memmove_riscv +#define LIBC_SRC_STRING_MEMORY_UTILS_MEMMOVE_SMALL_SIZE \ + inline_memmove_no_small_size +#define LIBC_SRC_STRING_MEMORY_UTILS_MEMMOVE_FOLLOW_UP inline_memmove_riscv #elif defined(LIBC_TARGET_ARCH_IS_ARM) #include "src/string/memory_utils/generic/byte_per_byte.h" -#define LIBC_SRC_STRING_MEMORY_UTILS_MEMMOVE inline_memmove_byte_per_byte +#define LIBC_SRC_STRING_MEMORY_UTILS_MEMMOVE_SMALL_SIZE \ + inline_memmove_no_small_size +#define LIBC_SRC_STRING_MEMORY_UTILS_MEMMOVE_FOLLOW_UP \ + inline_memmove_byte_per_byte #elif defined(LIBC_TARGET_ARCH_IS_GPU) #include "src/string/memory_utils/generic/builtin.h" -#define LIBC_SRC_STRING_MEMORY_UTILS_MEMMOVE inline_memmove_builtin +#define LIBC_SRC_STRING_MEMORY_UTILS_MEMMOVE_SMALL_SIZE \ + inline_memmove_no_small_size +#define LIBC_SRC_STRING_MEMORY_UTILS_MEMMOVE_FOLLOW_UP inline_memmove_builtin #else #error "Unsupported architecture" #endif namespace LIBC_NAMESPACE { +LIBC_INLINE constexpr bool inline_memmove_no_small_size(void *, const void *, + size_t) { + return false; +} + +LIBC_INLINE bool inline_memmove_small_size(void *dst, const void *src, + size_t count) { + return LIBC_SRC_STRING_MEMORY_UTILS_MEMMOVE_SMALL_SIZE( + reinterpret_cast(dst), reinterpret_cast(src), count); +} + +LIBC_INLINE void inline_memmove_follow_up(void *dst, const void *src, + size_t count) { + LIBC_SRC_STRING_MEMORY_UTILS_MEMMOVE_FOLLOW_UP( + reinterpret_cast(dst), reinterpret_cast(src), count); +} + LIBC_INLINE void inline_memmove(void *dst, const void *src, size_t count) { - LIBC_SRC_STRING_MEMORY_UTILS_MEMMOVE(reinterpret_cast(dst), - reinterpret_cast(src), count); + if (inline_memmove_small_size(dst, src, count)) + return; + inline_memmove_follow_up(dst, src, count); } } // namespace LIBC_NAMESPACE diff --git a/libc/src/string/memory_utils/x86_64/inline_memmove.h b/libc/src/string/memory_utils/x86_64/inline_memmove.h index 95ad07f752195..879b36eaa6734 100644 --- a/libc/src/string/memory_utils/x86_64/inline_memmove.h +++ b/libc/src/string/memory_utils/x86_64/inline_memmove.h @@ -18,40 +18,94 @@ namespace LIBC_NAMESPACE { -LIBC_INLINE void inline_memmove_x86(Ptr dst, CPtr src, size_t count) { +LIBC_INLINE bool 
inline_memmove_small_size_x86(Ptr dst, CPtr src, + size_t count) { #if defined(__AVX512F__) + constexpr size_t vector_size = 64; using uint128_t = generic_v128; using uint256_t = generic_v256; using uint512_t = generic_v512; #elif defined(__AVX__) + constexpr size_t vector_size = 32; using uint128_t = generic_v128; using uint256_t = generic_v256; using uint512_t = cpp::array; #elif defined(__SSE2__) + constexpr size_t vector_size = 16; using uint128_t = generic_v128; using uint256_t = cpp::array; using uint512_t = cpp::array; #else + constexpr size_t vector_size = 8; using uint128_t = cpp::array; using uint256_t = cpp::array; using uint512_t = cpp::array; #endif + (void)vector_size; if (count == 0) - return; - if (count == 1) - return generic::Memmove::block(dst, src); - if (count <= 4) - return generic::Memmove::head_tail(dst, src, count); - if (count <= 8) - return generic::Memmove::head_tail(dst, src, count); - if (count <= 16) - return generic::Memmove::head_tail(dst, src, count); - if (count <= 32) - return generic::Memmove::head_tail(dst, src, count); - if (count <= 64) - return generic::Memmove::head_tail(dst, src, count); - if (count <= 128) - return generic::Memmove::head_tail(dst, src, count); + return true; + if (count == 1) { + generic::Memmove::block(dst, src); + return true; + } + if (count == 2) { + generic::Memmove::block(dst, src); + return true; + } + if (count == 3) { + generic::Memmove>::block(dst, src); + return true; + } + if (count == 4) { + generic::Memmove::block(dst, src); + return true; + } + if (count < 8) { + generic::Memmove::head_tail(dst, src, count); + return true; + } + // If count is equal to a power of 2, we can handle it as head-tail + // of both smaller size and larger size (head-tail are either + // non-overlapping for smaller size, or completely collapsed + // for larger size). It seems to be more profitable to do the copy + // with the larger size, if it's natively supported (e.g. doing + // 2 collapsed 32-byte moves for count=64 if AVX2 is supported). + // But it's not profitable to use larger size if it's not natively + // supported: we will both use more instructions and handle fewer + // sizes in earlier branches. + if (vector_size >= 16 ? count < 16 : count <= 16) { + generic::Memmove::head_tail(dst, src, count); + return true; + } + if (vector_size >= 32 ? count < 32 : count <= 32) { + generic::Memmove::head_tail(dst, src, count); + return true; + } + if (vector_size >= 64 ? count < 64 : count <= 64) { + generic::Memmove::head_tail(dst, src, count); + return true; + } + if (count <= 128) { + generic::Memmove::head_tail(dst, src, count); + return true; + } + return false; +} + +LIBC_INLINE void inline_memmove_follow_up_x86(Ptr dst, CPtr src, size_t count) { +#if defined(__AVX512F__) + using uint256_t = generic_v256; + using uint512_t = generic_v512; +#elif defined(__AVX__) + using uint256_t = generic_v256; + using uint512_t = cpp::array; +#elif defined(__SSE2__) + using uint256_t = cpp::array; + using uint512_t = cpp::array; +#else + using uint256_t = cpp::array; + using uint512_t = cpp::array; +#endif if (dst < src) { generic::Memmove::align_forward(dst, src, count); return generic::Memmove::loop_and_tail_forward(dst, src, count); From 9db8f99b61e4c5bab45a41a4da2ab169954d8ff1 Mon Sep 17 00:00:00 2001 From: Johannes Reifferscheid Date: Thu, 26 Oct 2023 13:41:14 +0200 Subject: [PATCH 050/877] [Bazel] Fixes for 96e040a. 
--- .../llvm-project-overlay/mlir/BUILD.bazel | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index abe613b45c80a..2b6a82111d879 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -2103,18 +2103,38 @@ cc_library( ], ) +gentbl_cc_library( + name = "ArmSVEPassIncGen", + tbl_outs = [ + ( + [ + "-gen-pass-decls", + "-name=ArmSVE", + ], + "include/mlir/Dialect/ArmSVE/Transforms/Passes.h.inc", + ), + ], + tblgen = ":mlir-tblgen", + td_file = "include/mlir/Dialect/ArmSVE/Transforms/Passes.td", + deps = [":PassBaseTdFiles"], +) + cc_library( name = "ArmSVETransforms", srcs = glob(["lib/Dialect/ArmSVE/Transforms/*.cpp"]), - hdrs = ["include/mlir/Dialect/ArmSVE/Transforms/Transforms.h"], + hdrs = glob(["include/mlir/Dialect/ArmSVE/Transforms/*.h"]), includes = ["include"], deps = [ ":ArmSVEDialect", + ":ArmSVEPassIncGen", ":DialectUtils", ":FuncDialect", ":IR", ":LLVMCommonConversion", ":LLVMDialect", + ":MemRefDialect", + ":Pass", + ":TransformUtils", ":VectorDialect", ], ) From 8a80e331506e3e3db390ed0b482c7cbe216f7afc Mon Sep 17 00:00:00 2001 From: bjacob Date: Thu, 26 Oct 2023 07:47:00 -0400 Subject: [PATCH 051/877] Add `isBatchVecmat` utilities for `linalg.batch_vecmat` (#70284) `linalg.batch_vecmat` was just added in https://github.com/llvm/llvm-project/pull/70218, but I forgot then to add the standard `isBatchVecmat` utilities --- .../Dialect/Linalg/IR/LinalgInterfaces.td | 11 ++++ .../mlir/Dialect/Utils/StructuredOpsUtils.h | 6 +++ mlir/lib/Dialect/Utils/StructuredOpsUtils.cpp | 25 +++++++++ .../Dialect/Utils/StructuredOpsUtilsTest.cpp | 52 +++++++++++++++++++ 4 files changed, 94 insertions(+) diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td index 44e82f452b3ce..69ca888a8acdb 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td @@ -98,6 +98,17 @@ def LinalgContractionOpInterface : OpInterface<"ContractionOpInterface"> { return mlir::isVecmat($_op.getIndexingMaps()); }]>, InterfaceMethod< + /*desc=*/[{ + Returns whether the given op has indexing maps that correspond to a + batched vector-matrix multiplication. + }], + /*retTy=*/"bool", + /*methodName=*/"isBatchVecmat", + /*args=*/(ins), + /*methodBody=*/[{ + return mlir::isBatchVecmat($_op.getIndexingMaps()); + }]>, + InterfaceMethod< /*desc=*/[{ Returns whether the given op has indexing maps that correspond to a matrix-vector multiplication. diff --git a/mlir/include/mlir/Dialect/Utils/StructuredOpsUtils.h b/mlir/include/mlir/Dialect/Utils/StructuredOpsUtils.h index 225b9f287d340..134c5569fbb2f 100644 --- a/mlir/include/mlir/Dialect/Utils/StructuredOpsUtils.h +++ b/mlir/include/mlir/Dialect/Utils/StructuredOpsUtils.h @@ -55,6 +55,12 @@ bool isRowMajorBatchMatmul(ArrayAttr indexingMaps); /// performed within the reduction. bool isVecmat(ArrayAttr indexingMaps); +/// Tests whether the given maps describe a batch vector matrix multiplication. +/// The test is permutation-invariant. Note that this only checks the affine +/// maps from an operation, so does not perform any checks on the math being +/// performed within the reduction. +bool isBatchVecmat(ArrayAttr indexingMaps); + /// Tests whether the given maps describe a matrix vector multiplication. 
The /// test is permutation-invariant. Note that this only checks the affine maps /// from an operation, so does not perform any checks on the math being diff --git a/mlir/lib/Dialect/Utils/StructuredOpsUtils.cpp b/mlir/lib/Dialect/Utils/StructuredOpsUtils.cpp index 641ddf3f91cb2..383ef1cea53fd 100644 --- a/mlir/lib/Dialect/Utils/StructuredOpsUtils.cpp +++ b/mlir/lib/Dialect/Utils/StructuredOpsUtils.cpp @@ -120,6 +120,31 @@ bool mlir::isVecmat(ArrayAttr indexingMaps) { return indexingMaps == maps; } +bool mlir::isBatchVecmat(ArrayAttr indexingMaps) { + if (indexingMaps.size() != 3) + return false; + AffineMap map0 = cast(indexingMaps[0]).getValue(); + AffineMap map1 = cast(indexingMaps[1]).getValue(); + AffineMap map2 = cast(indexingMaps[2]).getValue(); + + if (map0.getNumResults() != 2 || map1.getNumResults() != 3 || + map2.getNumResults() != 2 || map0.getNumInputs() != 3 || + map1.getNumInputs() != 3 || map2.getNumInputs() != 3) { + return false; + } + + // Extract dimensions for B*K * B*K*N -> B*N + AffineExpr b = map0.getResult(0); + AffineExpr k = map0.getResult(1); + AffineExpr n = map2.getResult(1); + auto *context = indexingMaps.getContext(); + auto mapA = AffineMapAttr::get(AffineMap::get(3, 0, {b, k}, context)); + auto mapB = AffineMapAttr::get(AffineMap::get(3, 0, {b, k, n}, context)); + auto mapC = AffineMapAttr::get(AffineMap::get(3, 0, {b, n}, context)); + auto maps = ArrayAttr::get(context, {mapA, mapB, mapC}); + return indexingMaps == maps; +} + bool mlir::isMatvec(ArrayAttr indexingMaps) { if (indexingMaps.size() != 3) return false; diff --git a/mlir/unittests/Dialect/Utils/StructuredOpsUtilsTest.cpp b/mlir/unittests/Dialect/Utils/StructuredOpsUtilsTest.cpp index 3f576bacebf6a..d257fc5d6e041 100644 --- a/mlir/unittests/Dialect/Utils/StructuredOpsUtilsTest.cpp +++ b/mlir/unittests/Dialect/Utils/StructuredOpsUtilsTest.cpp @@ -370,4 +370,56 @@ TEST(isBatchMatvec, WrongDimOrderMatrix) { EXPECT_THAT(maps, Not(Truly(isBatchMatvec))); } +TEST(isBatchVecmat, Simple) { + MLIRContext context; + + AffineExpr batch, k, n; + bindDims(&context, batch, k, n); + auto mapA = AffineMapAttr::get(AffineMap::get(3, 0, {batch, k}, &context)); + auto mapB = AffineMapAttr::get(AffineMap::get(3, 0, {batch, k, n}, &context)); + auto mapC = AffineMapAttr::get(AffineMap::get(3, 0, {batch, n}, &context)); + auto maps = ArrayAttr::get(&context, {mapA, mapB, mapC}); + + EXPECT_THAT(maps, Truly(isBatchVecmat)); +} + +TEST(isBatchVecmat, BindingSwapped) { + MLIRContext context; + + AffineExpr batch, k, n; + bindDims(&context, batch, n, k); // bind in different order + auto mapA = AffineMapAttr::get(AffineMap::get(3, 0, {batch, k}, &context)); + auto mapB = AffineMapAttr::get(AffineMap::get(3, 0, {batch, k, n}, &context)); + auto mapC = AffineMapAttr::get(AffineMap::get(3, 0, {batch, n}, &context)); + auto maps = ArrayAttr::get(&context, {mapA, mapB, mapC}); + + EXPECT_THAT(maps, Truly(isBatchVecmat)); +} + +TEST(isBatchVecmat, Matmul) { + MLIRContext context; + + AffineExpr m, n, k; + bindDims(&context, m, n, k); + auto mapA = AffineMapAttr::get(AffineMap::get(3, 0, {m, k}, &context)); + auto mapB = AffineMapAttr::get(AffineMap::get(3, 0, {k, n}, &context)); + auto mapC = AffineMapAttr::get(AffineMap::get(3, 0, {m, n}, &context)); + auto maps = ArrayAttr::get(&context, {mapA, mapB, mapC}); + + EXPECT_THAT(maps, Not(Truly(isBatchVecmat))); +} + +TEST(isBatchVecmat, WrongDimOrderMatrix) { + MLIRContext context; + + AffineExpr batch, k, n; + bindDims(&context, batch, k, n); + auto mapA = 
      AffineMapAttr::get(AffineMap::get(3, 0, {batch, k}, &context));
+  auto mapB = AffineMapAttr::get(AffineMap::get(3, 0, {batch, n, k}, &context));
+  auto mapC = AffineMapAttr::get(AffineMap::get(3, 0, {batch, n}, &context));
+  auto maps = ArrayAttr::get(&context, {mapA, mapB, mapC});
+
+  EXPECT_THAT(maps, Not(Truly(isBatchVecmat)));
+}
+
 } // namespace

From 8149066fa532d82ff62a0629d5a9fab6bd4da768 Mon Sep 17 00:00:00 2001
From: Weining Lu
Date: Thu, 26 Oct 2023 11:50:28 +0800
Subject: [PATCH 052/877] [LoongArch][test] Add some ABI regression tests for empty struct. NFC

How empty structs (not as fields of container struct) are passed in C++
is not explicitly documented in psABI. This patch adds some tests showing
clang's current handling. Some of the results are different from gcc.
Following patch(es) will try to fix the mismatch.

---
 .../LoongArch/abi-lp64d-empty-structs.c       | 53 +++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c
index fb90bf556c19b..d0daafac336ec 100644
--- a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c
+++ b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c
@@ -81,9 +81,62 @@ struct s8 test_s8(struct s8 a) {
   return a;
 }
 
+/// Note: The tests below check how empty structs are passed on their own,
+/// while the tests above check that empty structs as fields of a container
+/// struct are ignored when flattening structs to examine whether the
+/// container structs can be passed via FARs.
+
 // CHECK-C: define{{.*}} void @test_s9()
 // CHECK-CXX: define{{.*}} i64 @_Z7test_s92s9(i64 {{.*}})
 struct s9 { struct empty e; };
 struct s9 test_s9(struct s9 a) {
   return a;
 }
+
+// CHECK-C: define{{.*}} void @test_s10()
+// CHECK-CXX: define{{.*}} void @_Z8test_s103s10()
+struct s10 { };
+struct s10 test_s10(struct s10 a) {
+  return a;
+}
+
+// CHECK-C: define{{.*}} void @test_s11()
+// CHECK-CXX: define{{.*}} i64 @_Z8test_s113s11(i64 {{.*}})
+struct s11 { struct { } s; };
+struct s11 test_s11(struct s11 a) {
+  return a;
+}
+
+// CHECK-C: define{{.*}} void @test_s12()
+// CHECK-CXX: define{{.*}} void @_Z8test_s123s12()
+struct s12 { int i[0]; };
+struct s12 test_s12(struct s12 a) {
+  return a;
+}
+
+// CHECK-C: define{{.*}} void @test_s13()
+// CHECK-CXX: define{{.*}} void @_Z8test_s133s13()
+struct s13 { struct { } s[0]; };
+struct s13 test_s13(struct s13 a) {
+  return a;
+}
+
+// CHECK-C: define{{.*}} void @test_s14()
+// CHECK-CXX: define{{.*}} i64 @_Z8test_s143s14(i64 {{.*}})
+struct s14 { struct { } s[1]; };
+struct s14 test_s14(struct s14 a) {
+  return a;
+}
+
+// CHECK-C: define{{.*}} void @test_s15()
+// CHECK-CXX: define{{.*}} void @_Z8test_s153s15()
+struct s15 { int : 0; };
+struct s15 test_s15(struct s15 a) {
+  return a;
+}
+
+// CHECK-C: define{{.*}} void @test_s16()
+// CHECK-CXX: define{{.*}} void @_Z8test_s163s16()
+struct s16 { int : 1; };
+struct s16 test_s16(struct s16 a) {
+  return a;
+}

From 658874e084e594f9881d5f1d004fba54d0a44d85 Mon Sep 17 00:00:00 2001
From: Timm Baeder
Date: Thu, 26 Oct 2023 13:53:54 +0200
Subject: [PATCH 053/877] [clang][Interp] Handle unknown-size arrays better (#68868)

Unfortunately, it seems we do need some checks for array-to-pointer
decays after all.
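For illustration, a minimal case of the pattern this has to handle
(distilled from the tests added to clang/test/AST/Interp/arrays.cpp
below): decay of an array member without known bound must now be rejected
in a constant expression.

```
struct Foo {
  char c;
  int a[]; // flexible array member, no known bound
};
constexpr Foo F{};
constexpr const int *A = F.a; // must be diagnosed: array-to-pointer decay
                              // of array member without known bound
```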
---
 clang/lib/AST/Interp/ByteCodeExprGen.cpp | 14 +++++++-
 clang/lib/AST/Interp/EvalEmitter.cpp     |  6 ++++
 clang/lib/AST/Interp/Interp.cpp          |  2 ++
 clang/lib/AST/Interp/Interp.h            | 20 +++++++++++
 clang/lib/AST/Interp/Opcodes.td          |  2 ++
 clang/test/AST/Interp/arrays.cpp         | 42 ++++++++++++++++++++++++
 6 files changed, 85 insertions(+), 1 deletion(-)

diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
index 1b33c69b93aa4..1e508f8998abe 100644
--- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp
+++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
@@ -168,7 +168,16 @@ bool ByteCodeExprGen::VisitCastExpr(const CastExpr *CE) {
     return this->emitCastPointerIntegral(T, CE);
   }
 
-  case CK_ArrayToPointerDecay:
+  case CK_ArrayToPointerDecay: {
+    if (!this->visit(SubExpr))
+      return false;
+    if (!this->emitArrayDecay(CE))
+      return false;
+    if (DiscardResult)
+      return this->emitPopPtr(CE);
+    return true;
+  }
+
   case CK_AtomicToNonAtomic:
   case CK_ConstructorConversion:
   case CK_FunctionToPointerDecay:
@@ -505,6 +514,9 @@ bool ByteCodeExprGen::VisitImplicitValueInitExpr(const ImplicitValueIni
   if (QT->isRecordType())
     return false;
 
+  if (QT->isIncompleteArrayType())
+    return true;
+
   if (QT->isArrayType()) {
     const ArrayType *AT = QT->getAsArrayTypeUnsafe();
     assert(AT);
diff --git a/clang/lib/AST/Interp/EvalEmitter.cpp b/clang/lib/AST/Interp/EvalEmitter.cpp
index f8942291b3b16..9bc42057c5f57 100644
--- a/clang/lib/AST/Interp/EvalEmitter.cpp
+++ b/clang/lib/AST/Interp/EvalEmitter.cpp
@@ -185,6 +185,12 @@ bool EvalEmitter::emitRetValue(const SourceInfo &Info) {
     }
     return Ok;
   }
+
+  if (Ty->isIncompleteArrayType()) {
+    R = APValue(APValue::UninitArray(), 0, 0);
+    return true;
+  }
+
   if (const auto *AT = Ty->getAsArrayTypeUnsafe()) {
     const size_t NumElems = Ptr.getNumElems();
     QualType ElemTy = AT->getElementType();
diff --git a/clang/lib/AST/Interp/Interp.cpp b/clang/lib/AST/Interp/Interp.cpp
index c87bb2fa6b02f..4a4c0922758c9 100644
--- a/clang/lib/AST/Interp/Interp.cpp
+++ b/clang/lib/AST/Interp/Interp.cpp
@@ -483,6 +483,8 @@ static bool CheckFieldsInitialized(InterpState &S, CodePtr OpPC,
     if (FieldType->isRecordType()) {
       Result &= CheckFieldsInitialized(S, OpPC, FieldPtr, FieldPtr.getRecord());
+    } else if (FieldType->isIncompleteArrayType()) {
+      // Nothing to do here.
     } else if (FieldType->isArrayType()) {
       const auto *CAT = cast(FieldType->getAsArrayTypeUnsafe());
diff --git a/clang/lib/AST/Interp/Interp.h b/clang/lib/AST/Interp/Interp.h
index 2132e8b0a8cfa..86cc267652951 100644
--- a/clang/lib/AST/Interp/Interp.h
+++ b/clang/lib/AST/Interp/Interp.h
@@ -1802,17 +1802,37 @@ inline bool ArrayElemPtr(InterpState &S, CodePtr OpPC) {
   const T &Offset = S.Stk.pop();
   const Pointer &Ptr = S.Stk.peek();
 
+  if (!CheckArray(S, OpPC, Ptr))
+    return false;
+
   if (!OffsetHelper(S, OpPC, Offset, Ptr))
     return false;
 
   return NarrowPtr(S, OpPC);
 }

+/// Just takes a pointer and checks if it's an incomplete
+/// array type.
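+/// Returns false, after emitting note_constexpr_unsupported_unsized_array,
+/// if the pointee is an array of unknown bound; otherwise it does nothing.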
+inline bool ArrayDecay(InterpState &S, CodePtr OpPC) { + const Pointer &Ptr = S.Stk.peek(); + + if (!Ptr.isUnknownSizeArray()) + return true; + + const SourceInfo &E = S.Current->getSource(OpPC); + S.FFDiag(E, diag::note_constexpr_unsupported_unsized_array); + + return false; +} + template ::T> inline bool ArrayElemPtrPop(InterpState &S, CodePtr OpPC) { const T &Offset = S.Stk.pop(); const Pointer &Ptr = S.Stk.pop(); + if (!CheckArray(S, OpPC, Ptr)) + return false; + if (!OffsetHelper(S, OpPC, Offset, Ptr)) return false; diff --git a/clang/lib/AST/Interp/Opcodes.td b/clang/lib/AST/Interp/Opcodes.td index e1e7e5e2efbb0..69068e87d5720 100644 --- a/clang/lib/AST/Interp/Opcodes.td +++ b/clang/lib/AST/Interp/Opcodes.td @@ -687,3 +687,5 @@ def InvalidCast : Opcode { def InvalidDeclRef : Opcode { let Args = [ArgDeclRef]; } + +def ArrayDecay : Opcode; diff --git a/clang/test/AST/Interp/arrays.cpp b/clang/test/AST/Interp/arrays.cpp index 7110785ea4c66..d1673094c2660 100644 --- a/clang/test/AST/Interp/arrays.cpp +++ b/clang/test/AST/Interp/arrays.cpp @@ -455,3 +455,45 @@ namespace NoInitMapLeak { // ref-error {{not an integral constant expression}} \ // ref-note {{in call to}} } + +namespace Incomplete { + struct Foo { + char c; + int a[]; + }; + + constexpr Foo F{}; + constexpr const int *A = F.a; // ref-error {{must be initialized by a constant expression}} \ + // ref-note {{array-to-pointer decay of array member without known bound}} \ + // expected-error {{must be initialized by a constant expression}} \ + // expected-note {{array-to-pointer decay of array member without known bound}} + + constexpr const int *B = F.a + 1; // ref-error {{must be initialized by a constant expression}} \ + // ref-note {{array-to-pointer decay of array member without known bound}} \ + // expected-error {{must be initialized by a constant expression}} \ + // expected-note {{array-to-pointer decay of array member without known bound}} + + constexpr int C = *F.a; // ref-error {{must be initialized by a constant expression}} \ + // ref-note {{array-to-pointer decay of array member without known bound}} \ + // expected-error {{must be initialized by a constant expression}} \ + // expected-note {{array-to-pointer decay of array member without known bound}} + + + + /// These are from test/SemaCXX/constant-expression-cxx11.cpp + /// and are the only tests using the 'indexing of array without known bound' diagnostic. + /// We currently diagnose them differently. 
+  extern int arr[]; // expected-note 3{{declared here}}
+  constexpr int *c = &arr[1]; // ref-error {{must be initialized by a constant expression}} \
+                              // ref-note {{indexing of array without known bound}} \
+                              // expected-error {{must be initialized by a constant expression}} \
+                              // expected-note {{read of non-constexpr variable 'arr'}}
+  constexpr int *d = &arr[1]; // ref-error {{must be initialized by a constant expression}} \
+                              // ref-note {{indexing of array without known bound}} \
+                              // expected-error {{must be initialized by a constant expression}} \
+                              // expected-note {{read of non-constexpr variable 'arr'}}
+  constexpr int *e = arr + 1; // ref-error {{must be initialized by a constant expression}} \
+                              // ref-note {{indexing of array without known bound}} \
+                              // expected-error {{must be initialized by a constant expression}} \
+                              // expected-note {{read of non-constexpr variable 'arr'}}
+}

From 64025b8eba200c0be7cedbb36c6dcbbea3ca96c7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andrzej=20Warzy=C5=84ski?=
Date: Thu, 26 Oct 2023 12:56:28 +0100
Subject: [PATCH 054/877] [mlir][SVE] Add an e2e test for vectorization of linalg.matmul (#69592)

--delete-branch

---
 .../Dialect/Linalg/CPU/ArmSVE/matmul.mlir     | 71 +++++++++++++++++++
 1 file changed, 71 insertions(+)
 create mode 100644 mlir/test/Integration/Dialect/Linalg/CPU/ArmSVE/matmul.mlir

diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/ArmSVE/matmul.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/ArmSVE/matmul.mlir
new file mode 100644
index 0000000000000..bc94161d5d375
--- /dev/null
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/ArmSVE/matmul.mlir
@@ -0,0 +1,71 @@
+// RUN: mlir-opt %s -test-transform-dialect-interpreter -test-transform-dialect-erase-schedule \
+// RUN:   -one-shot-bufferize -func-bufferize -cse -canonicalize -convert-vector-to-scf -arm-sve-legalize-vector-storage \
+// RUN:   -convert-vector-to-llvm="enable-arm-sve" -test-lower-to-llvm | \
+// RUN: %mcr_aarch64_cmd -e=entry -entry-point-result=void --march=aarch64 --mattr="+sve" -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils | \
+// RUN: FileCheck %s
+
+func.func @entry() {
+  %c1 = arith.constant 1 : index
+  %c2 = arith.constant 2 : index
+  %c4 = arith.constant 4 : index
+  %c0 = arith.constant 0 : index
+  %step = arith.constant 1 : index
+  %c0_f32 = arith.constant 0.0 : f32
+
+  %vscale = vector.vscale
+  %vl_fp = arith.muli %c4, %vscale : index
+  %A_alloc = bufferization.alloc_tensor(%c2, %c1) : tensor
+  %B_alloc = bufferization.alloc_tensor(%c1, %vl_fp) : tensor
+  %C_alloc = bufferization.alloc_tensor(%c2, %vl_fp) : tensor
+
+  %pi = arith.constant 3.14 : f32
+  %A = linalg.fill ins(%pi : f32) outs(%A_alloc : tensor) -> tensor
+  %B = linalg.fill ins(%pi : f32) outs(%B_alloc : tensor) -> tensor
+  %C_in = linalg.fill ins(%c0_f32 : f32) outs(%C_alloc : tensor) -> tensor
+
+  %C_out = linalg.matmul ins(%A, %B: tensor, tensor) outs(%C_in: tensor) -> tensor
+
+  // CHECK-LABEL: SVE: START OF TEST OUTPUT
+  vector.print str "SVE: START OF TEST OUTPUT"
+
+  // There are at least 4 x f32 elements in every SVE vector, i.e.
+  //   * %vscale >= 1.
+  // Hence, when checking the output there will always be at least 4 elements
+  // in every row. For implementations with wider vectors, you should see more
+  // elements being printed.
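+  // (Accordingly, the row-data CHECK lines below only anchor on the first
+  // four values of each row and leave the rest of the row unconstrained.)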
+  // CHECK-NEXT: Unranked Memref {{.*}} rank = 2 offset = 0 sizes = [2, 16] strides = [16, 1] data =
+  // CHECK-NEXT: [9.8596, 9.8596, 9.8596, 9.8596
+  // CHECK-NEXT: [9.8596, 9.8596, 9.8596, 9.8596
+
+  %xf = tensor.cast %C_out : tensor to tensor<*xf32>
+  call @printMemrefF32(%xf) : (tensor<*xf32>) -> ()
+
+  // CHECK-NEXT: SVE: END OF TEST OUTPUT
+  vector.print str "SVE: END OF TEST OUTPUT"
+
+  return
+}
+
+transform.sequence failures(propagate) {
+^bb1(%module_op: !transform.any_op):
+  %0 = transform.structured.match ops{["linalg.matmul"]} in %module_op : (!transform.any_op) -> !transform.any_op
+  %func_op = get_parent_op %0 : (!transform.any_op) -> !transform.op<"func.func">
+  // The tile sizes match the output matrix sizes
+  %1, %loops:3 = transform.structured.tile_using_for %0 [2, [4], 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
+  %2 = transform.structured.match ops{["linalg.matmul"]} in %module_op : (!transform.any_op) -> !transform.any_op
+  // The vector sizes match the output matrix sizes
+  // TODO: Use variables to re-use "shared" sizes
+  transform.structured.vectorize %2 vector_sizes [2, [4], 1] : !transform.any_op
+
+  transform.apply_patterns to %func_op {
+    transform.apply_patterns.vector.reduction_to_contract
+    transform.apply_patterns.vector.transfer_permutation_patterns
+    transform.apply_patterns.vector.lower_masked_transfers
+  } : !transform.op<"func.func">
+  transform.apply_patterns to %func_op {
+    transform.apply_patterns.vector.lower_contraction lowering_strategy = "outerproduct"
+    transform.apply_patterns.vector.lower_outerproduct
+  } : !transform.op<"func.func">
+}
+
+func.func private @printMemrefF32(%ptr : tensor<*xf32>)

From e9c4dc18bc9d34a2c055117a93799be9ade57d41 Mon Sep 17 00:00:00 2001
From: Jay Foad
Date: Thu, 26 Oct 2023 12:51:45 +0100
Subject: [PATCH 055/877] Revert "[AMDGPU] Use `S_CSELECT` for uniform i1 ext (#69703)"

This reverts commit a1260b5209968c08886e3c6183aa793de8931578.

It was causing some Vulkan CTS failures.

---
 llvm/lib/Target/AMDGPU/SIInstructions.td | 25 ++++-----
 llvm/test/CodeGen/AMDGPU/saddo.ll        | 65 ++++++++--------
 llvm/test/CodeGen/AMDGPU/uaddo.ll        | 46 ++++++++---------
 llvm/test/CodeGen/AMDGPU/usubo.ll        | 46 ++++++++---------
 llvm/test/CodeGen/AMDGPU/zero_extend.ll  |  2 +-
 5 files changed, 77 insertions(+), 107 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 707a1c72b5b7c..567f1b812c180 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2278,24 +2278,17 @@ def : GCNPat <
   (REG_SEQUENCE SReg_64, $src, sub0, (i32 (IMPLICIT_DEF)), sub1)
 >;
 
-multiclass ZExt_i64_i1_Pat {
-  def: GCNPat <
-    (i64 (ext i1:$src)),
-    (REG_SEQUENCE VReg_64,
-      (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
-                         /*src1mod*/(i32 0), /*src1*/(i32 1), $src),
-      sub0, (S_MOV_B32 (i32 0)), sub1)
-  >;
-
-  def : GCNPat <
-    (i64 (UniformUnaryFrag SCC)),
-    (S_CSELECT_B64 (i64 1), (i64 0))
-  >;
-}
+class ZExt_i64_i1_Pat : GCNPat <
+  (i64 (ext i1:$src)),
+  (REG_SEQUENCE VReg_64,
+    (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+                       /*src1mod*/(i32 0), /*src1*/(i32 1), $src),
+    sub0, (S_MOV_B32 (i32 0)), sub1)
+>;
 
-defm : ZExt_i64_i1_Pat;
-defm : ZExt_i64_i1_Pat;
+def : ZExt_i64_i1_Pat;
+def : ZExt_i64_i1_Pat;
 
 // FIXME: We need to use COPY_TO_REGCLASS to work-around the fact that
 // REG_SEQUENCE patterns don't support instructions with multiple outputs.
diff --git a/llvm/test/CodeGen/AMDGPU/saddo.ll b/llvm/test/CodeGen/AMDGPU/saddo.ll index f6f3e47c3be7a..cb3166d7a20d3 100644 --- a/llvm/test/CodeGen/AMDGPU/saddo.ll +++ b/llvm/test/CodeGen/AMDGPU/saddo.ll @@ -29,12 +29,10 @@ define amdgpu_kernel void @saddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %b) ; SI-NEXT: s_mov_b32 s0, s4 ; SI-NEXT: s_mov_b32 s1, s5 ; SI-NEXT: s_xor_b64 s[4:5], s[6:7], vcc -; SI-NEXT: s_and_b64 s[4:5], s[4:5], exec -; SI-NEXT: s_cselect_b64 s[4:5], 1, 0 -; SI-NEXT: s_add_u32 s4, s10, s4 -; SI-NEXT: s_addc_u32 s5, s11, s5 -; SI-NEXT: v_mov_b32_e32 v0, s4 -; SI-NEXT: v_mov_b32_e32 v1, s5 +; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; SI-NEXT: v_mov_b32_e32 v1, s11 +; SI-NEXT: v_add_i32_e32 v0, vcc, s10, v0 +; SI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; SI-NEXT: s_endpgm ; @@ -47,17 +45,15 @@ define amdgpu_kernel void @saddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %b) ; VI-NEXT: s_add_u32 s2, s6, s0 ; VI-NEXT: v_mov_b32_e32 v2, s7 ; VI-NEXT: s_addc_u32 s3, s7, s1 +; VI-NEXT: v_cmp_lt_i64_e64 s[8:9], s[0:1], 0 ; VI-NEXT: v_cmp_lt_i64_e32 vcc, s[2:3], v[1:2] -; VI-NEXT: v_cmp_lt_i64_e64 s[0:1], s[0:1], 0 +; VI-NEXT: v_mov_b32_e32 v3, s3 +; VI-NEXT: s_xor_b64 s[0:1], s[8:9], vcc +; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1] +; VI-NEXT: v_add_u32_e32 v2, vcc, s2, v2 ; VI-NEXT: v_mov_b32_e32 v0, s4 -; VI-NEXT: s_xor_b64 s[0:1], s[0:1], vcc -; VI-NEXT: s_and_b64 s[0:1], s[0:1], exec -; VI-NEXT: s_cselect_b64 s[0:1], 1, 0 -; VI-NEXT: s_add_u32 s0, s2, s0 -; VI-NEXT: s_addc_u32 s1, s3, s1 -; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v1, s5 -; VI-NEXT: v_mov_b32_e32 v2, s0 +; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc ; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] ; VI-NEXT: s_endpgm ; @@ -71,15 +67,13 @@ define amdgpu_kernel void @saddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %b) ; GFX9-NEXT: s_add_u32 s0, s6, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: s_addc_u32 s1, s7, s3 +; GFX9-NEXT: v_cmp_lt_i64_e64 s[8:9], s[2:3], 0 ; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, s[0:1], v[0:1] -; GFX9-NEXT: v_cmp_lt_i64_e64 s[2:3], s[2:3], 0 -; GFX9-NEXT: s_xor_b64 s[2:3], s[2:3], vcc -; GFX9-NEXT: s_and_b64 s[2:3], s[2:3], exec -; GFX9-NEXT: s_cselect_b64 s[2:3], 1, 0 -; GFX9-NEXT: s_add_u32 s0, s0, s2 -; GFX9-NEXT: s_addc_u32 s1, s1, s3 -; GFX9-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: s_xor_b64 s[2:3], s[8:9], vcc +; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[2:3] +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s0, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX9-NEXT: s_endpgm ; @@ -93,14 +87,11 @@ define amdgpu_kernel void @saddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %b) ; GFX10-NEXT: s_add_u32 s0, s6, s2 ; GFX10-NEXT: s_addc_u32 s1, s7, s3 ; GFX10-NEXT: v_cmp_lt_i64_e64 s2, s[2:3], 0 -; GFX10-NEXT: v_cmp_lt_i64_e64 s6, s[0:1], s[6:7] -; GFX10-NEXT: s_xor_b32 s2, s2, s6 -; GFX10-NEXT: s_and_b32 s2, s2, exec_lo -; GFX10-NEXT: s_cselect_b64 s[2:3], 1, 0 -; GFX10-NEXT: s_add_u32 s0, s0, s2 -; GFX10-NEXT: s_addc_u32 s1, s1, s3 -; GFX10-NEXT: v_mov_b32_e32 v0, s0 -; GFX10-NEXT: v_mov_b32_e32 v1, s1 +; GFX10-NEXT: v_cmp_lt_i64_e64 s3, s[0:1], s[6:7] +; GFX10-NEXT: s_xor_b32 s2, s2, s3 +; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2 +; GFX10-NEXT: v_add_co_u32 v0, s0, s0, v0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, s1, 0, s0 ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX10-NEXT: s_endpgm ; @@ -109,20 
+100,18 @@ define amdgpu_kernel void @saddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %b) ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x24 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x34 +; GFX11-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_add_u32 s2, s6, s0 ; GFX11-NEXT: s_addc_u32 s3, s7, s1 ; GFX11-NEXT: v_cmp_lt_i64_e64 s0, s[0:1], 0 -; GFX11-NEXT: v_cmp_lt_i64_e64 s6, s[2:3], s[6:7] +; GFX11-NEXT: v_cmp_lt_i64_e64 s1, s[2:3], s[6:7] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX11-NEXT: s_xor_b32 s0, s0, s6 -; GFX11-NEXT: s_and_b32 s0, s0, exec_lo -; GFX11-NEXT: s_cselect_b64 s[0:1], 1, 0 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_add_u32 s0, s2, s0 -; GFX11-NEXT: s_addc_u32 s1, s3, s1 -; GFX11-NEXT: v_mov_b32_e32 v0, s0 -; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: s_xor_b32 s0, s0, s1 +; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_add_co_u32 v0, s0, s2, v0 +; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, s3, 0, s0 ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5] ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) diff --git a/llvm/test/CodeGen/AMDGPU/uaddo.ll b/llvm/test/CodeGen/AMDGPU/uaddo.ll index 0ebf3f5198203..4363db2351e7a 100644 --- a/llvm/test/CodeGen/AMDGPU/uaddo.ll +++ b/llvm/test/CodeGen/AMDGPU/uaddo.ll @@ -7,23 +7,21 @@ define amdgpu_kernel void @s_uaddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 % ; SI-LABEL: s_uaddo_i64_zext: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; SI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd ; SI-NEXT: s_mov_b32 s3, 0xf000 -; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_add_u32 s0, s6, s0 -; SI-NEXT: v_mov_b32_e32 v0, s6 -; SI-NEXT: v_mov_b32_e32 v1, s7 -; SI-NEXT: s_addc_u32 s1, s7, s1 -; SI-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[0:1] -; SI-NEXT: s_and_b64 s[6:7], vcc, exec -; SI-NEXT: s_cselect_b64 s[6:7], 1, 0 -; SI-NEXT: s_add_u32 s6, s0, s6 -; SI-NEXT: s_addc_u32 s7, s1, s7 ; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: s_mov_b32 s0, s4 ; SI-NEXT: s_mov_b32 s1, s5 +; SI-NEXT: s_add_u32 s4, s6, s8 ; SI-NEXT: v_mov_b32_e32 v0, s6 ; SI-NEXT: v_mov_b32_e32 v1, s7 +; SI-NEXT: s_addc_u32 s5, s7, s9 +; SI-NEXT: v_cmp_lt_u64_e32 vcc, s[4:5], v[0:1] +; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; SI-NEXT: v_mov_b32_e32 v1, s5 +; SI-NEXT: v_add_i32_e32 v0, vcc, s4, v0 +; SI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; SI-NEXT: s_endpgm ; @@ -32,19 +30,17 @@ define amdgpu_kernel void @s_uaddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 % ; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_mov_b32_e32 v1, s6 +; VI-NEXT: v_mov_b32_e32 v2, s6 ; VI-NEXT: s_add_u32 s0, s6, s0 +; VI-NEXT: v_mov_b32_e32 v3, s7 ; VI-NEXT: s_addc_u32 s1, s7, s1 -; VI-NEXT: v_mov_b32_e32 v2, s7 -; VI-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[1:2] -; VI-NEXT: v_mov_b32_e32 v0, s4 -; VI-NEXT: s_and_b64 s[2:3], vcc, exec -; VI-NEXT: s_cselect_b64 s[2:3], 1, 0 -; VI-NEXT: s_add_u32 s0, s0, s2 -; VI-NEXT: s_addc_u32 s1, s1, s3 +; VI-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[2:3] ; VI-NEXT: v_mov_b32_e32 v3, s1 +; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2 
+; VI-NEXT: v_mov_b32_e32 v0, s4 ; VI-NEXT: v_mov_b32_e32 v1, s5 -; VI-NEXT: v_mov_b32_e32 v2, s0 +; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc ; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] ; VI-NEXT: s_endpgm ; @@ -56,15 +52,13 @@ define amdgpu_kernel void @s_uaddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 % ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: s_add_u32 s0, s6, s2 -; GFX9-NEXT: s_addc_u32 s1, s7, s3 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 +; GFX9-NEXT: s_addc_u32 s1, s7, s3 ; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[0:1] -; GFX9-NEXT: s_and_b64 s[2:3], vcc, exec -; GFX9-NEXT: s_cselect_b64 s[2:3], 1, 0 -; GFX9-NEXT: s_add_u32 s0, s0, s2 -; GFX9-NEXT: s_addc_u32 s1, s1, s3 -; GFX9-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s0, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX9-NEXT: s_endpgm %uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) diff --git a/llvm/test/CodeGen/AMDGPU/usubo.ll b/llvm/test/CodeGen/AMDGPU/usubo.ll index ade0616137b17..37b5be3b672f2 100644 --- a/llvm/test/CodeGen/AMDGPU/usubo.ll +++ b/llvm/test/CodeGen/AMDGPU/usubo.ll @@ -8,23 +8,21 @@ define amdgpu_kernel void @s_usubo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 % ; SI-LABEL: s_usubo_i64_zext: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; SI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd ; SI-NEXT: s_mov_b32 s3, 0xf000 -; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_sub_u32 s0, s6, s0 -; SI-NEXT: v_mov_b32_e32 v0, s6 -; SI-NEXT: v_mov_b32_e32 v1, s7 -; SI-NEXT: s_subb_u32 s1, s7, s1 -; SI-NEXT: v_cmp_gt_u64_e32 vcc, s[0:1], v[0:1] -; SI-NEXT: s_and_b64 s[6:7], vcc, exec -; SI-NEXT: s_cselect_b64 s[6:7], 1, 0 -; SI-NEXT: s_add_u32 s6, s0, s6 -; SI-NEXT: s_addc_u32 s7, s1, s7 ; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: s_mov_b32 s0, s4 ; SI-NEXT: s_mov_b32 s1, s5 +; SI-NEXT: s_sub_u32 s4, s6, s8 ; SI-NEXT: v_mov_b32_e32 v0, s6 ; SI-NEXT: v_mov_b32_e32 v1, s7 +; SI-NEXT: s_subb_u32 s5, s7, s9 +; SI-NEXT: v_cmp_gt_u64_e32 vcc, s[4:5], v[0:1] +; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; SI-NEXT: v_mov_b32_e32 v1, s5 +; SI-NEXT: v_add_i32_e32 v0, vcc, s4, v0 +; SI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; SI-NEXT: s_endpgm ; @@ -33,19 +31,17 @@ define amdgpu_kernel void @s_usubo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 % ; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_mov_b32_e32 v1, s6 +; VI-NEXT: v_mov_b32_e32 v2, s6 ; VI-NEXT: s_sub_u32 s0, s6, s0 +; VI-NEXT: v_mov_b32_e32 v3, s7 ; VI-NEXT: s_subb_u32 s1, s7, s1 -; VI-NEXT: v_mov_b32_e32 v2, s7 -; VI-NEXT: v_cmp_gt_u64_e32 vcc, s[0:1], v[1:2] -; VI-NEXT: v_mov_b32_e32 v0, s4 -; VI-NEXT: s_and_b64 s[2:3], vcc, exec -; VI-NEXT: s_cselect_b64 s[2:3], 1, 0 -; VI-NEXT: s_add_u32 s0, s0, s2 -; VI-NEXT: s_addc_u32 s1, s1, s3 +; VI-NEXT: v_cmp_gt_u64_e32 vcc, s[0:1], v[2:3] ; VI-NEXT: v_mov_b32_e32 v3, s1 +; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2 +; VI-NEXT: v_mov_b32_e32 v0, s4 ; VI-NEXT: v_mov_b32_e32 v1, s5 -; VI-NEXT: v_mov_b32_e32 v2, s0 +; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc ; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] ; VI-NEXT: s_endpgm ; @@ -57,15 +53,13 
@@ define amdgpu_kernel void @s_usubo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 % ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: s_sub_u32 s0, s6, s2 -; GFX9-NEXT: s_subb_u32 s1, s7, s3 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 +; GFX9-NEXT: s_subb_u32 s1, s7, s3 ; GFX9-NEXT: v_cmp_gt_u64_e32 vcc, s[0:1], v[0:1] -; GFX9-NEXT: s_and_b64 s[2:3], vcc, exec -; GFX9-NEXT: s_cselect_b64 s[2:3], 1, 0 -; GFX9-NEXT: s_add_u32 s0, s0, s2 -; GFX9-NEXT: s_addc_u32 s1, s1, s3 -; GFX9-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s0, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX9-NEXT: s_endpgm %usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b) #0 diff --git a/llvm/test/CodeGen/AMDGPU/zero_extend.ll b/llvm/test/CodeGen/AMDGPU/zero_extend.ll index 9933cdc18e5fd..1f532f2706de7 100644 --- a/llvm/test/CodeGen/AMDGPU/zero_extend.ll +++ b/llvm/test/CodeGen/AMDGPU/zero_extend.ll @@ -38,7 +38,7 @@ define amdgpu_kernel void @s_arg_zext_i1_to_i64(ptr addrspace(1) %out, i1 zeroex ; GCN-LABEL: {{^}}s_cmp_zext_i1_to_i64: ; GCN-DAG: s_mov_b32 s{{[0-9]+}}, 0 ; GCN-DAG: s_cmp_eq_u32 -; GCN: s_cselect_b64 s[{{[0-9]+:[0-9]+}}], 1, 0 +; GCN: v_cndmask_b32 define amdgpu_kernel void @s_cmp_zext_i1_to_i64(ptr addrspace(1) %out, i32 %a, i32 %b) #0 { %cmp = icmp eq i32 %a, %b %ext = zext i1 %cmp to i64 From bb2b7530adc87dabc782141573ca063b1b76ce9e Mon Sep 17 00:00:00 2001 From: Christudasan Devadasan Date: Thu, 26 Oct 2023 17:19:58 +0530 Subject: [PATCH 056/877] [AMDGPU] precommit lit test for PR 69924. --- .../AMDGPU/bb-prolog-spill-during-regalloc.ll | 93 +++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/bb-prolog-spill-during-regalloc.ll diff --git a/llvm/test/CodeGen/AMDGPU/bb-prolog-spill-during-regalloc.ll b/llvm/test/CodeGen/AMDGPU/bb-prolog-spill-during-regalloc.ll new file mode 100644 index 0000000000000..a844b577c842f --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/bb-prolog-spill-during-regalloc.ll @@ -0,0 +1,93 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3 +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -O0 -verify-machineinstrs --stop-after=regallocfast,1 -o - %s | FileCheck -check-prefix=REGALLOC %s + +; FIXME: There are two spill codes inserted wrongly in this test. +; They are inserted during regalloc for the BBLiveIns - the spill restores for vgpr1 in the Flow block (bb.1) and for vgpr0 in the return block (bb.4). 
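+; This is a precommit test for PR 69924: the REGALLOC lines below capture the
+; current, pre-fix output and are expected to change once that fix lands.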
+define i32 @prolog_spill(i32 %arg0, i32 %arg1, i32 %arg2) { + ; REGALLOC-LABEL: name: prolog_spill + ; REGALLOC: bb.0.bb.0: + ; REGALLOC-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) + ; REGALLOC-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; REGALLOC-NEXT: {{ $}} + ; REGALLOC-NEXT: renamable $vgpr3 = IMPLICIT_DEF + ; REGALLOC-NEXT: SI_SPILL_V32_SAVE killed $vgpr2, %stack.5, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5) + ; REGALLOC-NEXT: SI_SPILL_V32_SAVE killed $vgpr1, %stack.4, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) + ; REGALLOC-NEXT: renamable $vgpr1 = COPY killed $vgpr0 + ; REGALLOC-NEXT: $vgpr0 = SI_SPILL_WWM_V32_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5) + ; REGALLOC-NEXT: renamable $sgpr4 = S_MOV_B32 49 + ; REGALLOC-NEXT: renamable $sgpr4_sgpr5 = V_CMP_GT_I32_e64 killed $vgpr1, killed $sgpr4, implicit $exec + ; REGALLOC-NEXT: renamable $sgpr6 = IMPLICIT_DEF + ; REGALLOC-NEXT: renamable $vgpr1 = COPY killed renamable $sgpr6 + ; REGALLOC-NEXT: SI_SPILL_V32_SAVE killed $vgpr1, %stack.3, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) + ; REGALLOC-NEXT: renamable $sgpr6_sgpr7 = COPY $exec, implicit-def $exec + ; REGALLOC-NEXT: renamable $sgpr4_sgpr5 = S_AND_B64 renamable $sgpr6_sgpr7, killed renamable $sgpr4_sgpr5, implicit-def dead $scc + ; REGALLOC-NEXT: renamable $sgpr6_sgpr7 = S_XOR_B64 renamable $sgpr4_sgpr5, killed renamable $sgpr6_sgpr7, implicit-def dead $scc + ; REGALLOC-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr6, 0, $vgpr0, implicit-def $sgpr6_sgpr7, implicit $sgpr6_sgpr7 + ; REGALLOC-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr7, 1, $vgpr0, implicit killed $sgpr6_sgpr7 + ; REGALLOC-NEXT: SI_SPILL_WWM_V32_SAVE killed $vgpr0, %stack.2, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) + ; REGALLOC-NEXT: $exec = S_MOV_B64_term killed renamable $sgpr4_sgpr5 + ; REGALLOC-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec + ; REGALLOC-NEXT: S_BRANCH %bb.3 + ; REGALLOC-NEXT: {{ $}} + ; REGALLOC-NEXT: bb.1.Flow: + ; REGALLOC-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) + ; REGALLOC-NEXT: {{ $}} + ; REGALLOC-NEXT: $vgpr0 = SI_SPILL_WWM_V32_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5) + ; REGALLOC-NEXT: $vgpr1 = SI_SPILL_V32_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) + ; REGALLOC-NEXT: $sgpr4 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr4_sgpr5 + ; REGALLOC-NEXT: $sgpr5 = V_READLANE_B32 $vgpr0, 1 + ; REGALLOC-NEXT: renamable $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 killed renamable $sgpr4_sgpr5, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; REGALLOC-NEXT: SI_SPILL_V32_SAVE killed $vgpr1, %stack.6, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5) + ; REGALLOC-NEXT: renamable $sgpr4_sgpr5 = S_AND_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc + ; REGALLOC-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr4, 2, $vgpr0, implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5 + ; REGALLOC-NEXT: renamable $vgpr0 = V_WRITELANE_B32 $sgpr5, 3, $vgpr0, implicit $sgpr4_sgpr5 + ; REGALLOC-NEXT: SI_SPILL_WWM_V32_SAVE killed $vgpr0, %stack.2, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) + ; REGALLOC-NEXT: $exec = S_XOR_B64_term $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc + ; REGALLOC-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec + ; REGALLOC-NEXT: 
S_BRANCH %bb.2
+  ; REGALLOC-NEXT: {{  $}}
+  ; REGALLOC-NEXT:   bb.2.bb.1:
+  ; REGALLOC-NEXT:   successors: %bb.4(0x80000000)
+  ; REGALLOC-NEXT: {{  $}}
+  ; REGALLOC-NEXT:   $vgpr0 = SI_SPILL_V32_RESTORE %stack.4, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
+  ; REGALLOC-NEXT:   renamable $sgpr4 = S_MOV_B32 10
+  ; REGALLOC-NEXT:   renamable $vgpr0 = V_ADD_U32_e64 $vgpr0, killed $sgpr4, 0, implicit $exec
+  ; REGALLOC-NEXT:   SI_SPILL_V32_SAVE killed $vgpr0, %stack.6, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
+  ; REGALLOC-NEXT:   S_BRANCH %bb.4
+  ; REGALLOC-NEXT: {{  $}}
+  ; REGALLOC-NEXT:   bb.3.bb.2:
+  ; REGALLOC-NEXT:   successors: %bb.1(0x80000000)
+  ; REGALLOC-NEXT: {{  $}}
+  ; REGALLOC-NEXT:   $vgpr0 = SI_SPILL_V32_RESTORE %stack.5, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
+  ; REGALLOC-NEXT:   renamable $sgpr4 = S_MOV_B32 20
+  ; REGALLOC-NEXT:   renamable $vgpr0 = V_ADD_U32_e64 $vgpr0, killed $sgpr4, 0, implicit $exec
+  ; REGALLOC-NEXT:   SI_SPILL_V32_SAVE killed $vgpr0, %stack.3, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+  ; REGALLOC-NEXT:   S_BRANCH %bb.1
+  ; REGALLOC-NEXT: {{  $}}
+  ; REGALLOC-NEXT:   bb.4.bb.3:
+  ; REGALLOC-NEXT:   $vgpr1 = SI_SPILL_WWM_V32_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+  ; REGALLOC-NEXT:   $vgpr0 = SI_SPILL_V32_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
+  ; REGALLOC-NEXT:   $sgpr4 = V_READLANE_B32 $vgpr1, 2, implicit-def $sgpr4_sgpr5
+  ; REGALLOC-NEXT:   $sgpr5 = V_READLANE_B32 $vgpr1, 3
+  ; REGALLOC-NEXT:   $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
+  ; REGALLOC-NEXT:   renamable $sgpr4 = S_MOV_B32 5
+  ; REGALLOC-NEXT:   renamable $vgpr0 = V_MUL_LO_U32_e64 killed $vgpr0, killed $sgpr4, implicit $exec
+  ; REGALLOC-NEXT:   KILL killed renamable $vgpr1
+  ; REGALLOC-NEXT:   SI_RETURN implicit killed $vgpr0
+bb.0:
+  %cmp = icmp slt i32 %arg0, 50
+  br i1 %cmp, label %bb.1, label %bb.2
+
+bb.1:
+  %val1 = add i32 %arg1, 10
+  br label %bb.3
+
+bb.2:
+  %val2 = add i32 %arg2, 20
+  br label %bb.3
+
+bb.3:
+  %val = phi i32 [ %val1, %bb.1 ], [ %val2, %bb.2 ]
+  %ret = mul i32 %val, 5;
+  ret i32 %ret
+}

From ce6b9b3b58b6c9e51d87084c916fa7aef81401f1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Markus=20M=C3=BCtzel?=
Date: Thu, 26 Oct 2023 14:30:11 +0200
Subject: [PATCH 057/877] [flang][runtime] Avoid dependency on libc++ for
 `std::__libcpp_verbose_abort`

Changes in libc++ during the development cycle for LLVM 17 led to the
FortranRuntime library depending on libc++.
Trying to build with a Flang 17 that was built with clang++ 17 and libc++ 17
(on MinGW) leads to the following linker error:

ld.lld: error: undefined symbol: std::__1::__libcpp_verbose_abort(char const*, ...)
>>> referenced by libFortranRuntime.a(io-api.cpp.obj):(std::__1::__throw_bad_variant_access[abi:v170000]())
>>> referenced by libFortranRuntime.a(io-stmt.cpp.obj)
>>> referenced by libFortranRuntime.a(unit.cpp.obj)

That might be caused by std::get being called on a std::variant in
common::visit.

std::__libcpp_verbose_abort is a weak symbol in libc++ that can be
optionally replaced by an alternative definition in user code (see [1]).

Do that to avoid a dependency of the FortranRuntime on libc++; a
stand-alone sketch of the override mechanism follows below.
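For illustration only (not part of the patch): a minimal stand-alone program
exercising the same override mechanism. It assumes a libc++ toolchain built
with -fno-exceptions, where std::get on the wrong variant alternative reaches
std::__throw_bad_variant_access and, through it, the verbose-abort hook; the
definition mirrors the one added to io-api.cpp below.

```cpp
#include <cstdarg>
#include <cstdio>
#include <cstdlib>
#include <variant>

// Strong definition that satisfies libc++'s weak reference to
// std::__libcpp_verbose_abort (same shape as the FortranRuntime override).
void std::__libcpp_verbose_abort(char const *format, ...) {
  va_list list;
  va_start(list, format);
  std::vfprintf(stderr, format, list);
  va_end(list);

  std::abort();
}

int main() {
  std::variant<int, float> v = 42;
  // Wrong alternative: with -fno-exceptions, libc++ routes the failure
  // through the replacement handler above instead of throwing.
  return static_cast<int>(std::get<float>(v));
}
```

With a definition like this present, the weak-symbol reference is resolved
inside the program itself, so the abort path pulls in no libc++ runtime bits,
which is exactly the dependency the patch removes from FortranRuntime.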
[1]: https://libcxx.llvm.org/UsingLibcxx.html#overriding-the-default-termination-handler See also: https://github.com/msys2/MINGW-packages/pull/18002#issuecomment-1694412640 Differential Revision: https://reviews.llvm.org/D158957 --- flang/runtime/io-api.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/flang/runtime/io-api.cpp b/flang/runtime/io-api.cpp index f9d60fecb149a..2fc530c7431a5 100644 --- a/flang/runtime/io-api.cpp +++ b/flang/runtime/io-api.cpp @@ -1517,3 +1517,17 @@ enum Iostat IONAME(CheckUnitNumberInRange128)(common::int128_t unit, #endif } // namespace Fortran::runtime::io + +#if defined(_LIBCPP_VERBOSE_ABORT) +// Provide own definition for `std::__libcpp_verbose_abort` to avoid dependency +// on the version provided by libc++. + +void std::__libcpp_verbose_abort(char const *format, ...) { + va_list list; + va_start(list, format); + std::vfprintf(stderr, format, list); + va_end(list); + + std::abort(); +} +#endif From 4638c29c3dd14048ca78e37f132d4c75f490d139 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 26 Oct 2023 13:09:21 +0200 Subject: [PATCH 058/877] [InstSimplify] Remove redundant pointer icmp fold (NFCI) This fold is already performed as part of simplifyICmpWithZero(). --- llvm/lib/Analysis/InstructionSimplify.cpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index b3feb2470e58e..3d192d0759a1e 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -2736,13 +2736,6 @@ static Constant *computePointerICmp(CmpInst::Predicate Pred, Value *LHS, const TargetLibraryInfo *TLI = Q.TLI; const DominatorTree *DT = Q.DT; const Instruction *CxtI = Q.CxtI; - const InstrInfoQuery &IIQ = Q.IIQ; - - // A non-null pointer is not equal to a null pointer. - if (isa(RHS) && ICmpInst::isEquality(Pred) && - llvm::isKnownNonZero(LHS, DL, 0, nullptr, nullptr, nullptr, - IIQ.UseInstrInfo)) - return ConstantInt::get(getCompareTy(LHS), !CmpInst::isTrueWhenEqual(Pred)); // We can only fold certain predicates on pointer comparisons. switch (Pred) { From 851338b126b8307300ae1c41bd9fb46791365bec Mon Sep 17 00:00:00 2001 From: Allen Date: Thu, 26 Oct 2023 20:39:24 +0800 Subject: [PATCH 059/877] Revert "[SimplifyCFG] Delete the unnecessary range check for small mask operation (#70324) This reverts commit 5e07481d4240b5e8fd85f9b92df30849606c2af0. --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 22 ++--------------- .../Transforms/SimplifyCFG/switch_mask.ll | 24 +++++++++++-------- 2 files changed, 16 insertions(+), 30 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 18187bcdedf09..68b5b1a78a346 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -6598,8 +6598,9 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, // If the default destination is unreachable, or if the lookup table covers // all values of the conditional variable, branch directly to the lookup table // BB. Otherwise, check that the condition is within the case range. - bool DefaultIsReachable = + const bool DefaultIsReachable = !isa(SI->getDefaultDest()->getFirstNonPHIOrDbg()); + const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize); // Create the BB that does the lookups. 
Module &Mod = *CommonDest->getParent()->getParent(); @@ -6630,25 +6631,6 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, BranchInst *RangeCheckBranch = nullptr; - // Grow the table to cover all possible index values to avoid the range check. - if (UseSwitchConditionAsTableIndex) { - ConstantRange CR = computeConstantRange(TableIndex, /* ForSigned */ false); - // Grow the table shouldn't have any size impact by checking - // WouldFitInRegister. - // TODO: Consider growing the table also when it doesn't fit in a register - // if no optsize is specified. - if (all_of(ResultTypes, [&](const auto &KV) { - return SwitchLookupTable::WouldFitInRegister( - DL, CR.getUpper().getLimitedValue(), KV.second /* ResultType */); - })) { - // The default branch is unreachable when we enlarge the lookup table. - // Adjust DefaultIsReachable to reuse code path. - TableSize = CR.getUpper().getZExtValue(); - DefaultIsReachable = false; - } - } - - const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize); if (!DefaultIsReachable || GeneratingCoveredLookupTable) { Builder.CreateBr(LookupBB); if (DTU) diff --git a/llvm/test/Transforms/SimplifyCFG/switch_mask.ll b/llvm/test/Transforms/SimplifyCFG/switch_mask.ll index 123519bc69211..8c97a0660d070 100644 --- a/llvm/test/Transforms/SimplifyCFG/switch_mask.ll +++ b/llvm/test/Transforms/SimplifyCFG/switch_mask.ll @@ -8,11 +8,13 @@ define i1 @switch_lookup_with_small_i1(i64 %x) { ; CHECK-LABEL: @switch_lookup_with_small_i1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[AND:%.*]] = and i64 [[X:%.*]], 15 -; CHECK-NEXT: [[SWITCH_CAST:%.*]] = trunc i64 [[AND]] to i16 -; CHECK-NEXT: [[SWITCH_SHIFTAMT:%.*]] = mul nuw nsw i16 [[SWITCH_CAST]], 1 -; CHECK-NEXT: [[SWITCH_DOWNSHIFT:%.*]] = lshr i16 1030, [[SWITCH_SHIFTAMT]] -; CHECK-NEXT: [[SWITCH_MASKED:%.*]] = trunc i16 [[SWITCH_DOWNSHIFT]] to i1 -; CHECK-NEXT: ret i1 [[SWITCH_MASKED]] +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i64 [[AND]], 11 +; CHECK-NEXT: [[SWITCH_CAST:%.*]] = trunc i64 [[AND]] to i11 +; CHECK-NEXT: [[SWITCH_SHIFTAMT:%.*]] = mul nuw nsw i11 [[SWITCH_CAST]], 1 +; CHECK-NEXT: [[SWITCH_DOWNSHIFT:%.*]] = lshr i11 -1018, [[SWITCH_SHIFTAMT]] +; CHECK-NEXT: [[SWITCH_MASKED:%.*]] = trunc i11 [[SWITCH_DOWNSHIFT]] to i1 +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i1 [[SWITCH_MASKED]], i1 false +; CHECK-NEXT: ret i1 [[TMP1]] ; entry: %and = and i64 %x, 15 @@ -35,11 +37,13 @@ define i8 @switch_lookup_with_small_i8(i64 %x) { ; CHECK-LABEL: @switch_lookup_with_small_i8( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[REM:%.*]] = urem i64 [[X:%.*]], 5 -; CHECK-NEXT: [[SWITCH_CAST:%.*]] = trunc i64 [[REM]] to i40 -; CHECK-NEXT: [[SWITCH_SHIFTAMT:%.*]] = mul nuw nsw i40 [[SWITCH_CAST]], 8 -; CHECK-NEXT: [[SWITCH_DOWNSHIFT:%.*]] = lshr i40 460303, [[SWITCH_SHIFTAMT]] -; CHECK-NEXT: [[SWITCH_MASKED:%.*]] = trunc i40 [[SWITCH_DOWNSHIFT]] to i8 -; CHECK-NEXT: ret i8 [[SWITCH_MASKED]] +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i64 [[REM]], 3 +; CHECK-NEXT: [[SWITCH_CAST:%.*]] = trunc i64 [[REM]] to i24 +; CHECK-NEXT: [[SWITCH_SHIFTAMT:%.*]] = mul nuw nsw i24 [[SWITCH_CAST]], 8 +; CHECK-NEXT: [[SWITCH_DOWNSHIFT:%.*]] = lshr i24 460303, [[SWITCH_SHIFTAMT]] +; CHECK-NEXT: [[SWITCH_MASKED:%.*]] = trunc i24 [[SWITCH_DOWNSHIFT]] to i8 +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i8 [[SWITCH_MASKED]], i8 0 +; CHECK-NEXT: ret i8 [[TMP1]] ; entry: %rem = urem i64 %x, 5 From fabcadf2eb20dcc2fa262a5a1c6752ac93544e9d Mon Sep 17 00:00:00 2001 From: Erik Jonsson Date: Thu, 26 Oct 2023 14:43:38 +0200 Subject: [PATCH 060/877] 
Let M68kMCCodeEmitter set Scratch size. (#69898)

The Scratch buffer passed to getBinaryCodeForInst needs to be able to
hold any value returned by getMachineOpValue or other custom encoders.
It's better to let the caller of getBinaryCodeForInst set the size of
Scratch as it's impossible for VarLenCodeEmitterGen to know what the
smallest needed size is.

VarLenCodeEmitterGen now calculates its smallest needed Scratch bit
width based on the slice operations and zero-extends Scratch if it's too
small. This only guarantees that Scratch has enough bits for the
generated code, not for getMachineOpValue or custom encoders.

The smallest internal APInt representation uses one uint64_t word so
there is no point in using a smaller size.
---
 .../M68k/MCTargetDesc/M68kMCCodeEmitter.cpp   |  2 +-
 llvm/test/TableGen/VarLenEncoder.td           |  4 ++--
 llvm/utils/TableGen/VarLenCodeEmitterGen.cpp  | 20 ++++++++++++++-----
 3 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp
index 97f5d7a3dc077..a9ff059bc990b 100644
--- a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp
+++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp
@@ -209,7 +209,7 @@ void M68kMCCodeEmitter::encodeInstruction(const MCInst &MI,
   // Try using the new method first.
   APInt EncodedInst(16, 0U);
-  APInt Scratch(16, 0U);
+  APInt Scratch(64, 0U); // One APInt word is enough.
   getBinaryCodeForInstr(MI, Fixups, EncodedInst, Scratch, STI);

   ArrayRef Data(EncodedInst.getRawData(), EncodedInst.getNumWords());
diff --git a/llvm/test/TableGen/VarLenEncoder.td b/llvm/test/TableGen/VarLenEncoder.td
index 3dd100a50fc58..0fabf4150b79d 100644
--- a/llvm/test/TableGen/VarLenEncoder.td
+++ b/llvm/test/TableGen/VarLenEncoder.td
@@ -65,7 +65,7 @@ def FOO32 : MyVarInst>;
 // CHECK: UINT64_C(46848), // FOO32

 // CHECK-LABEL: case ::FOO16: {
-// CHECK: Scratch = Scratch.zext(41);
+// CHECK: Scratch.getBitWidth() < 16
 // src.reg
 // CHECK: getMachineOpValue(MI, MI.getOperand(1), /*Pos=*/0, Scratch, Fixups, STI);
 // CHECK: Inst.insertBits(Scratch.extractBits(8, 0), 0);
@@ -83,7 +83,7 @@ def FOO32 : MyVarInst>;
 // CHECK: Inst.insertBits(Scratch.extractBits(2, 0), 39);

 // CHECK-LABEL: case ::FOO32: {
-// CHECK: Scratch = Scratch.zext(57);
+// CHECK: Scratch.getBitWidth() < 32
 // src.reg
 // CHECK: getMachineOpValue(MI, MI.getOperand(1), /*Pos=*/0, Scratch, Fixups, STI);
 // CHECK: Inst.insertBits(Scratch.extractBits(8, 0), 0);
diff --git a/llvm/utils/TableGen/VarLenCodeEmitterGen.cpp b/llvm/utils/TableGen/VarLenCodeEmitterGen.cpp
index 24f116bbeaced..bfb7e5c333170 100644
--- a/llvm/utils/TableGen/VarLenCodeEmitterGen.cpp
+++ b/llvm/utils/TableGen/VarLenCodeEmitterGen.cpp
@@ -60,6 +60,8 @@
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"

+#include
+
 using namespace llvm;

 namespace {
@@ -445,20 +447,17 @@ std::string VarLenCodeEmitterGen::getInstructionCases(Record *R,
 std::string VarLenCodeEmitterGen::getInstructionCaseForEncoding(
     Record *R, AltEncodingTy Mode, const VarLenInst &VLI, CodeGenTarget &Target,
     int I) {
-  size_t BitWidth = VLI.size();

   CodeGenInstruction &CGI = Target.getInstruction(R);

   std::string Case;
   raw_string_ostream SS(Case);
-  // Resize the scratch buffer.
-  if (BitWidth && !VLI.isFixedValueOnly())
-    SS.indent(I) << "Scratch = Scratch.zext(" << BitWidth << ");\n";
   // Populate based value.
   SS.indent(I) << "Inst = getInstBits" << Modes[Mode] << "(opcode);\n";

   // Process each segment in VLI.
   size_t Offset = 0U;
+  unsigned HighScratchAccess = 0U;
   for (const auto &ES : VLI) {
     unsigned NumBits = ES.BitWidth;
     const Init *Val = ES.Value;
@@ -497,6 +496,8 @@ std::string VarLenCodeEmitterGen::getInstructionCaseForEncoding(
              << "Scratch.extractBits(" << utostr(NumBits) << ", "
              << utostr(LoBit) << ")"
              << ", " << Offset << ");\n";
+
+      HighScratchAccess = std::max(HighScratchAccess, NumBits + LoBit);
     }
     Offset += NumBits;
   }
@@ -505,7 +506,16 @@ std::string VarLenCodeEmitterGen::getInstructionCaseForEncoding(
   if (!PostEmitter.empty())
     SS.indent(I) << "Inst = " << PostEmitter << "(MI, Inst, STI);\n";

-  return Case;
+  // Resize the scratch buffer if it's too small.
+  std::string ScratchResizeStr;
+  if (VLI.size() && !VLI.isFixedValueOnly()) {
+    raw_string_ostream RS(ScratchResizeStr);
+    RS.indent(I) << "if (Scratch.getBitWidth() < " << HighScratchAccess
+                 << ") { Scratch = Scratch.zext(" << HighScratchAccess
+                 << "); }\n";
+  }
+
+  return ScratchResizeStr + Case;
 }

 namespace llvm {

From 2e85123bfe8501e08689a27c6cf93203df06654a Mon Sep 17 00:00:00 2001
From: Luke Lau
Date: Thu, 26 Oct 2023 13:46:32 +0100
Subject: [PATCH 061/877] [VP] Check if VP ops with functional intrinsics are
 speculatable (#69504)

Noticed whilst working on #69494. VP intrinsics whose functional
equivalent is an intrinsic were having their lanes marked as
non-speculatable, even if the underlying intrinsic was speculatable.
This meant that

```llvm
%1 = call <4 x i32> @llvm.vp.umax(<4 x i32> %x, <4 x i32> %y, <4 x i1> %mask, i32 %evl)
```

would be expanded out to

```llvm
%.splatinsert = insertelement <4 x i32> poison, i32 %evl, i64 0
%.splat = shufflevector <4 x i32> %.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
%1 = icmp ult <4 x i32> , %.splat
%2 = and <4 x i1> %1, %mask
%3 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %x, <4 x i32> %y)
```

instead of

```llvm
%1 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %x, <4 x i32> %y)
```

The cause of this was isSafeToSpeculativelyExecuteWithOpcode checking
the function attributes for the VP instruction itself, not the
functional intrinsic. Since isSafeToSpeculativelyExecuteWithOpcode
expects an already materialized instruction, we can't use it directly
for the intrinsic case. So this fixes it by manually checking the
function attributes on the intrinsic.
---
 llvm/lib/CodeGen/ExpandVectorPredication.cpp |  9 +-
 llvm/test/CodeGen/Generic/expand-vp.ll      | 92 +++++++++++++-------
 2 files changed, 68 insertions(+), 33 deletions(-)

diff --git a/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/llvm/lib/CodeGen/ExpandVectorPredication.cpp
index 2d4da33a566dc..6c873a9aee27f 100644
--- a/llvm/lib/CodeGen/ExpandVectorPredication.cpp
+++ b/llvm/lib/CodeGen/ExpandVectorPredication.cpp
@@ -123,9 +123,12 @@ static bool maySpeculateLanes(VPIntrinsic &VPI) {
   if (isa(VPI))
     return false;
   // Fallback to whether the intrinsic is speculatable.
- std::optional OpcOpt = VPI.getFunctionalOpcode(); - unsigned FunctionalOpc = OpcOpt.value_or((unsigned)Instruction::Call); - return isSafeToSpeculativelyExecuteWithOpcode(FunctionalOpc, &VPI); + if (auto IntrID = VPI.getFunctionalIntrinsicID()) + return Intrinsic::getAttributes(VPI.getContext(), *IntrID) + .hasFnAttr(Attribute::AttrKind::Speculatable); + if (auto Opc = VPI.getFunctionalOpcode()) + return isSafeToSpeculativelyExecuteWithOpcode(*Opc, &VPI); + return false; } //// } Helpers diff --git a/llvm/test/CodeGen/Generic/expand-vp.ll b/llvm/test/CodeGen/Generic/expand-vp.ll index 509f86a64d9ce..40d183273b86d 100644 --- a/llvm/test/CodeGen/Generic/expand-vp.ll +++ b/llvm/test/CodeGen/Generic/expand-vp.ll @@ -18,6 +18,10 @@ declare <8 x i32> @llvm.vp.sdiv.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) declare <8 x i32> @llvm.vp.srem.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) declare <8 x i32> @llvm.vp.udiv.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) declare <8 x i32> @llvm.vp.urem.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) +declare <8 x i32> @llvm.vp.smax.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) +declare <8 x i32> @llvm.vp.smin.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) +declare <8 x i32> @llvm.vp.umax.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) +declare <8 x i32> @llvm.vp.umin.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) ; Bit arith declare <8 x i32> @llvm.vp.and.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) declare <8 x i32> @llvm.vp.xor.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) @@ -52,12 +56,16 @@ define void @test_vp_int_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x i32> %i2, <8 x i3 %r4 = call <8 x i32> @llvm.vp.srem.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n) %r5 = call <8 x i32> @llvm.vp.udiv.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n) %r6 = call <8 x i32> @llvm.vp.urem.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n) - %r7 = call <8 x i32> @llvm.vp.and.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n) - %r8 = call <8 x i32> @llvm.vp.or.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n) - %r9 = call <8 x i32> @llvm.vp.xor.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n) - %rA = call <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n) - %rB = call <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n) - %rC = call <8 x i32> @llvm.vp.shl.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n) + %r7 = call <8 x i32> @llvm.vp.smax.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n) + %r8 = call <8 x i32> @llvm.vp.smin.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n) + %r9 = call <8 x i32> @llvm.vp.umax.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n) + %rA = call <8 x i32> @llvm.vp.umin.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n) + %rB = call <8 x i32> @llvm.vp.and.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n) + %rC = call <8 x i32> @llvm.vp.or.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n) + %rD = call <8 x i32> @llvm.vp.xor.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n) + %rE = call <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n) + %rF = call <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n) + %r10 = call <8 x i32> @llvm.vp.shl.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n) ret void } @@ -70,6 +78,10 @@ declare @llvm.vp.sdiv.nxv4i32(, @llvm.vp.srem.nxv4i32(, , , i32) declare @llvm.vp.udiv.nxv4i32(, , , i32) declare @llvm.vp.urem.nxv4i32(, , , i32) 
+declare @llvm.vp.smax.nxv4i32(, , , i32) +declare @llvm.vp.smin.nxv4i32(, , , i32) +declare @llvm.vp.umax.nxv4i32(, , , i32) +declare @llvm.vp.umin.nxv4i32(, , , i32) ; Bit arith declare @llvm.vp.and.nxv4i32(, , , i32) declare @llvm.vp.xor.nxv4i32(, , , i32) @@ -87,12 +99,16 @@ define void @test_vp_int_vscale( %i0, %i1, %r4 = call @llvm.vp.srem.nxv4i32( %i0, %i1, %m, i32 %n) %r5 = call @llvm.vp.udiv.nxv4i32( %i0, %i1, %m, i32 %n) %r6 = call @llvm.vp.urem.nxv4i32( %i0, %i1, %m, i32 %n) - %r7 = call @llvm.vp.and.nxv4i32( %i0, %i1, %m, i32 %n) - %r8 = call @llvm.vp.or.nxv4i32( %i0, %i1, %m, i32 %n) - %r9 = call @llvm.vp.xor.nxv4i32( %i0, %i1, %m, i32 %n) - %rA = call @llvm.vp.ashr.nxv4i32( %i0, %i1, %m, i32 %n) - %rB = call @llvm.vp.lshr.nxv4i32( %i0, %i1, %m, i32 %n) - %rC = call @llvm.vp.shl.nxv4i32( %i0, %i1, %m, i32 %n) + %r7 = call @llvm.vp.smax.nxv4i32( %i0, %i1, %m, i32 %n) + %r8 = call @llvm.vp.smin.nxv4i32( %i0, %i1, %m, i32 %n) + %r9 = call @llvm.vp.umax.nxv4i32( %i0, %i1, %m, i32 %n) + %rA = call @llvm.vp.umin.nxv4i32( %i0, %i1, %m, i32 %n) + %rB = call @llvm.vp.and.nxv4i32( %i0, %i1, %m, i32 %n) + %rC = call @llvm.vp.or.nxv4i32( %i0, %i1, %m, i32 %n) + %rD = call @llvm.vp.xor.nxv4i32( %i0, %i1, %m, i32 %n) + %rE = call @llvm.vp.ashr.nxv4i32( %i0, %i1, %m, i32 %n) + %rF = call @llvm.vp.lshr.nxv4i32( %i0, %i1, %m, i32 %n) + %r10 = call @llvm.vp.shl.nxv4i32( %i0, %i1, %m, i32 %n) ret void } @@ -166,6 +182,10 @@ define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x ; ALL-CONVERT: %{{.+}} = udiv <8 x i32> %i0, %{{.+}} ; ALL-CONVERT-NOT: %{{.+}} = urem <8 x i32> %i0, %i1 ; ALL-CONVERT: %{{.+}} = urem <8 x i32> %i0, %{{.+}} +; ALL-CONVERT-NEXT: %{{.+}} = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %i0, <8 x i32> %i1) +; ALL-CONVERT-NEXT: %{{.+}} = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %i0, <8 x i32> %i1) +; ALL-CONVERT-NEXT: %{{.+}} = call <8 x i32> @llvm.umax.v8i32(<8 x i32> %i0, <8 x i32> %i1) +; ALL-CONVERT-NEXT: %{{.+}} = call <8 x i32> @llvm.umin.v8i32(<8 x i32> %i0, <8 x i32> %i1) ; ALL-CONVERT-NEXT: %{{.+}} = and <8 x i32> %i0, %i1 ; ALL-CONVERT-NEXT: %{{.+}} = or <8 x i32> %i0, %i1 ; ALL-CONVERT-NEXT: %{{.+}} = xor <8 x i32> %i0, %i1 @@ -263,12 +283,16 @@ define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x ; LEGAL_LEGAL-NEXT: %r4 = call <8 x i32> @llvm.vp.srem.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n) ; LEGAL_LEGAL-NEXT: %r5 = call <8 x i32> @llvm.vp.udiv.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n) ; LEGAL_LEGAL-NEXT: %r6 = call <8 x i32> @llvm.vp.urem.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n) -; LEGAL_LEGAL-NEXT: %r7 = call <8 x i32> @llvm.vp.and.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n) -; LEGAL_LEGAL-NEXT: %r8 = call <8 x i32> @llvm.vp.or.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n) -; LEGAL_LEGAL-NEXT: %r9 = call <8 x i32> @llvm.vp.xor.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n) -; LEGAL_LEGAL-NEXT: %rA = call <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n) -; LEGAL_LEGAL-NEXT: %rB = call <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n) -; LEGAL_LEGAL-NEXT: %rC = call <8 x i32> @llvm.vp.shl.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n) +; LEGAL_LEGAL-NEXT: %r7 = call <8 x i32> @llvm.vp.smax.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n) +; LEGAL_LEGAL-NEXT: %r8 = call <8 x i32> @llvm.vp.smin.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n) +; 
LEGAL_LEGAL-NEXT: %r9 = call <8 x i32> @llvm.vp.umax.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n) +; LEGAL_LEGAL-NEXT: %rA = call <8 x i32> @llvm.vp.umin.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n) +; LEGAL_LEGAL-NEXT: %rB = call <8 x i32> @llvm.vp.and.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n) +; LEGAL_LEGAL-NEXT: %rC = call <8 x i32> @llvm.vp.or.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n) +; LEGAL_LEGAL-NEXT: %rD = call <8 x i32> @llvm.vp.xor.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n) +; LEGAL_LEGAL-NEXT: %rE = call <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n) +; LEGAL_LEGAL-NEXT: %rF = call <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n) +; LEGAL_LEGAL-NEXT: %r10 = call <8 x i32> @llvm.vp.shl.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n) ; LEGAL_LEGAL-NEXT: ret void ; LEGAL_LEGAL:define void @test_vp_int_vscale( %i0, %i1, %i2, %f3, %m, i32 %n) { @@ -279,12 +303,16 @@ define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x ; LEGAL_LEGAL-NEXT: %r4 = call @llvm.vp.srem.nxv4i32( %i0, %i1, %m, i32 %n) ; LEGAL_LEGAL-NEXT: %r5 = call @llvm.vp.udiv.nxv4i32( %i0, %i1, %m, i32 %n) ; LEGAL_LEGAL-NEXT: %r6 = call @llvm.vp.urem.nxv4i32( %i0, %i1, %m, i32 %n) -; LEGAL_LEGAL-NEXT: %r7 = call @llvm.vp.and.nxv4i32( %i0, %i1, %m, i32 %n) -; LEGAL_LEGAL-NEXT: %r8 = call @llvm.vp.or.nxv4i32( %i0, %i1, %m, i32 %n) -; LEGAL_LEGAL-NEXT: %r9 = call @llvm.vp.xor.nxv4i32( %i0, %i1, %m, i32 %n) -; LEGAL_LEGAL-NEXT: %rA = call @llvm.vp.ashr.nxv4i32( %i0, %i1, %m, i32 %n) -; LEGAL_LEGAL-NEXT: %rB = call @llvm.vp.lshr.nxv4i32( %i0, %i1, %m, i32 %n) -; LEGAL_LEGAL-NEXT: %rC = call @llvm.vp.shl.nxv4i32( %i0, %i1, %m, i32 %n) +; LEGAL_LEGAL-NEXT: %r7 = call @llvm.vp.smax.nxv4i32( %i0, %i1, %m, i32 %n) +; LEGAL_LEGAL-NEXT: %r8 = call @llvm.vp.smin.nxv4i32( %i0, %i1, %m, i32 %n) +; LEGAL_LEGAL-NEXT: %r9 = call @llvm.vp.umax.nxv4i32( %i0, %i1, %m, i32 %n) +; LEGAL_LEGAL-NEXT: %rA = call @llvm.vp.umin.nxv4i32( %i0, %i1, %m, i32 %n) +; LEGAL_LEGAL-NEXT: %rB = call @llvm.vp.and.nxv4i32( %i0, %i1, %m, i32 %n) +; LEGAL_LEGAL-NEXT: %rC = call @llvm.vp.or.nxv4i32( %i0, %i1, %m, i32 %n) +; LEGAL_LEGAL-NEXT: %rD = call @llvm.vp.xor.nxv4i32( %i0, %i1, %m, i32 %n) +; LEGAL_LEGAL-NEXT: %rE = call @llvm.vp.ashr.nxv4i32( %i0, %i1, %m, i32 %n) +; LEGAL_LEGAL-NEXT: %rF = call @llvm.vp.lshr.nxv4i32( %i0, %i1, %m, i32 %n) +; LEGAL_LEGAL-NEXT: %r10 = call @llvm.vp.shl.nxv4i32( %i0, %i1, %m, i32 %n) ; LEGAL_LEGAL-NEXT: ret void ; LEGAL_LEGAL: define void @test_vp_reduce_int_v4(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 %n) { @@ -342,12 +370,16 @@ define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x ; DISCARD_LEGAL-NOT: %r4 = call <8 x i32> @llvm.vp.srem.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8) ; DISCARD_LEGAL-NOT: %r5 = call <8 x i32> @llvm.vp.udiv.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8) ; DISCARD_LEGAL-NOT: %r6 = call <8 x i32> @llvm.vp.urem.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8) -; DISCARD_LEGAL: %r7 = call <8 x i32> @llvm.vp.and.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8) -; DISCARD_LEGAL-NEXT: %r8 = call <8 x i32> @llvm.vp.or.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8) -; DISCARD_LEGAL-NEXT: %r9 = call <8 x i32> @llvm.vp.xor.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8) -; DISCARD_LEGAL-NEXT: %rA = call <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32> %i0, <8 x i32> %i1, 
<8 x i1> %m, i32 8) -; DISCARD_LEGAL-NEXT: %rB = call <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8) -; DISCARD_LEGAL-NEXT: %rC = call <8 x i32> @llvm.vp.shl.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8) +; DISCARD_LEGAL: %r7 = call <8 x i32> @llvm.vp.smax.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8) +; DISCARD_LEGAL: %r8 = call <8 x i32> @llvm.vp.smin.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8) +; DISCARD_LEGAL: %r9 = call <8 x i32> @llvm.vp.umax.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8) +; DISCARD_LEGAL: %rA = call <8 x i32> @llvm.vp.umin.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8) +; DISCARD_LEGAL-NEXT: %rB = call <8 x i32> @llvm.vp.and.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8) +; DISCARD_LEGAL-NEXT: %rC = call <8 x i32> @llvm.vp.or.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8) +; DISCARD_LEGAL-NEXT: %rD = call <8 x i32> @llvm.vp.xor.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8) +; DISCARD_LEGAL-NEXT: %rE = call <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8) +; DISCARD_LEGAL-NEXT: %rF = call <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8) +; DISCARD_LEGAL-NEXT: %r10 = call <8 x i32> @llvm.vp.shl.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8) ; DISCARD_LEGAL-NEXT: ret void ; TODO compute vscale only once and use caching. From 38c9fab8f34a17462d03eb39fd2d6363f0c81f3e Mon Sep 17 00:00:00 2001 From: Pete Steinfeld <47540744+psteinfeld@users.noreply.github.com> Date: Thu, 26 Oct 2023 05:50:38 -0700 Subject: [PATCH 062/877] [flang] Regularize TODO messages for coarray intrinsics (#70281) Apply a tag to coarray intrinsics to make them easier to recognize along with other coarray constructs. See pull request #69227 for a similar change. --- flang/lib/Optimizer/Builder/IntrinsicCall.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index 0a023bc6b21ea..fe40fd821f010 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -1343,6 +1343,12 @@ static bool isIntrinsicModuleProcedure(llvm::StringRef name) { name.startswith("ieee_") || name.startswith("__ppc_"); } +static bool isCoarrayIntrinsic(llvm::StringRef name) { + return name.startswith("atomic_") || name.startswith("co_") || + name.contains("image") || name.endswith("cobound") || + name.equals("team_number"); +} + /// Return the generic name of an intrinsic module procedure specific name. /// Remove any "__builtin_" prefix, and any specific suffix of the form /// {_[ail]?[0-9]+}*, such as _1 or _a4. @@ -1363,6 +1369,8 @@ llvm::StringRef genericName(llvm::StringRef specificName) { void crashOnMissingIntrinsic(mlir::Location loc, llvm::StringRef name) { if (isIntrinsicModuleProcedure(name)) TODO(loc, "intrinsic module procedure: " + llvm::Twine(name)); + else if (isCoarrayIntrinsic(name)) + TODO(loc, "coarray: intrinsic " + llvm::Twine(name)); else TODO(loc, "intrinsic: " + llvm::Twine(name)); } From e27ff897c2bf88d9c0b3d101bbe5e830e2831203 Mon Sep 17 00:00:00 2001 From: Andrzej Warzynski Date: Thu, 26 Oct 2023 12:49:56 +0000 Subject: [PATCH 063/877] Revert "[mlir][SVE] Add an e2e test for vectorization of linalg.matmul (#69592)" Broken bot: * https://lab.llvm.org/buildbot/#/builders/197/builds/10572 This reverts commit 64025b8eba200c0be7cedbb36c6dcbbea3ca96c7. 
--- .../Dialect/Linalg/CPU/ArmSVE/matmul.mlir | 71 ------------------- 1 file changed, 71 deletions(-) delete mode 100644 mlir/test/Integration/Dialect/Linalg/CPU/ArmSVE/matmul.mlir diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/ArmSVE/matmul.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/ArmSVE/matmul.mlir deleted file mode 100644 index bc94161d5d375..0000000000000 --- a/mlir/test/Integration/Dialect/Linalg/CPU/ArmSVE/matmul.mlir +++ /dev/null @@ -1,71 +0,0 @@ -// RUN: mlir-opt %s -test-transform-dialect-interpreter -test-transform-dialect-erase-schedule \ -// RUN: -one-shot-bufferize -func-bufferize -cse -canonicalize -convert-vector-to-scf -arm-sve-legalize-vector-storage \ -// RUN: -convert-vector-to-llvm="enable-arm-sve" -test-lower-to-llvm | \ -// RUN: %mcr_aarch64_cmd -e=entry -entry-point-result=void --march=aarch64 --mattr="+sve" -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils | \ -// RUN: FileCheck %s - -func.func @entry() { - %c1 = arith.constant 1 : index - %c2 = arith.constant 2 : index - %c4 = arith.constant 4 : index - %c0 = arith.constant 0 : index - %step = arith.constant 1 : index - %c0_f32 = arith.constant 0.0 : f32 - - %vscale = vector.vscale - %vl_fp = arith.muli %c4, %vscale : index - %A_alloc = bufferization.alloc_tensor(%c2, %c1) : tensor - %B_alloc = bufferization.alloc_tensor(%c1, %vl_fp) : tensor - %C_alloc = bufferization.alloc_tensor(%c2, %vl_fp) : tensor - - %pi = arith.constant 3.14 : f32 - %A = linalg.fill ins(%pi : f32) outs(%A_alloc : tensor) -> tensor - %B = linalg.fill ins(%pi : f32) outs(%B_alloc : tensor) -> tensor - %C_in = linalg.fill ins(%c0_f32 : f32) outs(%C_alloc : tensor) -> tensor - - %C_out = linalg.matmul ins(%A, %B: tensor, tensor) outs(%C_in: tensor) -> tensor - - // CHECK-LABEL: SVE: START OF TEST OUTPUT - vector.print str "SVE: START OF TEST OUTPUT" - - // There are at least 4 x f32 elements in every SVE vector, i.e. - // * %vscale >= 1. - // Hence, when checking the outupt there will always be at least 4 elements - // in every row. For implementations with wider vectors, you should see more - // elements being printed. 
- // CHECK-NEXT: Unranked Memref {{.*}} rank = 2 offset = 0 sizes = [2, 16] strides = [16, 1] data = - // CHECK-NEXT: [9.8596, 9.8596, 9.8596, 9.8596 - // CHECK-NEXT: [9.8596, 9.8596, 9.8596, 9.8596 - - %xf = tensor.cast %C_out : tensor to tensor<*xf32> - call @printMemrefF32(%xf) : (tensor<*xf32>) -> () - - // CHECK-NEXT: SVE: END OF TEST OUTPUT - vector.print str "SVE: END OF TEST OUTPUT" - - return -} - -transform.sequence failures(propagate) { -^bb1(%module_op: !transform.any_op): - %0 = transform.structured.match ops{["linalg.matmul"]} in %module_op : (!transform.any_op) -> !transform.any_op - %func_op = get_parent_op %0 : (!transform.any_op) -> !transform.op<"func.func"> - // The tile sizes match the output matrix sizes - %1, %loops:3 = transform.structured.tile_using_for %0 [2, [4], 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op) - %2 = transform.structured.match ops{["linalg.matmul"]} in %module_op : (!transform.any_op) -> !transform.any_op - // The vector sizes match the output matrix sizes - // TOOD: Use variables to re-use "shared" sizes - transform.structured.vectorize %2 vector_sizes [2, [4], 1] : !transform.any_op - - transform.apply_patterns to %func_op { - transform.apply_patterns.vector.reduction_to_contract - transform.apply_patterns.vector.transfer_permutation_patterns - transform.apply_patterns.vector.lower_masked_transfers - } : !transform.op<"func.func"> - transform.apply_patterns to %func_op { - transform.apply_patterns.vector.lower_contraction lowering_strategy = "outerproduct" - transform.apply_patterns.vector.lower_outerproduct - } : !transform.op<"func.func"> -} - -func.func private @printMemrefF32(%ptr : tensor<*xf32>) From e01efddbf3f977525707d25f500300f62b98fe28 Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Thu, 26 Oct 2023 14:51:30 +0200 Subject: [PATCH 064/877] [clang][Interp] Correctly emit destructors for multi-dimensional arrays (#69140) We were not taking those into account correctly when emitting destructors. Fix that and add tests for it. Fixes #69115 --- clang/lib/AST/Interp/ByteCodeExprGen.cpp | 33 +++++++++----- clang/test/AST/Interp/arrays.cpp | 57 ++++++++++++++++++++++++ 2 files changed, 78 insertions(+), 12 deletions(-) diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp index 1e508f8998abe..a5141d728d832 100644 --- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp +++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp @@ -2732,19 +2732,28 @@ bool ByteCodeExprGen::emitRecordDestruction(const Descriptor *Desc) { // Arrays. if (Desc->isArray()) { const Descriptor *ElemDesc = Desc->ElemDesc; - const Record *ElemRecord = ElemDesc->ElemRecord; - assert(ElemRecord); // This is not a primitive array. + assert(ElemDesc); + + // Don't need to do anything for these. + if (ElemDesc->isPrimitiveArray()) + return this->emitPopPtr(SourceInfo{}); + + // If this is an array of record types, check if we need + // to call the element destructors at all. If not, try + // to save the work. 
+    if (const Record *ElemRecord = ElemDesc->ElemRecord) {
+      if (const CXXDestructorDecl *Dtor = ElemRecord->getDestructor();
+          !Dtor || Dtor->isTrivial())
+        return this->emitPopPtr(SourceInfo{});
+    }

-    if (const CXXDestructorDecl *Dtor = ElemRecord->getDestructor();
-        Dtor && !Dtor->isTrivial()) {
-      for (ssize_t I = Desc->getNumElems() - 1; I >= 0; --I) {
-        if (!this->emitConstUint64(I, SourceInfo{}))
-          return false;
-        if (!this->emitArrayElemPtrUint64(SourceInfo{}))
-          return false;
-        if (!this->emitRecordDestruction(Desc->ElemDesc))
-          return false;
-      }
+    for (ssize_t I = Desc->getNumElems() - 1; I >= 0; --I) {
+      if (!this->emitConstUint64(I, SourceInfo{}))
+        return false;
+      if (!this->emitArrayElemPtrUint64(SourceInfo{}))
+        return false;
+      if (!this->emitRecordDestruction(ElemDesc))
+        return false;
     }
     return this->emitPopPtr(SourceInfo{});
   }
diff --git a/clang/test/AST/Interp/arrays.cpp b/clang/test/AST/Interp/arrays.cpp
index d1673094c2660..34e0086fb9ee8 100644
--- a/clang/test/AST/Interp/arrays.cpp
+++ b/clang/test/AST/Interp/arrays.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -fexperimental-new-constant-interpreter -verify %s
+// RUN: %clang_cc1 -fexperimental-new-constant-interpreter -std=c++20 -verify %s
 // RUN: %clang_cc1 -verify=ref %s
+// RUN: %clang_cc1 -verify=ref -std=c++20 %s

 constexpr int m = 3;
 constexpr const int *foo[][5] = {
@@ -497,3 +499,58 @@ namespace Incomplete {
                       // expected-error {{must be initialized by a constant expression}} \
                       // expected-note {{read of non-constexpr variable 'arr'}}
 }
+
+namespace GH69115 {
+  /// This used to crash because we were trying to emit destructors for the
+  /// array.
+  constexpr int foo() {
+    int arr[2][2] = {1, 2, 3, 4};
+    return 0;
+  }
+  static_assert(foo() == 0, "");
+
+  /// Test that we still emit the destructors for multi-dimensional
+  /// composite arrays.
+#if __cplusplus >= 202002L
+  constexpr void assert(bool C) {
+    if (C)
+      return;
+    // Invalid in constexpr.
+    (void)(1 / 0); // expected-warning {{undefined}} \
+                   // ref-warning {{undefined}}
+  }
+
+  class F {
+  public:
+    int a;
+    int *dtor;
+    int &idx;
+    constexpr F(int a, int *dtor, int &idx) : a(a), dtor(dtor), idx(idx) {}
+    constexpr ~F() noexcept(false){
+      dtor[idx] = a;
+      ++idx;
+    }
+  };
+  constexpr int foo2() {
+    int dtorIndices[] = {0, 0, 0, 0};
+    int idx = 0;
+
+    {
+      F arr[2][2] = {F(1, dtorIndices, idx),
+                     F(2, dtorIndices, idx),
+                     F(3, dtorIndices, idx),
+                     F(4, dtorIndices, idx)};
+    }
+
+    /// Reverse-reverse order.
+    assert(idx == 4);
+    assert(dtorIndices[0] == 4);
+    assert(dtorIndices[1] == 3);
+    assert(dtorIndices[2] == 2);
+    assert(dtorIndices[3] == 1);
+
+    return 0;
+  }
+  static_assert(foo2() == 0, "");
+#endif
+}

From 560bad013ebcb8d2c2c1722e35270b9a70ab40ce Mon Sep 17 00:00:00 2001
From: Alexey Bataev
Date: Wed, 26 Jul 2023 07:47:08 -0700
Subject: [PATCH 065/877] [SLP] Improve isGatherShuffledEntry by trying
 per-register shuffles.

Currently, when building a gather/buildvector node, we try to build node
shuffles without taking separate vector registers into account. We can
improve the final codegen and the whole vectorization process by including
this information in the analysis and in the vector code emission, which
allows emitting better vectorized code. A simplified sketch of the
per-register splitting follows below.
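For illustration only (not part of the patch): a small, hypothetical sketch
of the per-register splitting described above. A gather mask covering several
vector registers is cut into SliceSize = Mask.size() / NumParts chunks, and
each chunk is analyzed on its own; analyzeSlice is a stand-in for
isGatherShuffledSingleRegisterEntry, not the real LLVM API.

```cpp
#include <cstdio>
#include <vector>

constexpr int PoisonMaskElem = -1;

// Hypothetical stand-in for isGatherShuffledSingleRegisterEntry: decides
// whether one register-sized slice of the mask can reuse an existing shuffle.
static bool analyzeSlice(const std::vector<int> &Slice) {
  for (int Idx : Slice)
    if (Idx != PoisonMaskElem)
      return true; // A real implementation would match against tree entries.
  return false;
}

int main() {
  // A gather mask for 8 scalars that the target spreads over 2 registers.
  std::vector<int> Mask = {0, 1, PoisonMaskElem, 3, 4, 5, PoisonMaskElem, 7};
  unsigned NumParts = 2;
  unsigned SliceSize = Mask.size() / NumParts;

  // Analyze each register-sized part of the mask independently, mirroring
  // the SliceSize = Mask.size() / NumParts loop in the patched code.
  for (unsigned Part = 0; Part < NumParts; ++Part) {
    std::vector<int> Slice(Mask.begin() + Part * SliceSize,
                           Mask.begin() + (Part + 1) * SliceSize);
    std::printf("part %u can be shuffled: %s\n", Part,
                analyzeSlice(Slice) ? "yes" : "no");
  }
  return 0;
}
```

Keeping the analysis per register lets one slice reuse an existing tree entry
even when another slice cannot, instead of giving up on the whole mask.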
Differential Revision: https://reviews.llvm.org/D149742 --- .../Transforms/Vectorize/SLPVectorizer.cpp | 462 +++++++++++++----- .../X86/multi-nodes-to-shuffle.ll | 45 +- 2 files changed, 387 insertions(+), 120 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 4f82d2d1d6d91..9b5da445daaab 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -2507,17 +2507,31 @@ class BoUpSLP { /// instruction in the list). Instruction &getLastInstructionInBundle(const TreeEntry *E); - /// Checks if the gathered \p VL can be represented as shuffle(s) of previous - /// tree entries. + /// Checks if the gathered \p VL can be represented as a single register + /// shuffle(s) of previous tree entries. /// \param TE Tree entry checked for permutation. /// \param VL List of scalars (a subset of the TE scalar), checked for - /// permutations. + /// permutations. Must form single-register vector. /// \returns ShuffleKind, if gathered values can be represented as shuffles of - /// previous tree entries. \p Mask is filled with the shuffle mask. + /// previous tree entries. \p Part of \p Mask is filled with the shuffle mask. std::optional - isGatherShuffledEntry(const TreeEntry *TE, ArrayRef VL, - SmallVectorImpl &Mask, - SmallVectorImpl &Entries); + isGatherShuffledSingleRegisterEntry( + const TreeEntry *TE, ArrayRef VL, MutableArrayRef Mask, + SmallVectorImpl &Entries, unsigned Part); + + /// Checks if the gathered \p VL can be represented as multi-register + /// shuffle(s) of previous tree entries. + /// \param TE Tree entry checked for permutation. + /// \param VL List of scalars (a subset of the TE scalar), checked for + /// permutations. + /// \returns per-register series of ShuffleKind, if gathered values can be + /// represented as shuffles of previous tree entries. \p Mask is filled with + /// the shuffle mask (also on per-register base). + SmallVector> + isGatherShuffledEntry( + const TreeEntry *TE, ArrayRef VL, SmallVectorImpl &Mask, + SmallVectorImpl> &Entries, + unsigned NumParts); /// \returns the scalarization cost for this list of values. Assuming that /// this subtree gets vectorized, we may need to extract the values from the @@ -6990,6 +7004,11 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis { BoUpSLP &R; SmallPtrSetImpl &CheckedExtracts; constexpr static TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; + /// While set, still trying to estimate the cost for the same nodes and we + /// can delay actual cost estimation (virtual shuffle instruction emission). + /// May help better estimate the cost if same nodes must be permuted + allows + /// to move most of the long shuffles cost estimation to TTI. + bool SameNodesEstimated = true; static Constant *getAllOnesValue(const DataLayout &DL, Type *Ty) { if (Ty->getScalarType()->isPointerTy()) { @@ -7230,6 +7249,49 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis { } return Cost; } + /// Transforms mask \p CommonMask per given \p Mask to make proper set after + /// shuffle emission. + static void transformMaskAfterShuffle(MutableArrayRef CommonMask, + ArrayRef Mask) { + for (unsigned Idx = 0, Sz = CommonMask.size(); Idx < Sz; ++Idx) + if (Mask[Idx] != PoisonMaskElem) + CommonMask[Idx] = Idx; + } + /// Adds the cost of reshuffling \p E1 and \p E2 (if present), using given + /// mask \p Mask, register number \p Part, that includes \p SliceSize + /// elements. 
+ void estimateNodesPermuteCost(const TreeEntry &E1, const TreeEntry *E2, + ArrayRef Mask, unsigned Part, + unsigned SliceSize) { + if (SameNodesEstimated) { + // Delay the cost estimation if the same nodes are reshuffling. + // If we already requested the cost of reshuffling of E1 and E2 before, no + // need to estimate another cost with the sub-Mask, instead include this + // sub-Mask into the CommonMask to estimate it later and avoid double cost + // estimation. + if ((InVectors.size() == 2 && + InVectors.front().get() == &E1 && + InVectors.back().get() == E2) || + (!E2 && InVectors.front().get() == &E1)) { + assert(all_of(ArrayRef(CommonMask).slice(Part * SliceSize, SliceSize), + [](int Idx) { return Idx == PoisonMaskElem; }) && + "Expected all poisoned elements."); + ArrayRef SubMask = + ArrayRef(Mask).slice(Part * SliceSize, SliceSize); + copy(SubMask, std::next(CommonMask.begin(), SliceSize * Part)); + return; + } + // Found non-matching nodes - need to estimate the cost for the matched + // and transform mask. + Cost += createShuffle(InVectors.front(), + InVectors.size() == 1 ? nullptr : InVectors.back(), + CommonMask); + transformMaskAfterShuffle(CommonMask, CommonMask); + } + SameNodesEstimated = false; + Cost += createShuffle(&E1, E2, Mask); + transformMaskAfterShuffle(CommonMask, Mask); + } class ShuffleCostBuilder { const TargetTransformInfo &TTI; @@ -7493,31 +7555,74 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis { // into a vector and can be represented as a permutation elements in a // single input vector or of 2 input vectors. Cost += computeExtractCost(VL, Mask, ShuffleKind); + InVectors.assign(1, E); + CommonMask.assign(Mask.begin(), Mask.end()); + transformMaskAfterShuffle(CommonMask, CommonMask); + SameNodesEstimated = false; return VecBase; } - void add(const TreeEntry *E1, const TreeEntry *E2, ArrayRef Mask) { - if (E1 == E2) { + void add(const TreeEntry &E1, const TreeEntry &E2, ArrayRef Mask) { + if (&E1 == &E2) { assert(all_of(Mask, - [=](int Idx) { - return Idx < static_cast(E1->getVectorFactor()); + [&](int Idx) { + return Idx < static_cast(E1.getVectorFactor()); }) && "Expected single vector shuffle mask."); add(E1, Mask); return; } - CommonMask.assign(Mask.begin(), Mask.end()); - InVectors.assign({E1, E2}); + if (InVectors.empty()) { + CommonMask.assign(Mask.begin(), Mask.end()); + InVectors.assign({&E1, &E2}); + return; + } + assert(!CommonMask.empty() && "Expected non-empty common mask."); + auto *MaskVecTy = + FixedVectorType::get(E1.Scalars.front()->getType(), Mask.size()); + unsigned NumParts = TTI.getNumberOfParts(MaskVecTy); + assert(NumParts > 0 && NumParts < Mask.size() && + "Expected positive number of registers."); + unsigned SliceSize = Mask.size() / NumParts; + const auto *It = + find_if(Mask, [](int Idx) { return Idx != PoisonMaskElem; }); + unsigned Part = std::distance(Mask.begin(), It) / SliceSize; + estimateNodesPermuteCost(E1, &E2, Mask, Part, SliceSize); } - void add(const TreeEntry *E1, ArrayRef Mask) { - CommonMask.assign(Mask.begin(), Mask.end()); - InVectors.assign(1, E1); + void add(const TreeEntry &E1, ArrayRef Mask) { + if (InVectors.empty()) { + CommonMask.assign(Mask.begin(), Mask.end()); + InVectors.assign(1, &E1); + return; + } + assert(!CommonMask.empty() && "Expected non-empty common mask."); + auto *MaskVecTy = + FixedVectorType::get(E1.Scalars.front()->getType(), Mask.size()); + unsigned NumParts = TTI.getNumberOfParts(MaskVecTy); + assert(NumParts > 0 && NumParts < Mask.size() && + "Expected positive 
number of registers."); + unsigned SliceSize = Mask.size() / NumParts; + const auto *It = + find_if(Mask, [](int Idx) { return Idx != PoisonMaskElem; }); + unsigned Part = std::distance(Mask.begin(), It) / SliceSize; + estimateNodesPermuteCost(E1, nullptr, Mask, Part, SliceSize); + if (!SameNodesEstimated && InVectors.size() == 1) + InVectors.emplace_back(&E1); } /// Adds another one input vector and the mask for the shuffling. void add(Value *V1, ArrayRef Mask) { - assert(CommonMask.empty() && InVectors.empty() && - "Expected empty input mask/vectors."); - CommonMask.assign(Mask.begin(), Mask.end()); - InVectors.assign(1, V1); + if (InVectors.empty()) { + assert(CommonMask.empty() && "Expected empty input mask/vectors."); + CommonMask.assign(Mask.begin(), Mask.end()); + InVectors.assign(1, V1); + return; + } + assert(InVectors.size() == 1 && InVectors.front().is() && + !CommonMask.empty() && "Expected only single entry from extracts."); + InVectors.push_back(V1); + unsigned VF = CommonMask.size(); + for (unsigned Idx = 0; Idx < VF; ++Idx) + if (Mask[Idx] != PoisonMaskElem && CommonMask[Idx] == PoisonMaskElem) + CommonMask[Idx] = Mask[Idx] + VF; } Value *gather(ArrayRef VL, Value *Root = nullptr) { Cost += getBuildVectorCost(VL, Root); @@ -7579,12 +7684,16 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, ArrayRef VL = E->Scalars; Type *ScalarTy = VL[0]->getType(); - if (auto *SI = dyn_cast(VL[0])) - ScalarTy = SI->getValueOperand()->getType(); - else if (auto *CI = dyn_cast(VL[0])) - ScalarTy = CI->getOperand(0)->getType(); - else if (auto *IE = dyn_cast(VL[0])) - ScalarTy = IE->getOperand(1)->getType(); + if (E->State != TreeEntry::NeedToGather) { + if (auto *SI = dyn_cast(VL[0])) + ScalarTy = SI->getValueOperand()->getType(); + else if (auto *CI = dyn_cast(VL[0])) + ScalarTy = CI->getOperand(0)->getType(); + else if (auto *IE = dyn_cast(VL[0])) + ScalarTy = IE->getOperand(1)->getType(); + } + if (!FixedVectorType::isValidElementType(ScalarTy)) + return InstructionCost::getInvalid(); auto *VecTy = FixedVectorType::get(ScalarTy, VL.size()); TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; @@ -7596,7 +7705,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, VecTy = FixedVectorType::get(ScalarTy, VL.size()); } unsigned EntryVF = E->getVectorFactor(); - auto *FinalVecTy = FixedVectorType::get(VecTy->getElementType(), EntryVF); + auto *FinalVecTy = FixedVectorType::get(ScalarTy, EntryVF); bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty(); if (E->State == TreeEntry::NeedToGather) { @@ -7629,20 +7738,28 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, SmallVector Mask; SmallVector ExtractMask; std::optional ExtractShuffle; - std::optional GatherShuffle; - SmallVector Entries; + SmallVector> GatherShuffles; + SmallVector> Entries; // Check for gathered extracts. 
- ExtractShuffle = tryToGatherSingleRegisterExtractElements(GatheredScalars, ExtractMask); + ExtractShuffle = + tryToGatherSingleRegisterExtractElements(GatheredScalars, ExtractMask); bool Resized = false; + unsigned NumParts = TTI->getNumberOfParts(VecTy); + if (NumParts == 0 || NumParts >= GatheredScalars.size()) + NumParts = 1; if (Value *VecBase = Estimator.adjustExtracts( - E, ExtractMask, ExtractShuffle.value_or(TTI::SK_PermuteTwoSrc))) + E, ExtractMask, ExtractShuffle.value_or(TTI::SK_PermuteTwoSrc))) { if (auto *VecBaseTy = dyn_cast(VecBase->getType())) if (VF == VecBaseTy->getNumElements() && GatheredScalars.size() != VF) { Resized = true; GatheredScalars.append(VF - GatheredScalars.size(), PoisonValue::get(ScalarTy)); } + } else if (ExtractShuffle && + TTI->getNumberOfParts(VecTy) == VecTy->getNumElements()) { + copy(VL, GatheredScalars.begin()); + } // Do not try to look for reshuffled loads for gathered loads (they will be // handled later), for vectorized scalars, and cases, which are definitely @@ -7652,12 +7769,12 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, all_of(E->Scalars, [this](Value *V) { return getTreeEntry(V); }) || isSplat(E->Scalars) || (E->Scalars != GatheredScalars && GatheredScalars.size() <= 2)) - GatherShuffle = isGatherShuffledEntry(E, GatheredScalars, Mask, Entries); - if (GatherShuffle) { - assert((Entries.size() == 1 || Entries.size() == 2) && - "Expected shuffle of 1 or 2 entries."); - if (*GatherShuffle == TTI::SK_PermuteSingleSrc && - Entries.front()->isSame(E->Scalars)) { + GatherShuffles = + isGatherShuffledEntry(E, GatheredScalars, Mask, Entries, NumParts); + if (!GatherShuffles.empty()) { + if (GatherShuffles.size() == 1 && + *GatherShuffles.front() == TTI::SK_PermuteSingleSrc && + Entries.front().front()->isSame(E->Scalars)) { // Perfect match in the graph, will reuse the previously vectorized // node. Cost is 0. 
LLVM_DEBUG( @@ -7671,15 +7788,18 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, continue; } if (Mask[I] == PoisonMaskElem) - Mask[I] = Entries.front()->findLaneForValue(V); + Mask[I] = Entries.front().front()->findLaneForValue(V); } - Estimator.add(Entries.front(), Mask); + Estimator.add(*Entries.front().front(), Mask); return Estimator.finalize(E->ReuseShuffleIndices); } if (!Resized) { - unsigned VF1 = Entries.front()->getVectorFactor(); - unsigned VF2 = Entries.back()->getVectorFactor(); - if ((VF == VF1 || VF == VF2) && GatheredScalars.size() != VF) + if (GatheredScalars.size() != VF && + any_of(Entries, [&](ArrayRef TEs) { + return any_of(TEs, [&](const TreeEntry *TE) { + return TE->getVectorFactor() == VF; + }); + })) GatheredScalars.append(VF - GatheredScalars.size(), PoisonValue::get(ScalarTy)); } @@ -7691,7 +7811,21 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, LLVM_DEBUG(dbgs() << "SLP: shuffled " << Entries.size() << " entries for bundle " << shortBundleName(VL) << ".\n"); - Estimator.add(Entries.front(), Entries.back(), Mask); + unsigned SliceSize = E->Scalars.size() / NumParts; + SmallVector VecMask(Mask.size(), PoisonMaskElem); + for (const auto [I, TEs] : enumerate(Entries)) { + if (TEs.empty()) { + assert(!GatherShuffles[I] && + "No shuffles with empty entries list expected."); + continue; + } + assert((TEs.size() == 1 || TEs.size() == 2) && + "Expected shuffle of 1 or 2 entries."); + auto SubMask = ArrayRef(Mask).slice(I * SliceSize, SliceSize); + VecMask.assign(VecMask.size(), PoisonMaskElem); + copy(SubMask, std::next(VecMask.begin(), I * SliceSize)); + Estimator.add(*TEs.front(), *TEs.back(), VecMask); + } if (all_of(GatheredScalars, PoisonValue ::classof)) return Estimator.finalize(E->ReuseShuffleIndices); return Estimator.finalize( @@ -7705,16 +7839,19 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, if (!all_of(GatheredScalars, PoisonValue::classof)) { auto Gathers = ArrayRef(GatheredScalars).take_front(VL.size()); bool SameGathers = VL.equals(Gathers); - Value *BV = Estimator.gather( - Gathers, SameGathers ? nullptr - : Constant::getNullValue(FixedVectorType::get( - ScalarTy, GatheredScalars.size()))); + if (!SameGathers) + return Estimator.finalize( + E->ReuseShuffleIndices, E->Scalars.size(), + [&](Value *&Vec, SmallVectorImpl &Mask) { + Vec = Estimator.gather( + GatheredScalars, Constant::getNullValue(FixedVectorType::get( + ScalarTy, GatheredScalars.size()))); + }); + Value *BV = Estimator.gather(Gathers); SmallVector ReuseMask(Gathers.size(), PoisonMaskElem); std::iota(ReuseMask.begin(), ReuseMask.end(), 0); Estimator.add(BV, ReuseMask); } - if (ExtractShuffle) - Estimator.add(E, std::nullopt); return Estimator.finalize(E->ReuseShuffleIndices); } InstructionCost CommonCost = 0; @@ -9037,16 +9174,10 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef VectorizedVals) { } std::optional -BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef VL, - SmallVectorImpl &Mask, - SmallVectorImpl &Entries) { +BoUpSLP::isGatherShuffledSingleRegisterEntry( + const TreeEntry *TE, ArrayRef VL, MutableArrayRef Mask, + SmallVectorImpl &Entries, unsigned Part) { Entries.clear(); - // No need to check for the topmost gather node. 
- if (TE == VectorizableTree.front().get()) - return std::nullopt; - Mask.assign(VL.size(), PoisonMaskElem); - assert(TE->UserTreeIndices.size() == 1 && - "Expected only single user of the gather node."); // TODO: currently checking only for Scalars in the tree entry, need to count // reused elements too for better cost estimation. const EdgeInfo &TEUseEI = TE->UserTreeIndices.front(); @@ -9121,7 +9252,7 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef VL, UserPHI ? UserPHI->getIncomingBlock(UseEI.EdgeIdx)->getTerminator() : &getLastInstructionInBundle(UseEI.UserTE); if (TEInsertPt == InsertPt) { - // If 2 gathers are operands of the same entry (regardless of wether + // If 2 gathers are operands of the same entry (regardless of whether // user is PHI or else), compare operands indices, use the earlier one // as the base. if (TEUseEI.UserTE == UseEI.UserTE && TEUseEI.EdgeIdx < UseEI.EdgeIdx) @@ -9186,8 +9317,10 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef VL, } } - if (UsedTEs.empty()) + if (UsedTEs.empty()) { + Entries.clear(); return std::nullopt; + } unsigned VF = 0; if (UsedTEs.size() == 1) { @@ -9203,7 +9336,8 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef VL, }); if (It != FirstEntries.end() && (*It)->getVectorFactor() == VL.size()) { Entries.push_back(*It); - std::iota(Mask.begin(), Mask.end(), 0); + std::iota(std::next(Mask.begin(), Part * VL.size()), + std::next(Mask.begin(), (Part + 1) * VL.size()), 0); // Clear undef scalars. for (int I = 0, Sz = VL.size(); I < Sz; ++I) if (isa(VL[I])) @@ -9340,7 +9474,10 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef VL, TempEntries.push_back(Entries[I]); } Entries.swap(TempEntries); - if (EntryLanes.size() == Entries.size() && !VL.equals(TE->Scalars)) { + if (EntryLanes.size() == Entries.size() && + !VL.equals(ArrayRef(TE->Scalars) + .slice(Part * VL.size(), + std::min(VL.size(), TE->Scalars.size())))) { // We may have here 1 or 2 entries only. If the number of scalars is equal // to the number of entries, no need to do the analysis, it is not very // profitable. Since VL is not the same as TE->Scalars, it means we already @@ -9353,9 +9490,10 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef VL, // Pair.first is the offset to the vector, while Pair.second is the index of // scalar in the list. for (const std::pair &Pair : EntryLanes) { - Mask[Pair.second] = Pair.first * VF + - Entries[Pair.first]->findLaneForValue(VL[Pair.second]); - IsIdentity &= Mask[Pair.second] == Pair.second; + unsigned Idx = Part * VL.size() + Pair.second; + Mask[Idx] = Pair.first * VF + + Entries[Pair.first]->findLaneForValue(VL[Pair.second]); + IsIdentity &= Mask[Idx] == Pair.second; } switch (Entries.size()) { case 1: @@ -9370,9 +9508,63 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef VL, break; } Entries.clear(); + // Clear the corresponding mask elements. + std::fill(std::next(Mask.begin(), Part * VL.size()), + std::next(Mask.begin(), (Part + 1) * VL.size()), PoisonMaskElem); return std::nullopt; } +SmallVector> +BoUpSLP::isGatherShuffledEntry( + const TreeEntry *TE, ArrayRef VL, SmallVectorImpl &Mask, + SmallVectorImpl> &Entries, + unsigned NumParts) { + assert(NumParts > 0 && NumParts < VL.size() && + "Expected positive number of registers."); + Entries.clear(); + // No need to check for the topmost gather node. 
+ if (TE == VectorizableTree.front().get()) + return {}; + Mask.assign(VL.size(), PoisonMaskElem); + assert(TE->UserTreeIndices.size() == 1 && + "Expected only single user of the gather node."); + assert(VL.size() % NumParts == 0 && + "Number of scalars must be divisible by NumParts."); + unsigned SliceSize = VL.size() / NumParts; + SmallVector> Res; + for (unsigned Part = 0; Part < NumParts; ++Part) { + ArrayRef SubVL = VL.slice(Part * SliceSize, SliceSize); + SmallVectorImpl &SubEntries = Entries.emplace_back(); + std::optional SubRes = + isGatherShuffledSingleRegisterEntry(TE, SubVL, Mask, SubEntries, Part); + if (!SubRes) + SubEntries.clear(); + Res.push_back(SubRes); + if (SubEntries.size() == 1 && + SubRes.value_or(TTI::SK_PermuteTwoSrc) == TTI::SK_PermuteSingleSrc && + SubEntries.front()->getVectorFactor() == VL.size() && + (SubEntries.front()->isSame(TE->Scalars) || + SubEntries.front()->isSame(VL))) { + Entries.clear(); + Res.clear(); + std::iota(Mask.begin(), Mask.end(), 0); + // Clear undef scalars. + for (int I = 0, Sz = VL.size(); I < Sz; ++I) + if (isa(VL[I])) + Mask[I] = PoisonMaskElem; + Entries.emplace_back(1, SubEntries.front()); + Res.push_back(TargetTransformInfo::SK_PermuteSingleSrc); + return Res; + } + } + if (all_of(Res, + [](const std::optional &SK) { return !SK; })) { + Entries.clear(); + return {}; + } + return Res; +} + InstructionCost BoUpSLP::getGatherCost(ArrayRef VL, bool ForPoisonSrc) const { // Find the type of the operands in VL. @@ -9839,9 +10031,13 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis { } /// Checks if the specified entry \p E needs to be delayed because of its /// dependency nodes. - Value *needToDelay(const TreeEntry *E, ArrayRef Deps) { + Value *needToDelay(const TreeEntry *E, + ArrayRef> Deps) { // No need to delay emission if all deps are ready. - if (all_of(Deps, [](const TreeEntry *TE) { return TE->VectorizedValue; })) + if (all_of(Deps, [](ArrayRef TEs) { + return all_of( + TEs, [](const TreeEntry *TE) { return TE->VectorizedValue; }); + })) return nullptr; // Postpone gather emission, will be emitted after the end of the // process to keep correct order. @@ -10176,9 +10372,13 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) { SmallVector Mask; SmallVector ExtractMask; std::optional ExtractShuffle; - std::optional GatherShuffle; - SmallVector Entries; + SmallVector> GatherShuffles; + SmallVector> Entries; Type *ScalarTy = GatheredScalars.front()->getType(); + unsigned NumParts = TTI->getNumberOfParts( + FixedVectorType::get(ScalarTy, GatheredScalars.size())); + if (NumParts == 0 || NumParts >= GatheredScalars.size()) + NumParts = 1; if (!all_of(GatheredScalars, UndefValue::classof)) { // Check for gathered extracts. ExtractShuffle = @@ -10197,9 +10397,10 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) { all_of(E->Scalars, [this](Value *V) { return getTreeEntry(V); }) || isSplat(E->Scalars) || (E->Scalars != GatheredScalars && GatheredScalars.size() <= 2)) { - GatherShuffle = isGatherShuffledEntry(E, GatheredScalars, Mask, Entries); + GatherShuffles = + isGatherShuffledEntry(E, GatheredScalars, Mask, Entries, NumParts); } - if (GatherShuffle) { + if (!GatherShuffles.empty()) { if (Value *Delayed = ShuffleBuilder.needToDelay(E, Entries)) { // Delay emission of gathers which are not ready yet. PostponedGathers.insert(E); @@ -10207,10 +10408,9 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) { // process to keep correct order. 
return Delayed; } - assert((Entries.size() == 1 || Entries.size() == 2) && - "Expected shuffle of 1 or 2 entries."); - if (*GatherShuffle == TTI::SK_PermuteSingleSrc && - Entries.front()->isSame(E->Scalars)) { + if (GatherShuffles.size() == 1 && + *GatherShuffles.front() == TTI::SK_PermuteSingleSrc && + Entries.front().front()->isSame(E->Scalars)) { // Perfect match in the graph, will reuse the previously vectorized // node. Cost is 0. LLVM_DEBUG( @@ -10218,11 +10418,11 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) { << "SLP: perfect diamond match for gather bundle " << shortBundleName(E->Scalars) << ".\n"); // Restore the mask for previous partially matched values. - if (Entries.front()->ReorderIndices.empty() && - ((Entries.front()->ReuseShuffleIndices.empty() && - E->Scalars.size() == Entries.front()->Scalars.size()) || - (E->Scalars.size() == - Entries.front()->ReuseShuffleIndices.size()))) { + const TreeEntry *FrontTE = Entries.front().front(); + if (FrontTE->ReorderIndices.empty() && + ((FrontTE->ReuseShuffleIndices.empty() && + E->Scalars.size() == FrontTE->Scalars.size()) || + (E->Scalars.size() == FrontTE->ReuseShuffleIndices.size()))) { std::iota(Mask.begin(), Mask.end(), 0); } else { for (auto [I, V] : enumerate(E->Scalars)) { @@ -10230,17 +10430,20 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) { Mask[I] = PoisonMaskElem; continue; } - Mask[I] = Entries.front()->findLaneForValue(V); + Mask[I] = FrontTE->findLaneForValue(V); } } - ShuffleBuilder.add(Entries.front()->VectorizedValue, Mask); + ShuffleBuilder.add(FrontTE->VectorizedValue, Mask); Res = ShuffleBuilder.finalize(E->getCommonMask()); return Res; } if (!Resized) { - unsigned VF1 = Entries.front()->getVectorFactor(); - unsigned VF2 = Entries.back()->getVectorFactor(); - if ((VF == VF1 || VF == VF2) && GatheredScalars.size() != VF) + if (GatheredScalars.size() != VF && + any_of(Entries, [&](ArrayRef TEs) { + return any_of(TEs, [&](const TreeEntry *TE) { + return TE->getVectorFactor() == VF; + }); + })) GatheredScalars.append(VF - GatheredScalars.size(), PoisonValue::get(ScalarTy)); } @@ -10340,9 +10543,9 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) { } } }; - if (ExtractShuffle || GatherShuffle) { + if (ExtractShuffle || !GatherShuffles.empty()) { bool IsNonPoisoned = true; - bool IsUsedInExpr = false; + bool IsUsedInExpr = true; Value *Vec1 = nullptr; if (ExtractShuffle) { // Gather of extractelements can be represented as just a shuffle of @@ -10367,36 +10570,53 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) { } } if (Vec2) { + IsUsedInExpr = false; IsNonPoisoned &= isGuaranteedNotToBePoison(Vec1) && isGuaranteedNotToBePoison(Vec2); ShuffleBuilder.add(Vec1, Vec2, ExtractMask); } else if (Vec1) { - IsUsedInExpr = FindReusedSplat( + IsUsedInExpr &= FindReusedSplat( ExtractMask, cast(Vec1->getType())->getNumElements()); ShuffleBuilder.add(Vec1, ExtractMask); IsNonPoisoned &= isGuaranteedNotToBePoison(Vec1); } else { + IsUsedInExpr = false; ShuffleBuilder.add(PoisonValue::get(FixedVectorType::get( ScalarTy, GatheredScalars.size())), ExtractMask); } } - if (GatherShuffle) { - if (Entries.size() == 1) { - IsUsedInExpr = FindReusedSplat( - Mask, - cast(Entries.front()->VectorizedValue->getType()) - ->getNumElements()); - ShuffleBuilder.add(Entries.front()->VectorizedValue, Mask); - IsNonPoisoned &= - isGuaranteedNotToBePoison(Entries.front()->VectorizedValue); - } else { - 
ShuffleBuilder.add(Entries.front()->VectorizedValue, - Entries.back()->VectorizedValue, Mask); - IsNonPoisoned &= - isGuaranteedNotToBePoison(Entries.front()->VectorizedValue) && - isGuaranteedNotToBePoison(Entries.back()->VectorizedValue); + if (!GatherShuffles.empty()) { + unsigned SliceSize = E->Scalars.size() / NumParts; + SmallVector VecMask(Mask.size(), PoisonMaskElem); + for (const auto [I, TEs] : enumerate(Entries)) { + if (TEs.empty()) { + assert(!GatherShuffles[I] && + "No shuffles with empty entries list expected."); + continue; + } + assert((TEs.size() == 1 || TEs.size() == 2) && + "Expected shuffle of 1 or 2 entries."); + auto SubMask = ArrayRef(Mask).slice(I * SliceSize, SliceSize); + VecMask.assign(VecMask.size(), PoisonMaskElem); + copy(SubMask, std::next(VecMask.begin(), I * SliceSize)); + if (TEs.size() == 1) { + IsUsedInExpr &= FindReusedSplat( + VecMask, + cast(TEs.front()->VectorizedValue->getType()) + ->getNumElements()); + ShuffleBuilder.add(TEs.front()->VectorizedValue, VecMask); + IsNonPoisoned &= + isGuaranteedNotToBePoison(TEs.front()->VectorizedValue); + } else { + IsUsedInExpr = false; + ShuffleBuilder.add(TEs.front()->VectorizedValue, + TEs.back()->VectorizedValue, VecMask); + IsNonPoisoned &= + isGuaranteedNotToBePoison(TEs.front()->VectorizedValue) && + isGuaranteedNotToBePoison(TEs.back()->VectorizedValue); + } } } // Try to figure out best way to combine values: build a shuffle and insert @@ -10407,14 +10627,18 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) { int MSz = Mask.size(); // Try to build constant vector and shuffle with it only if currently we // have a single permutation and more than 1 scalar constants. - bool IsSingleShuffle = !ExtractShuffle || !GatherShuffle; + bool IsSingleShuffle = !ExtractShuffle || GatherShuffles.empty(); bool IsIdentityShuffle = (ExtractShuffle.value_or(TTI::SK_PermuteTwoSrc) == TTI::SK_PermuteSingleSrc && none_of(ExtractMask, [&](int I) { return I >= EMSz; }) && ShuffleVectorInst::isIdentityMask(ExtractMask, EMSz)) || - (GatherShuffle.value_or(TTI::SK_PermuteTwoSrc) == - TTI::SK_PermuteSingleSrc && + (!GatherShuffles.empty() && + all_of(GatherShuffles, + [](const std::optional &SK) { + return SK.value_or(TTI::SK_PermuteTwoSrc) == + TTI::SK_PermuteSingleSrc; + }) && none_of(Mask, [&](int I) { return I >= MSz; }) && ShuffleVectorInst::isIdentityMask(Mask, MSz)); bool EnoughConstsForShuffle = @@ -10590,7 +10814,13 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { continue; } - Builder.SetInsertPoint(IBB->getTerminator()); + // if (any_of(E->getOperand(i), [&](Value *V) { + // auto *I = dyn_cast(V); + // return I && I->getParent() == IBB; + // })) + Builder.SetInsertPoint(IBB->getTerminator()); + // else + // Builder.SetInsertPoint(IBB->getFirstNonPHIOrDbgOrLifetime()); Builder.SetCurrentDebugLocation(PH->getDebugLoc()); Value *Vec = vectorizeOperand(E, i, /*PostponedPHIs=*/true); NewPhi->addIncoming(Vec, IBB); @@ -11254,10 +11484,22 @@ Value *BoUpSLP::vectorizeTree( // The is because source vector that supposed to feed this gather node was // inserted at the end of the block [after stab instruction]. So we need // to adjust insertion point again to the end of block. - if (isa(UserI)) - Builder.SetInsertPoint(PrevVec->getParent()->getTerminator()); - else + if (isa(UserI)) { + // Insert before all users. 
+ Instruction *InsertPt = PrevVec->getParent()->getTerminator(); + for (User *U : PrevVec->users()) { + if (U == UserI) + continue; + auto *UI = dyn_cast(U); + if (!UI || isa(UI) || UI->getParent() != InsertPt->getParent()) + continue; + if (UI->comesBefore(InsertPt)) + InsertPt = UI; + } + Builder.SetInsertPoint(InsertPt); + } else { Builder.SetInsertPoint(PrevVec); + } Builder.SetCurrentDebugLocation(UserI->getDebugLoc()); Value *Vec = vectorizeTree(TE, /*PostponedPHIs=*/false); PrevVec->replaceAllUsesWith(Vec); diff --git a/llvm/test/Transforms/SLPVectorizer/X86/multi-nodes-to-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/multi-nodes-to-shuffle.ll index 21aac98aa3ece..e5b5a5c6c4a00 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/multi-nodes-to-shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/multi-nodes-to-shuffle.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux -slp-threshold=-107 | FileCheck %s -; RUN: opt -passes=slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux -slp-threshold=-107 -mattr=+avx2 | FileCheck %s +; RUN: opt -passes=slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux -slp-threshold=-115 | FileCheck %s +; RUN: opt -passes=slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux -slp-threshold=-115 -mattr=+avx2 | FileCheck %s --check-prefix=AVX2 define void @test(i64 %p0, i64 %p1, i64 %p2, i64 %p3) { ; CHECK-LABEL: @test( @@ -14,18 +14,43 @@ define void @test(i64 %p0, i64 %p1, i64 %p2, i64 %p3) { ; CHECK-NEXT: [[TMP6:%.*]] = sdiv <4 x i64> [[TMP3]], [[TMP3]] ; CHECK-NEXT: [[TMP7:%.*]] = sub <4 x i64> [[TMP5]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = shl <4 x i64> [[TMP4]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP5]], <4 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> [[TMP6]], <4 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP5]], <4 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP11]], <4 x i64> [[TMP6]], <4 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = or <4 x i64> [[TMP10]], [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = trunc <4 x i64> [[TMP13]] to <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP5]], <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i64> [[TMP6]], <4 x i64> [[TMP5]], <4 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> [[TMP10]], <4 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP5]], <4 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i64> [[TMP6]], <4 x i64> [[TMP5]], <4 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i64> [[TMP12]], <4 x i64> [[TMP13]], <4 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = or <4 x i64> [[TMP11]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = trunc <4 x i64> [[TMP15]] to <4 x i32> ; CHECK-NEXT: br label [[BB:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[TMP15:%.*]] = phi <4 x i32> [ [[TMP16:%.*]], [[BB]] ], [ [[TMP14]], [[ENTRY:%.*]] ] -; CHECK-NEXT: [[TMP16]] = trunc <4 x i64> [[TMP8]] to <4 x i32> +; CHECK-NEXT: [[TMP17:%.*]] = phi <4 x i32> [ [[TMP18:%.*]], [[BB]] ], [ [[TMP16]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP18]] = trunc <4 x i64> [[TMP8]] to <4 x i32> ; CHECK-NEXT: br label [[BB]] ; +; AVX2-LABEL: @test( +; AVX2-NEXT: entry: +; AVX2-NEXT: [[TMP0:%.*]] = insertelement <4 x i64> poison, i64 [[P0:%.*]], i32 0 +; AVX2-NEXT: [[TMP1:%.*]] = 
insertelement <4 x i64> [[TMP0]], i64 [[P1:%.*]], i32 1 +; AVX2-NEXT: [[TMP2:%.*]] = insertelement <4 x i64> [[TMP1]], i64 [[P2:%.*]], i32 2 +; AVX2-NEXT: [[TMP3:%.*]] = insertelement <4 x i64> [[TMP2]], i64 [[P3:%.*]], i32 3 +; AVX2-NEXT: [[TMP4:%.*]] = add <4 x i64> [[TMP3]], [[TMP3]] +; AVX2-NEXT: [[TMP5:%.*]] = mul <4 x i64> [[TMP3]], [[TMP3]] +; AVX2-NEXT: [[TMP6:%.*]] = sdiv <4 x i64> [[TMP3]], [[TMP3]] +; AVX2-NEXT: [[TMP7:%.*]] = sub <4 x i64> [[TMP5]], [[TMP6]] +; AVX2-NEXT: [[TMP8:%.*]] = shl <4 x i64> [[TMP4]], [[TMP7]] +; AVX2-NEXT: [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP5]], <4 x i32> +; AVX2-NEXT: [[TMP10:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> [[TMP6]], <4 x i32> +; AVX2-NEXT: [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP5]], <4 x i32> +; AVX2-NEXT: [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP11]], <4 x i64> [[TMP6]], <4 x i32> +; AVX2-NEXT: [[TMP13:%.*]] = or <4 x i64> [[TMP10]], [[TMP12]] +; AVX2-NEXT: [[TMP14:%.*]] = trunc <4 x i64> [[TMP13]] to <4 x i32> +; AVX2-NEXT: br label [[BB:%.*]] +; AVX2: bb: +; AVX2-NEXT: [[TMP15:%.*]] = phi <4 x i32> [ [[TMP16:%.*]], [[BB]] ], [ [[TMP14]], [[ENTRY:%.*]] ] +; AVX2-NEXT: [[TMP16]] = trunc <4 x i64> [[TMP8]] to <4 x i32> +; AVX2-NEXT: br label [[BB]] +; entry: %a0 = add i64 %p0, %p0 %a1 = add i64 %p1, %p1 From bc792a284362696c91599f9ab01f74eda4b9108f Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 26 Oct 2023 09:05:16 -0400 Subject: [PATCH 066/877] [libc++] Encode additional ODR-affecting properties in the ABI tag (#69669) As explained in `__config`, we have an ABI tag that we use to ensure that we don't run into ODR issues when mixing different versions of libc++ in multiple TUs. However, the reasoning behind that extends not only to different versions of libc++, but also to different configurations of the same version of libc++. In fact, we've been aware of this for a while but never really bothered to make the change because ODR issues are often thought to be benign. Well, it turns out that I just spent over an hour banging my head against an issue that boils down to our lack of encoding of some ODR properties in the ABI tag, so here's the patch we should have done a long time ago. For now, the ODR properties we encode in the ABI tag are: - library version - exceptions vs no-exceptions - hardening mode Those are all things that we support different values for on a per-TU basis and they definitely affect ODR in a meaningful way. We can add more properties later as we see fit. --- libcxx/include/__config | 58 +++++++++++---- .../libcxx/odr_signature.exceptions.sh.cpp | 46 ++++++++++++ .../libcxx/odr_signature.hardening.sh.cpp | 72 +++++++++++++++++++ 3 files changed, 161 insertions(+), 15 deletions(-) create mode 100644 libcxx/test/libcxx/odr_signature.exceptions.sh.cpp create mode 100644 libcxx/test/libcxx/odr_signature.hardening.sh.cpp diff --git a/libcxx/include/__config b/libcxx/include/__config index 65ce6d6a27f83..4bf171f998c6f 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -56,10 +56,6 @@ # define _LIBCPP_CONCAT_IMPL(_X, _Y) _X##_Y # define _LIBCPP_CONCAT(_X, _Y) _LIBCPP_CONCAT_IMPL(_X, _Y) -// Valid C++ identifier that revs with every libc++ version. This can be used to -// generate identifiers that must be unique for every released libc++ version. 
-# define _LIBCPP_VERSIONED_IDENTIFIER _LIBCPP_CONCAT(v, _LIBCPP_VERSION) - # if __STDC_HOSTED__ == 0 # define _LIBCPP_FREESTANDING # endif @@ -734,22 +730,54 @@ typedef __char32_t char32_t; # define _LIBCPP_EXCLUDE_FROM_EXPLICIT_INSTANTIATION _LIBCPP_ALWAYS_INLINE # endif +# if _LIBCPP_ENABLE_HARDENED_MODE +# define _LIBCPP_HARDENING_SIG h +# elif _LIBCPP_ENABLE_SAFE_MODE +# define _LIBCPP_HARDENING_SIG s +# elif _LIBCPP_ENABLE_DEBUG_MODE +# define _LIBCPP_HARDENING_SIG d +# else +# define _LIBCPP_HARDENING_SIG u // for unchecked +# endif + +# ifdef _LIBCPP_HAS_NO_EXCEPTIONS +# define _LIBCPP_EXCEPTIONS_SIG n +# else +# define _LIBCPP_EXCEPTIONS_SIG e +# endif + +# define _LIBCPP_ODR_SIGNATURE \ + _LIBCPP_CONCAT(_LIBCPP_CONCAT(_LIBCPP_HARDENING_SIG, _LIBCPP_EXCEPTIONS_SIG), _LIBCPP_VERSION) + // This macro marks a symbol as being hidden from libc++'s ABI. This is achieved // on two levels: // 1. The symbol is given hidden visibility, which ensures that users won't start exporting // symbols from their dynamic library by means of using the libc++ headers. This ensures // that those symbols stay private to the dynamic library in which it is defined. // -// 2. The symbol is given an ABI tag that changes with each version of libc++. This ensures -// that no ODR violation can arise from mixing two TUs compiled with different versions -// of libc++ where we would have changed the definition of a symbol. If the symbols shared -// the same name, the ODR would require that their definitions be token-by-token equivalent, -// which basically prevents us from being able to make any change to any function in our -// headers. Using this ABI tag ensures that the symbol name is "bumped" artificially at -// each release, which lets us change the definition of these symbols at our leisure. -// Note that historically, this has been achieved in various ways, including force-inlining -// all functions or giving internal linkage to all functions. Both these (previous) solutions -// suffer from drawbacks that lead notably to code bloat. +// 2. The symbol is given an ABI tag that encodes the ODR-relevant properties of the library. +// This ensures that no ODR violation can arise from mixing two TUs compiled with different +// versions or configurations of libc++ (such as exceptions vs no-exceptions). Indeed, if the +// program contains two definitions of a function, the ODR requires them to be token-by-token +// equivalent, and the linker is allowed to pick either definition and discard the other one. +// +// For example, if a program contains a copy of `vector::at()` compiled with exceptions enabled +// *and* a copy of `vector::at()` compiled with exceptions disabled (by means of having two TUs +// compiled with different settings), the two definitions are both visible by the linker and they +// have the same name, but they have a meaningfully different implementation (one throws an exception +// and the other aborts the program). This violates the ODR and makes the program ill-formed, and in +// practice what will happen is that the linker will pick one of the definitions at random and will +// discard the other one. This can quite clearly lead to incorrect program behavior. +// +// A similar reasoning holds for many other properties that are ODR-affecting. Essentially any +// property that causes the code of a function to differ from the code in another configuration +// can be considered ODR-affecting. 
In practice, we don't encode all such properties in the ABI +// tag, but we encode the ones that we think are most important: library version, exceptions, and +// hardening mode. +// +// Note that historically, solving this problem has been achieved in various ways, including +// force-inlining all functions or giving internal linkage to all functions. Both these previous +// solutions suffer from drawbacks that lead notably to code bloat. // // Note that we use _LIBCPP_EXCLUDE_FROM_EXPLICIT_INSTANTIATION to ensure that we don't depend // on _LIBCPP_HIDE_FROM_ABI methods of classes explicitly instantiated in the dynamic library. @@ -769,7 +797,7 @@ typedef __char32_t char32_t; # ifndef _LIBCPP_NO_ABI_TAG # define _LIBCPP_HIDE_FROM_ABI \ _LIBCPP_HIDDEN _LIBCPP_EXCLUDE_FROM_EXPLICIT_INSTANTIATION \ - __attribute__((__abi_tag__(_LIBCPP_TOSTRING(_LIBCPP_VERSIONED_IDENTIFIER)))) + __attribute__((__abi_tag__(_LIBCPP_TOSTRING(_LIBCPP_ODR_SIGNATURE)))) # else # define _LIBCPP_HIDE_FROM_ABI _LIBCPP_HIDDEN _LIBCPP_EXCLUDE_FROM_EXPLICIT_INSTANTIATION # endif diff --git a/libcxx/test/libcxx/odr_signature.exceptions.sh.cpp b/libcxx/test/libcxx/odr_signature.exceptions.sh.cpp new file mode 100644 index 0000000000000..6bf60b5e82d3c --- /dev/null +++ b/libcxx/test/libcxx/odr_signature.exceptions.sh.cpp @@ -0,0 +1,46 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// TODO: Investigate +// XFAIL: msvc + +// Test that we encode whether exceptions are supported in an ABI tag to avoid +// ODR violations when linking TUs that have different values for it. + +// RUN: %{cxx} %s %{flags} %{compile_flags} -c -DTU1 -fno-exceptions -o %t.tu1.o +// RUN: %{cxx} %s %{flags} %{compile_flags} -c -DTU2 -fexceptions -o %t.tu2.o +// RUN: %{cxx} %s %{flags} %{compile_flags} -c -DMAIN -o %t.main.o +// RUN: %{cxx} %t.tu1.o %t.tu2.o %t.main.o %{flags} %{link_flags} -o %t.exe +// RUN: %{exec} %t.exe + +// -fno-exceptions +#ifdef TU1 +# include <__config> +_LIBCPP_HIDE_FROM_ABI inline int f() { return 1; } +int tu1() { return f(); } +#endif // TU1 + +// -fexceptions +#ifdef TU2 +# include <__config> +_LIBCPP_HIDE_FROM_ABI inline int f() { return 2; } +int tu2() { return f(); } +#endif // TU2 + +#ifdef MAIN +# include + +int tu1(); +int tu2(); + +int main(int, char**) { + assert(tu1() == 1); + assert(tu2() == 2); + return 0; +} +#endif // MAIN diff --git a/libcxx/test/libcxx/odr_signature.hardening.sh.cpp b/libcxx/test/libcxx/odr_signature.hardening.sh.cpp new file mode 100644 index 0000000000000..3ae95c8910a92 --- /dev/null +++ b/libcxx/test/libcxx/odr_signature.hardening.sh.cpp @@ -0,0 +1,72 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// TODO: Remove these UNSUPPORTED lines once we change how hardening is enabled to avoid +// mutually exclusive modes being enabled at the same time. 
+// UNSUPPORTED: libcpp-hardening-mode=hardened
+// UNSUPPORTED: libcpp-hardening-mode=safe
+// UNSUPPORTED: libcpp-hardening-mode=debug
+
+// TODO: Investigate
+// XFAIL: msvc
+
+// Test that we encode the hardening mode in an ABI tag to avoid ODR violations
+// when linking TUs that have different values for it.
+
+// RUN: %{cxx} %s %{flags} %{compile_flags} -c -DTU1 -D_LIBCPP_ENABLE_HARDENED_MODE -o %t.tu1.o
+// RUN: %{cxx} %s %{flags} %{compile_flags} -c -DTU2 -D_LIBCPP_ENABLE_SAFE_MODE -o %t.tu2.o
+// RUN: %{cxx} %s %{flags} %{compile_flags} -c -DTU3 -D_LIBCPP_ENABLE_DEBUG_MODE -o %t.tu3.o
+// RUN: %{cxx} %s %{flags} %{compile_flags} -c -DTU4 -o %t.tu4.o
+// RUN: %{cxx} %s %{flags} %{compile_flags} -c -DMAIN -o %t.main.o
+// RUN: %{cxx} %t.tu1.o %t.tu2.o %t.tu3.o %t.tu4.o %t.main.o %{flags} %{link_flags} -o %t.exe
+// RUN: %{exec} %t.exe
+
+// hardened mode
+#ifdef TU1
+# include <__config>
+_LIBCPP_HIDE_FROM_ABI inline int f() { return 1; }
+int tu1() { return f(); }
+#endif // TU1
+
+// safe mode
+#ifdef TU2
+# include <__config>
+_LIBCPP_HIDE_FROM_ABI inline int f() { return 2; }
+int tu2() { return f(); }
+#endif // TU2
+
+// debug mode
+#ifdef TU3
+# include <__config>
+_LIBCPP_HIDE_FROM_ABI inline int f() { return 3; }
+int tu3() { return f(); }
+#endif // TU3
+
+// unchecked mode
+#ifdef TU4
+# include <__config>
+_LIBCPP_HIDE_FROM_ABI inline int f() { return 4; }
+int tu4() { return f(); }
+#endif // TU4
+
+#ifdef MAIN
+# include <cassert>
+
+int tu1();
+int tu2();
+int tu3();
+int tu4();
+
+int main(int, char**) {
+  assert(tu1() == 1);
+  assert(tu2() == 2);
+  assert(tu3() == 3);
+  assert(tu4() == 4);
+  return 0;
+}
+#endif // MAIN

From 6282b745e09d57a29e8221db638a7d393d66608d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?=
Date: Thu, 26 Oct 2023 15:09:42 +0200
Subject: [PATCH 067/877] [clang][Interp][NFC] Fix a doc comment

---
 clang/lib/AST/Interp/Descriptor.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/AST/Interp/Descriptor.h b/clang/lib/AST/Interp/Descriptor.h
index 2fd4e92082645..6ee9bbe85d71d 100644
--- a/clang/lib/AST/Interp/Descriptor.h
+++ b/clang/lib/AST/Interp/Descriptor.h
@@ -84,7 +84,7 @@ struct Descriptor final {
   const unsigned ElemSize;
   /// Size of the storage, in host bytes.
   const unsigned Size;
-  // Size of the metadata.
+  /// Size of the metadata.
   const unsigned MDSize;
   /// Size of the allocation (storage + metadata), in host bytes.
   const unsigned AllocSize;

From 1b6b4d6a08321fb914127dadcd6677dcd9b1b222 Mon Sep 17 00:00:00 2001
From: Qizhi Hu <836744285@qq.com>
Date: Thu, 26 Oct 2023 21:11:51 +0800
Subject: [PATCH 068/877] [analyzer] Loop should contain CXXForRangeStmt (#70190)

With loop widening enabled, the static analyzer fails to report
diagnostics for statements that follow a CXXForRangeStmt, because
`ExprEngine::processCFGBlockEntrance` does not handle CXXForRangeStmt:
once `BlockCount` reaches `AMgr.options.maxBlockVisitOnPath - 1`, the
loop cannot be widened, so on the next iteration
`BlockCount >= AMgr.options.maxBlockVisitOnPath` holds and a sink node
is generated. Handling `CXXForRangeStmt` in the widening checks fixes
this.
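In essence, the previously missed pattern looks like this (reduced from
the regression test added below; for illustration only):

  int test_for_range_loop() {
    int arr[10] = {0};
    for (auto x : arr) // the loop is now widened, invalidating `arr`
      ++x;
    if (arr[0] == 0)   // assumed true after widening
      return 1 / arr[0]; // now diagnosed: Division by zero
    return 0;
  }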
Co-authored-by: huqizhi <836744285@qq.com>
---
 clang/lib/StaticAnalyzer/Core/ExprEngine.cpp   |  2 +-
 clang/lib/StaticAnalyzer/Core/LoopWidening.cpp |  4 +++-
 clang/test/Analysis/loop-widening-notes.cpp    | 12 ++++++++++++
 3 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index 451ee91b94533..2e67fb953e456 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -2509,7 +2509,7 @@ void ExprEngine::processCFGBlockEntrance(const BlockEdge &L,
   if (BlockCount == AMgr.options.maxBlockVisitOnPath - 1 &&
       AMgr.options.ShouldWidenLoops) {
     const Stmt *Term = nodeBuilder.getContext().getBlock()->getTerminatorStmt();
-    if (!isa_and_nonnull<ForStmt, WhileStmt, DoStmt>(Term))
+    if (!isa_and_nonnull<ForStmt, WhileStmt, DoStmt, CXXForRangeStmt>(Term))
       return;
     // Widen.
     const LocationContext *LCtx = Pred->getLocationContext();
diff --git a/clang/lib/StaticAnalyzer/Core/LoopWidening.cpp b/clang/lib/StaticAnalyzer/Core/LoopWidening.cpp
index a3b29ff487e4e..9e42801760622 100644
--- a/clang/lib/StaticAnalyzer/Core/LoopWidening.cpp
+++ b/clang/lib/StaticAnalyzer/Core/LoopWidening.cpp
@@ -35,6 +35,8 @@ static const Expr *getLoopCondition(const Stmt *LoopStmt) {
     return cast<WhileStmt>(LoopStmt)->getCond();
   case Stmt::DoStmtClass:
     return cast<DoStmt>(LoopStmt)->getCond();
+  case Stmt::CXXForRangeStmtClass:
+    return cast<CXXForRangeStmt>(LoopStmt)->getCond();
   }
 }
@@ -45,7 +47,7 @@ ProgramStateRef getWidenedLoopState(ProgramStateRef PrevState,
                                     const LocationContext *LCtx,
                                     unsigned BlockCount, const Stmt *LoopStmt) {
-  assert((isa<ForStmt, WhileStmt, DoStmt>(LoopStmt)));
+  assert((isa<ForStmt, WhileStmt, DoStmt, CXXForRangeStmt>(LoopStmt)));
 
   // Invalidate values in the current state.
   // TODO Make this more conservative by only invalidating values that might
diff --git a/clang/test/Analysis/loop-widening-notes.cpp b/clang/test/Analysis/loop-widening-notes.cpp
index 0ba71d030d058..a3f030dfe9882 100644
--- a/clang/test/Analysis/loop-widening-notes.cpp
+++ b/clang/test/Analysis/loop-widening-notes.cpp
@@ -70,3 +70,15 @@ int test_for_loop() {
   return flag_d / num; // no-crash expected-warning {{Division by zero}}
                        // expected-note@-1 {{Division by zero}}
 }
+
+int test_for_range_loop() {
+  int arr[10] = {0};
+  for(auto x : arr) { // expected-note {{Assigning value}}
+    ++x;
+  }
+  if (arr[0] == 0) // expected-note {{Assuming the condition is true}}
+                   // expected-note@-1 {{Taking true branch}}
+    return 1/arr[0]; // expected-warning {{Division by zero}}
+                     // expected-note@-1 {{Division by zero}}
+  return 0;
+}

From 7f677fe3100131214386f9ce1fa308c235a595e9 Mon Sep 17 00:00:00 2001
From: Timm Baeder
Date: Thu, 26 Oct 2023 15:15:25 +0200
Subject: [PATCH 069/877] [clang][Interp] Add explicit dummy descriptors (#68888)

Instead of (ab)using incomplete array types for this, add a 'Dummy' bit
to Descriptor. We need to be able to differentiate between the two when
adding an offset.
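As a rough illustration (a hypothetical C++ reduction; the tests added
below exercise the C equivalents via _Static_assert), the dummy path is
hit for declarations the interpreter has no storage for:

  // Compile with -fexperimental-new-constant-interpreter.
  extern int a; // declared but never defined: modeled by a dummy block
  static_assert(&a != nullptr, "");     // folds although `a` has no storage
  static_assert(&a + 1 != nullptr, ""); // offsetting the dummy pointer must not assert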
--- clang/lib/AST/Interp/Descriptor.cpp | 7 +++++++ clang/lib/AST/Interp/Descriptor.h | 6 ++++++ clang/lib/AST/Interp/Interp.cpp | 6 ++++++ clang/lib/AST/Interp/Interp.h | 20 ++++++++++++++------ clang/lib/AST/Interp/InterpBuiltin.cpp | 3 +++ clang/lib/AST/Interp/Pointer.h | 2 ++ clang/lib/AST/Interp/Program.cpp | 20 +++++++++++--------- clang/test/AST/Interp/c.c | 20 ++++++++++++++++++++ 8 files changed, 69 insertions(+), 15 deletions(-) diff --git a/clang/lib/AST/Interp/Descriptor.cpp b/clang/lib/AST/Interp/Descriptor.cpp index 2a21f60588d46..59a952135a2d8 100644 --- a/clang/lib/AST/Interp/Descriptor.cpp +++ b/clang/lib/AST/Interp/Descriptor.cpp @@ -296,6 +296,13 @@ Descriptor::Descriptor(const DeclTy &D, Record *R, MetadataSize MD, assert(Source && "Missing source"); } +Descriptor::Descriptor(const DeclTy &D, MetadataSize MD) + : Source(D), ElemSize(1), Size(ElemSize), MDSize(MD.value_or(0)), + AllocSize(Size + MDSize), ElemRecord(nullptr), IsConst(true), + IsMutable(false), IsTemporary(false), IsDummy(true) { + assert(Source && "Missing source"); +} + QualType Descriptor::getType() const { if (auto *E = asExpr()) return E->getType(); diff --git a/clang/lib/AST/Interp/Descriptor.h b/clang/lib/AST/Interp/Descriptor.h index 6ee9bbe85d71d..8135f3d12f703 100644 --- a/clang/lib/AST/Interp/Descriptor.h +++ b/clang/lib/AST/Interp/Descriptor.h @@ -111,6 +111,8 @@ struct Descriptor final { const bool IsTemporary = false; /// Flag indicating if the block is an array. const bool IsArray = false; + /// Flag indicating if this is a dummy descriptor. + const bool IsDummy = false; /// Storage management methods. const BlockCtorFn CtorFn = nullptr; @@ -139,6 +141,8 @@ struct Descriptor final { Descriptor(const DeclTy &D, Record *R, MetadataSize MD, bool IsConst, bool IsTemporary, bool IsMutable); + Descriptor(const DeclTy &D, MetadataSize MD); + QualType getType() const; QualType getElemQualType() const; SourceLocation getLocation() const; @@ -192,6 +196,8 @@ struct Descriptor final { bool isArray() const { return IsArray; } /// Checks if the descriptor is of a record. bool isRecord() const { return !IsArray && ElemRecord; } + /// Checks if this is a dummy descriptor. + bool isDummy() const { return IsDummy; } }; /// Bitfield tracking the initialisation status of elements of primitive arrays. diff --git a/clang/lib/AST/Interp/Interp.cpp b/clang/lib/AST/Interp/Interp.cpp index 4a4c0922758c9..31d43b6010c18 100644 --- a/clang/lib/AST/Interp/Interp.cpp +++ b/clang/lib/AST/Interp/Interp.cpp @@ -215,6 +215,10 @@ bool CheckLive(InterpState &S, CodePtr OpPC, const Pointer &Ptr, return true; } +bool CheckDummy(InterpState &S, CodePtr OpPC, const Pointer &Ptr) { + return !Ptr.isDummy(); +} + bool CheckNull(InterpState &S, CodePtr OpPC, const Pointer &Ptr, CheckSubobjectKind CSK) { if (!Ptr.isZero()) @@ -297,6 +301,8 @@ bool CheckInitialized(InterpState &S, CodePtr OpPC, const Pointer &Ptr, } bool CheckLoad(InterpState &S, CodePtr OpPC, const Pointer &Ptr) { + if (!CheckDummy(S, OpPC, Ptr)) + return false; if (!CheckLive(S, OpPC, Ptr, AK_Read)) return false; if (!CheckExtern(S, OpPC, Ptr)) diff --git a/clang/lib/AST/Interp/Interp.h b/clang/lib/AST/Interp/Interp.h index 86cc267652951..3e1b4f32e8b69 100644 --- a/clang/lib/AST/Interp/Interp.h +++ b/clang/lib/AST/Interp/Interp.h @@ -55,6 +55,10 @@ bool CheckArray(InterpState &S, CodePtr OpPC, const Pointer &Ptr); /// Checks if a pointer is live and accessible. 
bool CheckLive(InterpState &S, CodePtr OpPC, const Pointer &Ptr, AccessKinds AK); + +/// Checks if a pointer is a dummy pointer. +bool CheckDummy(InterpState &S, CodePtr OpPC, const Pointer &Ptr); + /// Checks if a pointer is null. bool CheckNull(InterpState &S, CodePtr OpPC, const Pointer &Ptr, CheckSubobjectKind CSK); @@ -1415,8 +1419,9 @@ bool OffsetHelper(InterpState &S, CodePtr OpPC, const T &Offset, // Compute the largest index into the array. T MaxIndex = T::from(Ptr.getNumElems(), Offset.bitWidth()); + bool Invalid = false; // Helper to report an invalid offset, computed as APSInt. - auto InvalidOffset = [&]() { + auto DiagInvalidOffset = [&]() -> void { const unsigned Bits = Offset.bitWidth(); APSInt APOffset(Offset.toAPSInt().extend(Bits + 2), false); APSInt APIndex(Index.toAPSInt().extend(Bits + 2), false); @@ -1426,28 +1431,31 @@ bool OffsetHelper(InterpState &S, CodePtr OpPC, const T &Offset, << NewIndex << /*array*/ static_cast(!Ptr.inArray()) << static_cast(MaxIndex); - return false; + Invalid = true; }; T MaxOffset = T::from(MaxIndex - Index, Offset.bitWidth()); if constexpr (Op == ArithOp::Add) { // If the new offset would be negative, bail out. if (Offset.isNegative() && (Offset.isMin() || -Offset > Index)) - return InvalidOffset(); + DiagInvalidOffset(); // If the new offset would be out of bounds, bail out. if (Offset.isPositive() && Offset > MaxOffset) - return InvalidOffset(); + DiagInvalidOffset(); } else { // If the new offset would be negative, bail out. if (Offset.isPositive() && Index < Offset) - return InvalidOffset(); + DiagInvalidOffset(); // If the new offset would be out of bounds, bail out. if (Offset.isNegative() && (Offset.isMin() || -Offset > MaxOffset)) - return InvalidOffset(); + DiagInvalidOffset(); } + if (Invalid && !Ptr.isDummy()) + return false; + // Offset is valid - compute it on unsigned. int64_t WideIndex = static_cast(Index); int64_t WideOffset = static_cast(Offset); diff --git a/clang/lib/AST/Interp/InterpBuiltin.cpp b/clang/lib/AST/Interp/InterpBuiltin.cpp index 7552c1b88cff6..e329794cb7924 100644 --- a/clang/lib/AST/Interp/InterpBuiltin.cpp +++ b/clang/lib/AST/Interp/InterpBuiltin.cpp @@ -152,6 +152,9 @@ static bool interp__builtin_strlen(InterpState &S, CodePtr OpPC, if (!CheckLive(S, OpPC, StrPtr, AK_Read)) return false; + if (!CheckDummy(S, OpPC, StrPtr)) + return false; + assert(StrPtr.getFieldDesc()->isPrimitiveArray()); size_t Len = 0; diff --git a/clang/lib/AST/Interp/Pointer.h b/clang/lib/AST/Interp/Pointer.h index 65d710077fd1c..b371b306fe7a7 100644 --- a/clang/lib/AST/Interp/Pointer.h +++ b/clang/lib/AST/Interp/Pointer.h @@ -314,6 +314,8 @@ class Pointer { bool isActive() const { return Base == 0 || getInlineDesc()->IsActive; } /// Checks if a structure is a base class. bool isBaseClass() const { return isField() && getInlineDesc()->IsBase; } + /// Checks if the pointer pointers to a dummy value. + bool isDummy() const { return getDeclDesc()->isDummy(); } /// Checks if an object or a subfield is mutable. bool isConst() const { diff --git a/clang/lib/AST/Interp/Program.cpp b/clang/lib/AST/Interp/Program.cpp index 65e170881e313..c6d19afd7d221 100644 --- a/clang/lib/AST/Interp/Program.cpp +++ b/clang/lib/AST/Interp/Program.cpp @@ -144,16 +144,18 @@ std::optional Program::getOrCreateDummy(const ValueDecl *PD) { It != DummyParams.end()) return It->second; - // Create a pointer to an incomplete array of the specified elements. 
- QualType ElemTy = PD->getType(); - QualType Ty = - Ctx.getASTContext().getIncompleteArrayType(ElemTy, ArrayType::Normal, 0); + // Create dummy descriptor. + Descriptor *Desc = allocateDescriptor(PD, std::nullopt); + // Allocate a block for storage. + unsigned I = Globals.size(); - if (auto Idx = createGlobal(PD, Ty, /*isStatic=*/true, /*isExtern=*/true)) { - DummyParams[PD] = *Idx; - return Idx; - } - return std::nullopt; + auto *G = new (Allocator, Desc->getAllocSize()) + Global(getCurrentDecl(), Desc, /*IsStatic=*/true, /*IsExtern=*/false); + G->block()->invokeCtor(); + + Globals.push_back(G); + DummyParams[PD] = I; + return I; } std::optional Program::createGlobal(const ValueDecl *VD, diff --git a/clang/test/AST/Interp/c.c b/clang/test/AST/Interp/c.c index 974ca72702f7d..e8aa8b8599f21 100644 --- a/clang/test/AST/Interp/c.c +++ b/clang/test/AST/Interp/c.c @@ -47,3 +47,23 @@ _Static_assert(&a != 0, ""); // ref-warning {{always true}} \ // expected-warning {{always true}} \ // pedantic-expected-warning {{always true}} \ // pedantic-expected-warning {{is a GNU extension}} +_Static_assert((&c + 1) != 0, ""); // pedantic-ref-warning {{is a GNU extension}} \ + // pedantic-expected-warning {{is a GNU extension}} +_Static_assert((&a + 100) != 0, ""); // pedantic-ref-warning {{is a GNU extension}} \ + // pedantic-ref-note {{100 of non-array}} \ + // pedantic-expected-note {{100 of non-array}} \ + // pedantic-expected-warning {{is a GNU extension}} +_Static_assert((&a - 100) != 0, ""); // pedantic-ref-warning {{is a GNU extension}} \ + // pedantic-expected-warning {{is a GNU extension}} \ + // pedantic-ref-note {{-100 of non-array}} \ + // pedantic-expected-note {{-100 of non-array}} +/// extern variable of a composite type. +/// FIXME: The 'cast from void*' note is missing in the new interpreter. 
+extern struct Test50S Test50; +_Static_assert(&Test50 != (void*)0, ""); // ref-warning {{always true}} \ + // pedantic-ref-warning {{always true}} \ + // pedantic-ref-warning {{is a GNU extension}} \ + // pedantic-ref-note {{cast from 'void *' is not allowed}} \ + // expected-warning {{always true}} \ + // pedantic-expected-warning {{always true}} \ + // pedantic-expected-warning {{is a GNU extension}} From 585da2651ff5d3a2645aa54813fb2b7928d88f55 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 26 Oct 2023 14:39:37 +0100 Subject: [PATCH 070/877] [SLP][X86] Regenerate hadd/hsub tests with full set of check-prefixes Prep for D148855 --- .../SLPVectorizer/X86/hadd-inseltpoison.ll | 36 ++++++++++--------- .../test/Transforms/SLPVectorizer/X86/hadd.ll | 36 ++++++++++--------- .../SLPVectorizer/X86/hsub-inseltpoison.ll | 19 ++++++---- .../test/Transforms/SLPVectorizer/X86/hsub.ll | 19 ++++++---- 4 files changed, 64 insertions(+), 46 deletions(-) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll index 5223cac571489..0217ddcac0046 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=SLM -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2 -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,SSE +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,SLM +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512 ; ; 128-bit vectors @@ -215,17 +215,17 @@ define <4 x double> @test_v4f64_partial_swizzle(<4 x double> %a, <4 x double> %b ; SLM-NEXT: [[R03:%.*]] = insertelement <4 x double> [[TMP4]], double [[R3]], i64 3 ; SLM-NEXT: ret <4 x double> [[R03]] ; -; AVX-LABEL: @test_v4f64_partial_swizzle( -; AVX-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i64 0 -; AVX-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i64 1 -; AVX-NEXT: [[R0:%.*]] = fadd double [[A0]], [[A1]] -; AVX-NEXT: 
[[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> -; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <2 x i32> -; AVX-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] -; AVX-NEXT: [[R00:%.*]] = insertelement <4 x double> poison, double [[R0]], i64 0 -; AVX-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> -; AVX-NEXT: [[R031:%.*]] = shufflevector <4 x double> [[R00]], <4 x double> [[TMP4]], <4 x i32> -; AVX-NEXT: ret <4 x double> [[R031]] +; AVX1-LABEL: @test_v4f64_partial_swizzle( +; AVX1-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i64 0 +; AVX1-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i64 1 +; AVX1-NEXT: [[R0:%.*]] = fadd double [[A0]], [[A1]] +; AVX1-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> +; AVX1-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <2 x i32> +; AVX1-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] +; AVX1-NEXT: [[R00:%.*]] = insertelement <4 x double> poison, double [[R0]], i64 0 +; AVX1-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> +; AVX1-NEXT: [[R031:%.*]] = shufflevector <4 x double> [[R00]], <4 x double> [[TMP4]], <4 x i32> +; AVX1-NEXT: ret <4 x double> [[R031]] ; ; AVX2-LABEL: @test_v4f64_partial_swizzle( ; AVX2-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i64 0 @@ -448,3 +448,5 @@ define <16 x i16> @test_v16i16(<16 x i16> %a, <16 x i16> %b) { %rv15 = insertelement <16 x i16> %rv14, i16 %r15, i32 15 ret <16 x i16> %rv15 } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; AVX: {{.*}} diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll b/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll index 45514a7a7150d..c38d116a7a323 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=SLM -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2 -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,SSE +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,SLM +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX2 +; RUN: opt < %s 
-mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512 ; ; 128-bit vectors @@ -215,17 +215,17 @@ define <4 x double> @test_v4f64_partial_swizzle(<4 x double> %a, <4 x double> %b ; SLM-NEXT: [[R03:%.*]] = insertelement <4 x double> [[TMP4]], double [[R3]], i64 3 ; SLM-NEXT: ret <4 x double> [[R03]] ; -; AVX-LABEL: @test_v4f64_partial_swizzle( -; AVX-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i64 0 -; AVX-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i64 1 -; AVX-NEXT: [[R0:%.*]] = fadd double [[A0]], [[A1]] -; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> -; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <2 x i32> -; AVX-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] -; AVX-NEXT: [[R00:%.*]] = insertelement <4 x double> undef, double [[R0]], i64 0 -; AVX-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> -; AVX-NEXT: [[R031:%.*]] = shufflevector <4 x double> [[R00]], <4 x double> [[TMP4]], <4 x i32> -; AVX-NEXT: ret <4 x double> [[R031]] +; AVX1-LABEL: @test_v4f64_partial_swizzle( +; AVX1-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i64 0 +; AVX1-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i64 1 +; AVX1-NEXT: [[R0:%.*]] = fadd double [[A0]], [[A1]] +; AVX1-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> +; AVX1-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <2 x i32> +; AVX1-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] +; AVX1-NEXT: [[R00:%.*]] = insertelement <4 x double> undef, double [[R0]], i64 0 +; AVX1-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> +; AVX1-NEXT: [[R031:%.*]] = shufflevector <4 x double> [[R00]], <4 x double> [[TMP4]], <4 x i32> +; AVX1-NEXT: ret <4 x double> [[R031]] ; ; AVX2-LABEL: @test_v4f64_partial_swizzle( ; AVX2-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i64 0 @@ -448,3 +448,5 @@ define <16 x i16> @test_v16i16(<16 x i16> %a, <16 x i16> %b) { %rv15 = insertelement <16 x i16> %rv14, i16 %r15, i32 15 ret <16 x i16> %rv15 } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; AVX: {{.*}} diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hsub-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/hsub-inseltpoison.ll index cb05e46b466f2..39400ba4ce1e8 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/hsub-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/hsub-inseltpoison.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer,instcombine -S | FileCheck %s -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s +; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,SSE +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,SLM +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512 ; ; 128-bit vectors @@ -353,3 +353,10 @@ define <16 x i16> @test_v16i16(<16 x i16> %a, <16 x i16> %b) { %rv15 = insertelement <16 x i16> %rv14, i16 %r15, i32 15 ret <16 x i16> %rv15 } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line:
+; AVX: {{.*}}
+; AVX1: {{.*}}
+; AVX2: {{.*}}
+; AVX512: {{.*}}
+; SLM: {{.*}}
+; SSE: {{.*}}
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hsub.ll b/llvm/test/Transforms/SLPVectorizer/X86/hsub.ll
index 7f6f24ce8fda1..6b63de83c56be 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/hsub.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/hsub.ll
@@ -1,10 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer,instcombine -S | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s
+; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,SLM
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512
 ;
 ; 128-bit vectors

@@ -353,3 +353,10 @@ define <16 x i16> @test_v16i16(<16 x i16> %a, <16 x i16> %b) {
   %rv15 = insertelement <16 x i16> %rv14, i16 %r15, i32 15
   ret <16 x i16> %rv15
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; AVX: {{.*}}
+; AVX1: {{.*}}
+; AVX2: {{.*}}
+; AVX512: {{.*}}
+; SLM: {{.*}}
+; SSE: {{.*}}

From 2633d94f289b3d5aa86a1b00591a3c755ce693fa Mon Sep 17 00:00:00 2001
From: Gil Rapaport
Date: Thu, 26 Oct 2023 16:40:18 +0300
Subject: [PATCH 071/877] [mlir][emitc] Add a structured for operation (#68206)

Add an emitc.for op to the EmitC dialect as a lowering target for
scf.for, replacing its current direct translation to C. The translator
now handles emitc.for instead.
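As a sketch of the C++ builder API this adds (the helper function name
here is illustrative, not part of the patch):

  // Builds: emitc.for %iv = %lb to %ub step %step { ... }
  static void buildCountedLoop(mlir::OpBuilder &b, mlir::Location loc,
                               mlir::Value lb, mlir::Value ub,
                               mlir::Value step) {
    b.create<mlir::emitc::ForOp>(
        loc, lb, ub, step,
        [](mlir::OpBuilder &nested, mlir::Location nestedLoc, mlir::Value iv) {
          // Build the loop body here using `iv`; the region must terminate
          // with emitc.yield.
          nested.create<mlir::emitc::YieldOp>(nestedLoc);
        });
  }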
--- mlir/docs/Dialects/emitc.md | 3 - mlir/include/mlir/Dialect/EmitC/IR/EmitC.td | 64 ++++++++- mlir/lib/Conversion/SCFToEmitC/SCFToEmitC.cpp | 123 ++++++++++++++---- mlir/lib/Dialect/EmitC/IR/EmitC.cpp | 95 ++++++++++++++ mlir/lib/Target/Cpp/TranslateToCpp.cpp | 85 +----------- mlir/test/Conversion/SCFToEmitC/for.mlir | 96 ++++++++++++++ mlir/test/Dialect/EmitC/invalid_ops.mlir | 2 +- mlir/test/Dialect/EmitC/ops.mlir | 24 +++- mlir/test/Target/Cpp/for.mlir | 56 ++++++-- 9 files changed, 426 insertions(+), 122 deletions(-) create mode 100644 mlir/test/Conversion/SCFToEmitC/for.mlir diff --git a/mlir/docs/Dialects/emitc.md b/mlir/docs/Dialects/emitc.md index 4d9f04ab11c8f..03b85611ee3cd 100644 --- a/mlir/docs/Dialects/emitc.md +++ b/mlir/docs/Dialects/emitc.md @@ -31,8 +31,5 @@ translating the following operations: * `func.constant` * `func.func` * `func.return` -* 'scf' Dialect - * `scf.for` - * `scf.yield` * 'arith' Dialect * `arith.constant` diff --git a/mlir/include/mlir/Dialect/EmitC/IR/EmitC.td b/mlir/include/mlir/Dialect/EmitC/IR/EmitC.td index 827ffc0278fce..2edeb6f8a9cf0 100644 --- a/mlir/include/mlir/Dialect/EmitC/IR/EmitC.td +++ b/mlir/include/mlir/Dialect/EmitC/IR/EmitC.td @@ -246,6 +246,67 @@ def EmitC_DivOp : EmitC_BinaryOp<"div", []> { let results = (outs FloatIntegerIndexOrOpaqueType); } +def EmitC_ForOp : EmitC_Op<"for", + [AllTypesMatch<["lowerBound", "upperBound", "step"]>, + SingleBlockImplicitTerminator<"emitc::YieldOp">, + RecursiveMemoryEffects]> { + let summary = "for operation"; + let description = [{ + The `emitc.for` operation represents a C loop of the following form: + + ```c++ + for (T i = lb; i < ub; i += step) { /* ... */ } // where T is typeof(lb) + ``` + + The operation takes 3 SSA values as operands that represent the lower bound, + upper bound and step respectively, and defines an SSA value for its + induction variable. It has one region capturing the loop body. The induction + variable is represented as an argument of this region. This SSA value is a + signless integer or index. The step is a value of same type. + + This operation has no result. The body region must contain exactly one block + that terminates with `emitc.yield`. Calling ForOp::build will create such a + region and insert the terminator implicitly if none is defined, so will the + parsing even in cases when it is absent from the custom format. For example: + + ```mlir + // Index case. + emitc.for %iv = %lb to %ub step %step { + ... // body + } + ... + // Integer case. + emitc.for %iv_32 = %lb_32 to %ub_32 step %step_32 : i32 { + ... 
// body
+    }
+    ```
+  }];
+  let arguments = (ins IntegerIndexOrOpaqueType:$lowerBound,
+                       IntegerIndexOrOpaqueType:$upperBound,
+                       IntegerIndexOrOpaqueType:$step);
+  let results = (outs);
+  let regions = (region SizedRegion<1>:$region);
+
+  let skipDefaultBuilders = 1;
+  let builders = [
+    OpBuilder<(ins "Value":$lowerBound, "Value":$upperBound, "Value":$step,
+      CArg<"function_ref<void(OpBuilder &, Location, Value)>", "nullptr">)>
+  ];
+
+  let extraClassDeclaration = [{
+    using BodyBuilderFn =
+        function_ref<void(OpBuilder &, Location, Value)>;
+    Value getInductionVar() { return getBody()->getArgument(0); }
+    void setLowerBound(Value bound) { getOperation()->setOperand(0, bound); }
+    void setUpperBound(Value bound) { getOperation()->setOperand(1, bound); }
+    void setStep(Value step) { getOperation()->setOperand(2, step); }
+  }];
+
+  let hasCanonicalizer = 1;
+  let hasCustomAssemblyFormat = 1;
+  let hasRegionVerifier = 1;
+}
+
 def EmitC_IncludeOp
     : EmitC_Op<"include", [HasParent<"ModuleOp">]> {
   let summary = "Include operation";
@@ -430,7 +491,8 @@ def EmitC_AssignOp : EmitC_Op<"assign", []> {
   let assemblyFormat = "$value `:` type($value) `to` $var `:` type($var) attr-dict";
 }

-def EmitC_YieldOp : EmitC_Op<"yield", [Pure, Terminator, ParentOneOf<["IfOp"]>]> {
+def EmitC_YieldOp : EmitC_Op<"yield",
+    [Pure, Terminator, ParentOneOf<["IfOp", "ForOp"]>]> {
   let summary = "block termination operation";
   let description = [{
     "yield" terminates blocks within EmitC control-flow operations. Since
diff --git a/mlir/lib/Conversion/SCFToEmitC/SCFToEmitC.cpp b/mlir/lib/Conversion/SCFToEmitC/SCFToEmitC.cpp
index 5d0d8df8869e3..bf69ba503f4e6 100644
--- a/mlir/lib/Conversion/SCFToEmitC/SCFToEmitC.cpp
+++ b/mlir/lib/Conversion/SCFToEmitC/SCFToEmitC.cpp
@@ -37,7 +37,100 @@ struct SCFToEmitCPass : public impl::SCFToEmitCBase<SCFToEmitCPass> {
   void runOnOperation() override;
 };

-// Lower scf::if to emitc::if, implementing return values as emitc::variable's
+// Lower scf::for to emitc::for, implementing result values using
+// emitc::variable's updated within the loop body.
+struct ForLowering : public OpRewritePattern<ForOp> {
+  using OpRewritePattern<ForOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(ForOp forOp,
+                                PatternRewriter &rewriter) const override;
+};
+
+// Create an uninitialized emitc::variable op for each result of the given op.
+template <typename T>
+static SmallVector<Value> createVariablesForResults(T op,
+                                                    PatternRewriter &rewriter) {
+  SmallVector<Value> resultVariables;
+
+  if (!op.getNumResults())
+    return resultVariables;
+
+  Location loc = op->getLoc();
+  MLIRContext *context = op.getContext();
+
+  OpBuilder::InsertionGuard guard(rewriter);
+  rewriter.setInsertionPoint(op);
+
+  for (OpResult result : op.getResults()) {
+    Type resultType = result.getType();
+    emitc::OpaqueAttr noInit = emitc::OpaqueAttr::get(context, "");
+    emitc::VariableOp var =
+        rewriter.create<emitc::VariableOp>(loc, resultType, noInit);
+    resultVariables.push_back(var);
+  }
+
+  return resultVariables;
+}
+
+// Create a series of assign ops assigning given values to given variables at
+// the current insertion point of given rewriter.
+static void assignValues(ValueRange values, SmallVector<Value> &variables,
+                         PatternRewriter &rewriter, Location loc) {
+  for (auto [value, var] : llvm::zip(values, variables))
+    rewriter.create<emitc::AssignOp>(loc, var, value);
+}
+
+static void lowerYield(SmallVector<Value> &resultVariables,
+                       PatternRewriter &rewriter, scf::YieldOp yield) {
+  Location loc = yield.getLoc();
+  ValueRange operands = yield.getOperands();
+
+  OpBuilder::InsertionGuard guard(rewriter);
+  rewriter.setInsertionPoint(yield);
+
+  assignValues(operands, resultVariables, rewriter, loc);
+
+  rewriter.create<emitc::YieldOp>(loc);
+  rewriter.eraseOp(yield);
+}
+
+LogicalResult ForLowering::matchAndRewrite(ForOp forOp,
+                                           PatternRewriter &rewriter) const {
+  Location loc = forOp.getLoc();
+
+  // Create an emitc::variable op for each result. These variables will be
+  // assigned to by emitc::assign ops within the loop body.
+  SmallVector<Value> resultVariables =
+      createVariablesForResults(forOp, rewriter);
+  SmallVector<Value> iterArgsVariables =
+      createVariablesForResults(forOp, rewriter);
+
+  assignValues(forOp.getInits(), iterArgsVariables, rewriter, loc);
+
+  emitc::ForOp loweredFor = rewriter.create<emitc::ForOp>(
+      loc, forOp.getLowerBound(), forOp.getUpperBound(), forOp.getStep());
+
+  Block *loweredBody = loweredFor.getBody();
+
+  // Erase the auto-generated terminator for the lowered for op.
+  rewriter.eraseOp(loweredBody->getTerminator());
+
+  SmallVector<Value> replacingValues;
+  replacingValues.push_back(loweredFor.getInductionVar());
+  replacingValues.append(iterArgsVariables.begin(), iterArgsVariables.end());
+
+  rewriter.mergeBlocks(forOp.getBody(), loweredBody, replacingValues);
+  lowerYield(iterArgsVariables, rewriter,
+             cast<scf::YieldOp>(loweredBody->getTerminator()));
+
+  // Copy iterArgs into results after the for loop.
+  assignValues(iterArgsVariables, resultVariables, rewriter, loc);
+
+  rewriter.replaceOp(forOp, resultVariables);
+  return success();
+}
+
+// Lower scf::if to emitc::if, implementing result values as emitc::variable's
 // updated within the then and else regions.
 struct IfLowering : public OpRewritePattern<IfOp> {
   using OpRewritePattern<IfOp>::OpRewritePattern;
@@ -52,20 +145,10 @@ LogicalResult IfLowering::matchAndRewrite(IfOp ifOp,
                                           PatternRewriter &rewriter) const {
   Location loc = ifOp.getLoc();
 
-  SmallVector<Value> resultVariables;
-
   // Create an emitc::variable op for each result. These variables will be
   // assigned to by emitc::assign ops within the then & else regions.
-  if (ifOp.getNumResults()) {
-    MLIRContext *context = ifOp.getContext();
-    rewriter.setInsertionPoint(ifOp);
-    for (OpResult result : ifOp.getResults()) {
-      Type resultType = result.getType();
-      auto noInit = emitc::OpaqueAttr::get(context, "");
-      auto var = rewriter.create<emitc::VariableOp>(loc, resultType, noInit);
-      resultVariables.push_back(var);
-    }
-  }
+  SmallVector<Value> resultVariables =
+      createVariablesForResults(ifOp, rewriter);
 
   // Utility function to lower the contents of an scf::if region to an emitc::if
  // region. The contents of the scf::if regions is moved into the respective
@@ -76,16 +159,7 @@ LogicalResult IfLowering::matchAndRewrite(IfOp ifOp,
                                 Region &loweredRegion) {
     rewriter.inlineRegionBefore(region, loweredRegion, loweredRegion.end());
     Operation *terminator = loweredRegion.back().getTerminator();
-    Location terminatorLoc = terminator->getLoc();
-    ValueRange terminatorOperands = terminator->getOperands();
-    rewriter.setInsertionPointToEnd(&loweredRegion.back());
-    for (auto value2Var : llvm::zip(terminatorOperands, resultVariables)) {
-      Value resultValue = std::get<0>(value2Var);
-      Value resultVar = std::get<1>(value2Var);
-      rewriter.create<emitc::AssignOp>(terminatorLoc, resultVar, resultValue);
-    }
-    rewriter.create<emitc::YieldOp>(terminatorLoc);
-    rewriter.eraseOp(terminator);
+    lowerYield(resultVariables, rewriter, cast<scf::YieldOp>(terminator));
   };
 
   Region &thenRegion = ifOp.getThenRegion();
@@ -109,6 +183,7 @@ LogicalResult IfLowering::matchAndRewrite(IfOp ifOp,
 }
 
 void mlir::populateSCFToEmitCConversionPatterns(RewritePatternSet &patterns) {
+  patterns.add<ForLowering>(patterns.getContext());
   patterns.add<IfLowering>(patterns.getContext());
 }
 
@@ -118,7 +193,7 @@ void SCFToEmitCPass::runOnOperation() {
 
   // Configure conversion to lower out SCF operations.
   ConversionTarget target(getContext());
-  target.addIllegalOp<scf::IfOp>();
+  target.addIllegalOp<scf::ForOp, scf::IfOp>();
   target.markUnknownOpDynamicallyLegal([](Operation *) { return true; });
   if (failed(
           applyPartialConversion(getOperation(), target, std::move(patterns))))
diff --git a/mlir/lib/Dialect/EmitC/IR/EmitC.cpp b/mlir/lib/Dialect/EmitC/IR/EmitC.cpp
index 961a52a70a2a1..d06381b7ddad3 100644
--- a/mlir/lib/Dialect/EmitC/IR/EmitC.cpp
+++ b/mlir/lib/Dialect/EmitC/IR/EmitC.cpp
@@ -189,6 +189,101 @@ LogicalResult emitc::ConstantOp::verify() {
 
 OpFoldResult emitc::ConstantOp::fold(FoldAdaptor adaptor) { return getValue(); }
 
+//===----------------------------------------------------------------------===//
+// ForOp
+//===----------------------------------------------------------------------===//
+
+void ForOp::build(OpBuilder &builder, OperationState &result, Value lb,
+                  Value ub, Value step, BodyBuilderFn bodyBuilder) {
+  result.addOperands({lb, ub, step});
+  Type t = lb.getType();
+  Region *bodyRegion = result.addRegion();
+  bodyRegion->push_back(new Block);
+  Block &bodyBlock = bodyRegion->front();
+  bodyBlock.addArgument(t, result.location);
+
+  // Create the default terminator if the builder is not provided.
+  if (!bodyBuilder) {
+    ForOp::ensureTerminator(*bodyRegion, builder, result.location);
+  } else {
+    OpBuilder::InsertionGuard guard(builder);
+    builder.setInsertionPointToStart(&bodyBlock);
+    bodyBuilder(builder, result.location, bodyBlock.getArgument(0));
+  }
+}
+
+void ForOp::getCanonicalizationPatterns(RewritePatternSet &, MLIRContext *) {}
+
+ParseResult ForOp::parse(OpAsmParser &parser, OperationState &result) {
+  Builder &builder = parser.getBuilder();
+  Type type;
+
+  OpAsmParser::Argument inductionVariable;
+  OpAsmParser::UnresolvedOperand lb, ub, step;
+
+  // Parse the induction variable followed by '='.
+  if (parser.parseOperand(inductionVariable.ssaName) || parser.parseEqual() ||
+      // Parse loop bounds.
+      parser.parseOperand(lb) || parser.parseKeyword("to") ||
+      parser.parseOperand(ub) || parser.parseKeyword("step") ||
+      parser.parseOperand(step))
+    return failure();
+
+  // Parse the optional initial iteration arguments.
+  SmallVector<OpAsmParser::Argument, 4> regionArgs;
+  SmallVector<OpAsmParser::UnresolvedOperand, 4> operands;
+  regionArgs.push_back(inductionVariable);
+
+  // Parse optional type, else assume Index.
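+  // As an illustrative sketch of the two accepted forms: with a colon,
+  // `emitc.for %i = %lb to %ub step %s : i32 {...}` types the induction
+  // variable explicitly, while `emitc.for %i = %lb to %ub step %s {...}`
+  // defaults it to `index`.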
+  if (parser.parseOptionalColon())
+    type = builder.getIndexType();
+  else if (parser.parseType(type))
+    return failure();
+
+  // Resolve input operands.
+  regionArgs.front().type = type;
+  if (parser.resolveOperand(lb, type, result.operands) ||
+      parser.resolveOperand(ub, type, result.operands) ||
+      parser.resolveOperand(step, type, result.operands))
+    return failure();
+
+  // Parse the body region.
+  Region *body = result.addRegion();
+  if (parser.parseRegion(*body, regionArgs))
+    return failure();
+
+  ForOp::ensureTerminator(*body, builder, result.location);
+
+  // Parse the optional attribute list.
+  if (parser.parseOptionalAttrDict(result.attributes))
+    return failure();
+
+  return success();
+}
+
+void ForOp::print(OpAsmPrinter &p) {
+  p << " " << getInductionVar() << " = " << getLowerBound() << " to "
+    << getUpperBound() << " step " << getStep();
+
+  p << ' ';
+  if (Type t = getInductionVar().getType(); !t.isIndex())
+    p << " : " << t << ' ';
+  p.printRegion(getRegion(),
+                /*printEntryBlockArgs=*/false,
+                /*printBlockTerminators=*/false);
+  p.printOptionalAttrDict((*this)->getAttrs());
+}
+
+LogicalResult ForOp::verifyRegions() {
+  // Check that the body defines a single block argument for the induction
+  // variable.
+  if (getInductionVar().getType() != getLowerBound().getType())
+    return emitOpError(
+        "expected induction variable to be same type as bounds and step");
+
+  return success();
+}
+
 //===----------------------------------------------------------------------===//
 // IfOp
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Target/Cpp/TranslateToCpp.cpp b/mlir/lib/Target/Cpp/TranslateToCpp.cpp
index 4645ca4b206e7..8ffea4d5b7b32 100644
--- a/mlir/lib/Target/Cpp/TranslateToCpp.cpp
+++ b/mlir/lib/Target/Cpp/TranslateToCpp.cpp
@@ -10,7 +10,6 @@
 #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
 #include "mlir/Dialect/EmitC/IR/EmitC.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
-#include "mlir/Dialect/SCF/IR/SCF.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/Dialect.h"
@@ -502,30 +501,10 @@ static LogicalResult printOperation(CppEmitter &emitter,
   return success();
 }
 
-static LogicalResult printOperation(CppEmitter &emitter, scf::ForOp forOp) {
+static LogicalResult printOperation(CppEmitter &emitter, emitc::ForOp forOp) {
   raw_indented_ostream &os = emitter.ostream();
 
-  OperandRange operands = forOp.getInitArgs();
-  Block::BlockArgListType iterArgs = forOp.getRegionIterArgs();
-  Operation::result_range results = forOp.getResults();
-
-  if (!emitter.shouldDeclareVariablesAtTop()) {
-    for (OpResult result : results) {
-      if (failed(emitter.emitVariableDeclaration(result,
-                                                 /*trailingSemicolon=*/true)))
-        return failure();
-    }
-  }
-
-  for (auto pair : llvm::zip(iterArgs, operands)) {
-    if (failed(emitter.emitType(forOp.getLoc(), std::get<0>(pair).getType())))
-      return failure();
-    os << " " << emitter.getOrCreateName(std::get<0>(pair)) << " = ";
-    os << emitter.getOrCreateName(std::get<1>(pair)) << ";";
-    os << "\n";
-  }
-
   os << "for (";
   if (failed(
           emitter.emitType(forOp.getLoc(), forOp.getInductionVar().getType())))
@@ -548,35 +527,14 @@ static LogicalResult printOperation(CppEmitter &emitter, scf::ForOp forOp) {
 
   Region &forRegion = forOp.getRegion();
   auto regionOps = forRegion.getOps();
 
-  // We skip the trailing yield op because this updates the result variables
-  // of the for op in the generated code. Instead we update the iterArgs at
-  // the end of a loop iteration and set the result variables after the for
-  // loop.
+  // We skip the trailing yield op.
   for (auto it = regionOps.begin(); std::next(it) != regionOps.end(); ++it) {
     if (failed(emitter.emitOperation(*it, /*trailingSemicolon=*/true)))
       return failure();
   }
 
-  Operation *yieldOp = forRegion.getBlocks().front().getTerminator();
-  // Copy yield operands into iterArgs at the end of a loop iteration.
-  for (auto pair : llvm::zip(iterArgs, yieldOp->getOperands())) {
-    BlockArgument iterArg = std::get<0>(pair);
-    Value operand = std::get<1>(pair);
-    os << emitter.getOrCreateName(iterArg) << " = "
-       << emitter.getOrCreateName(operand) << ";\n";
-  }
-
   os.unindent() << "}";
 
-  // Copy iterArgs into results after the for loop.
-  for (auto pair : llvm::zip(results, iterArgs)) {
-    OpResult result = std::get<0>(pair);
-    BlockArgument iterArg = std::get<1>(pair);
-    os << "\n"
-       << emitter.getOrCreateName(result) << " = "
-       << emitter.getOrCreateName(iterArg) << ";";
-  }
-
   return success();
 }
 
@@ -617,33 +575,6 @@ static LogicalResult printOperation(CppEmitter &emitter, emitc::IfOp ifOp) {
   return success();
 }
 
-static LogicalResult printOperation(CppEmitter &emitter, scf::YieldOp yieldOp) {
-  raw_ostream &os = emitter.ostream();
-  Operation &parentOp = *yieldOp.getOperation()->getParentOp();
-
-  if (yieldOp.getNumOperands() != parentOp.getNumResults()) {
-    return yieldOp.emitError("number of operands does not to match the number "
-                             "of the parent op's results");
-  }
-
-  if (failed(interleaveWithError(
-          llvm::zip(parentOp.getResults(), yieldOp.getOperands()),
-          [&](auto pair) -> LogicalResult {
-            auto result = std::get<0>(pair);
-            auto operand = std::get<1>(pair);
-            os << emitter.getOrCreateName(result) << " = ";
-
-            if (!emitter.hasValueInScope(operand))
-              return yieldOp.emitError("operand value not in scope");
-            os << emitter.getOrCreateName(operand);
-            return success();
-          },
-          [&]() { os << ";\n"; })))
-    return failure();
-
-  return success();
-}
-
 static LogicalResult printOperation(CppEmitter &emitter,
                                     func::ReturnOp returnOp) {
   raw_ostream &os = emitter.ostream();
@@ -748,10 +679,11 @@ static LogicalResult printOperation(CppEmitter &emitter,
   for (Operation &op : block.getOperations()) {
     // When generating code for an emitc.if or cf.cond_br op no semicolon
     // needs to be printed after the closing brace.
-    // When generating code for an scf.for op, printing a trailing semicolon
+    // When generating code for an emitc.for op, printing a trailing semicolon
     // is handled within the printOperation function.
     bool trailingSemicolon =
-        !isa(op);
+        !isa(
+            op);
 
     if (failed(emitter.emitOperation(
             op, /*trailingSemicolon=*/trailingSemicolon)))
@@ -1015,15 +947,12 @@ LogicalResult CppEmitter::emitOperation(Operation &op, bool trailingSemicolon) {
           // EmitC ops.
           .Case<emitc::AddOp, emitc::ApplyOp, emitc::AssignOp, emitc::CallOp,
                 emitc::CastOp, emitc::CmpOp, emitc::ConstantOp, emitc::DivOp,
-                emitc::IfOp, emitc::IncludeOp, emitc::MulOp, emitc::RemOp,
-                emitc::SubOp, emitc::VariableOp>(
+                emitc::ForOp, emitc::IfOp, emitc::IncludeOp, emitc::MulOp,
+                emitc::RemOp, emitc::SubOp, emitc::VariableOp>(
              [&](auto op) { return printOperation(*this, op); })
           // Func ops.
           .Case<func::CallOp, func::ConstantOp, func::FuncOp, func::ReturnOp>(
              [&](auto op) { return printOperation(*this, op); })
-          // SCF ops.
-          .Case<scf::ForOp, scf::YieldOp>(
-              [&](auto op) { return printOperation(*this, op); })
          // Arithmetic ops.
.Case( [&](auto op) { return printOperation(*this, op); }) diff --git a/mlir/test/Conversion/SCFToEmitC/for.mlir b/mlir/test/Conversion/SCFToEmitC/for.mlir new file mode 100644 index 0000000000000..7f90310af2189 --- /dev/null +++ b/mlir/test/Conversion/SCFToEmitC/for.mlir @@ -0,0 +1,96 @@ +// RUN: mlir-opt -allow-unregistered-dialect -convert-scf-to-emitc %s | FileCheck %s + +func.func @simple_std_for_loop(%arg0 : index, %arg1 : index, %arg2 : index) { + scf.for %i0 = %arg0 to %arg1 step %arg2 { + %c1 = arith.constant 1 : index + } + return +} +// CHECK-LABEL: func.func @simple_std_for_loop( +// CHECK-SAME: %[[VAL_0:.*]]: index, %[[VAL_1:.*]]: index, %[[VAL_2:.*]]: index) { +// CHECK-NEXT: emitc.for %[[VAL_3:.*]] = %[[VAL_0]] to %[[VAL_1]] step %[[VAL_2]] { +// CHECK-NEXT: %[[VAL_4:.*]] = arith.constant 1 : index +// CHECK-NEXT: } +// CHECK-NEXT: return +// CHECK-NEXT: } + +func.func @simple_std_2_for_loops(%arg0 : index, %arg1 : index, %arg2 : index) { + scf.for %i0 = %arg0 to %arg1 step %arg2 { + %c1 = arith.constant 1 : index + scf.for %i1 = %arg0 to %arg1 step %arg2 { + %c1_0 = arith.constant 1 : index + } + } + return +} +// CHECK-LABEL: func.func @simple_std_2_for_loops( +// CHECK-SAME: %[[VAL_0:.*]]: index, %[[VAL_1:.*]]: index, %[[VAL_2:.*]]: index) { +// CHECK-NEXT: emitc.for %[[VAL_3:.*]] = %[[VAL_0]] to %[[VAL_1]] step %[[VAL_2]] { +// CHECK-NEXT: %[[VAL_4:.*]] = arith.constant 1 : index +// CHECK-NEXT: emitc.for %[[VAL_5:.*]] = %[[VAL_0]] to %[[VAL_1]] step %[[VAL_2]] { +// CHECK-NEXT: %[[VAL_6:.*]] = arith.constant 1 : index +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: return +// CHECK-NEXT: } + +func.func @for_yield(%arg0 : index, %arg1 : index, %arg2 : index) -> (f32, f32) { + %s0 = arith.constant 0.0 : f32 + %s1 = arith.constant 1.0 : f32 + %result:2 = scf.for %i0 = %arg0 to %arg1 step %arg2 iter_args(%si = %s0, %sj = %s1) -> (f32, f32) { + %sn = arith.addf %si, %sj : f32 + scf.yield %sn, %sn : f32, f32 + } + return %result#0, %result#1 : f32, f32 +} +// CHECK-LABEL: func.func @for_yield( +// CHECK-SAME: %[[VAL_0:.*]]: index, %[[VAL_1:.*]]: index, %[[VAL_2:.*]]: index) -> (f32, f32) { +// CHECK-NEXT: %[[VAL_3:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK-NEXT: %[[VAL_4:.*]] = arith.constant 1.000000e+00 : f32 +// CHECK-NEXT: %[[VAL_5:.*]] = "emitc.variable"() <{value = #emitc.opaque<"">}> : () -> f32 +// CHECK-NEXT: %[[VAL_6:.*]] = "emitc.variable"() <{value = #emitc.opaque<"">}> : () -> f32 +// CHECK-NEXT: %[[VAL_7:.*]] = "emitc.variable"() <{value = #emitc.opaque<"">}> : () -> f32 +// CHECK-NEXT: %[[VAL_8:.*]] = "emitc.variable"() <{value = #emitc.opaque<"">}> : () -> f32 +// CHECK-NEXT: emitc.assign %[[VAL_3]] : f32 to %[[VAL_7]] : f32 +// CHECK-NEXT: emitc.assign %[[VAL_4]] : f32 to %[[VAL_8]] : f32 +// CHECK-NEXT: emitc.for %[[VAL_9:.*]] = %[[VAL_0]] to %[[VAL_1]] step %[[VAL_2]] { +// CHECK-NEXT: %[[VAL_10:.*]] = arith.addf %[[VAL_7]], %[[VAL_8]] : f32 +// CHECK-NEXT: emitc.assign %[[VAL_10]] : f32 to %[[VAL_7]] : f32 +// CHECK-NEXT: emitc.assign %[[VAL_10]] : f32 to %[[VAL_8]] : f32 +// CHECK-NEXT: } +// CHECK-NEXT: emitc.assign %[[VAL_7]] : f32 to %[[VAL_5]] : f32 +// CHECK-NEXT: emitc.assign %[[VAL_8]] : f32 to %[[VAL_6]] : f32 +// CHECK-NEXT: return %[[VAL_5]], %[[VAL_6]] : f32, f32 +// CHECK-NEXT: } + +func.func @nested_for_yield(%arg0 : index, %arg1 : index, %arg2 : index) -> f32 { + %s0 = arith.constant 1.0 : f32 + %r = scf.for %i0 = %arg0 to %arg1 step %arg2 iter_args(%iter = %s0) -> (f32) { + %result = scf.for %i1 = %arg0 to %arg1 step %arg2 
iter_args(%si = %iter) -> (f32) { + %sn = arith.addf %si, %si : f32 + scf.yield %sn : f32 + } + scf.yield %result : f32 + } + return %r : f32 +} +// CHECK-LABEL: func.func @nested_for_yield( +// CHECK-SAME: %[[VAL_0:.*]]: index, %[[VAL_1:.*]]: index, %[[VAL_2:.*]]: index) -> f32 { +// CHECK-NEXT: %[[VAL_3:.*]] = arith.constant 1.000000e+00 : f32 +// CHECK-NEXT: %[[VAL_4:.*]] = "emitc.variable"() <{value = #emitc.opaque<"">}> : () -> f32 +// CHECK-NEXT: %[[VAL_5:.*]] = "emitc.variable"() <{value = #emitc.opaque<"">}> : () -> f32 +// CHECK-NEXT: emitc.assign %[[VAL_3]] : f32 to %[[VAL_5]] : f32 +// CHECK-NEXT: emitc.for %[[VAL_6:.*]] = %[[VAL_0]] to %[[VAL_1]] step %[[VAL_2]] { +// CHECK-NEXT: %[[VAL_7:.*]] = "emitc.variable"() <{value = #emitc.opaque<"">}> : () -> f32 +// CHECK-NEXT: %[[VAL_8:.*]] = "emitc.variable"() <{value = #emitc.opaque<"">}> : () -> f32 +// CHECK-NEXT: emitc.assign %[[VAL_5]] : f32 to %[[VAL_8]] : f32 +// CHECK-NEXT: emitc.for %[[VAL_9:.*]] = %[[VAL_0]] to %[[VAL_1]] step %[[VAL_2]] { +// CHECK-NEXT: %[[VAL_10:.*]] = arith.addf %[[VAL_8]], %[[VAL_8]] : f32 +// CHECK-NEXT: emitc.assign %[[VAL_10]] : f32 to %[[VAL_8]] : f32 +// CHECK-NEXT: } +// CHECK-NEXT: emitc.assign %[[VAL_8]] : f32 to %[[VAL_7]] : f32 +// CHECK-NEXT: emitc.assign %[[VAL_7]] : f32 to %[[VAL_5]] : f32 +// CHECK-NEXT: } +// CHECK-NEXT: emitc.assign %[[VAL_5]] : f32 to %[[VAL_4]] : f32 +// CHECK-NEXT: return %[[VAL_4]] : f32 +// CHECK-NEXT: } diff --git a/mlir/test/Dialect/EmitC/invalid_ops.mlir b/mlir/test/Dialect/EmitC/invalid_ops.mlir index 9e8f0bf0bf8bd..53d88adf4305f 100644 --- a/mlir/test/Dialect/EmitC/invalid_ops.mlir +++ b/mlir/test/Dialect/EmitC/invalid_ops.mlir @@ -203,7 +203,7 @@ func.func @sub_pointer_pointer(%arg0: !emitc.ptr, %arg1: !emitc.ptr) { // ----- func.func @test_misplaced_yield() { - // expected-error @+1 {{'emitc.yield' op expects parent op 'emitc.if'}} + // expected-error @+1 {{'emitc.yield' op expects parent op to be one of 'emitc.if, emitc.for'}} emitc.yield return } diff --git a/mlir/test/Dialect/EmitC/ops.mlir b/mlir/test/Dialect/EmitC/ops.mlir index 0817945e3b1e0..6c83986809804 100644 --- a/mlir/test/Dialect/EmitC/ops.mlir +++ b/mlir/test/Dialect/EmitC/ops.mlir @@ -105,7 +105,7 @@ func.func @test_if(%arg0: i1, %arg1: f32) { return } -func.func @test_explicit_yield(%arg0: i1, %arg1: f32) { +func.func @test_if_explicit_yield(%arg0: i1, %arg1: f32) { emitc.if %arg0 { %0 = emitc.call "func_const"(%arg1) : (f32) -> i32 emitc.yield @@ -127,3 +127,25 @@ func.func @test_assign(%arg1: f32) { emitc.assign %arg1 : f32 to %v : f32 return } + +func.func @test_for(%arg0 : index, %arg1 : index, %arg2 : index) { + emitc.for %i0 = %arg0 to %arg1 step %arg2 { + %0 = emitc.call "func_const"(%i0) : (index) -> i32 + } + return +} + +func.func @test_for_explicit_yield(%arg0 : index, %arg1 : index, %arg2 : index) { + emitc.for %i0 = %arg0 to %arg1 step %arg2 { + %0 = emitc.call "func_const"(%i0) : (index) -> i32 + emitc.yield + } + return +} + +func.func @test_for_not_index_induction(%arg0 : i16, %arg1 : i16, %arg2 : i16) { + emitc.for %i0 = %arg0 to %arg1 step %arg2 : i16 { + %0 = emitc.call "func_const"(%i0) : (i16) -> i32 + } + return +} diff --git a/mlir/test/Target/Cpp/for.mlir b/mlir/test/Target/Cpp/for.mlir index e904c99820ad8..c02c8b1ac33e3 100644 --- a/mlir/test/Target/Cpp/for.mlir +++ b/mlir/test/Target/Cpp/for.mlir @@ -2,7 +2,7 @@ // RUN: mlir-translate -mlir-to-cpp -declare-variables-at-top %s | FileCheck %s -check-prefix=CPP-DECLTOP func.func @test_for(%arg0 : index, %arg1 : index, 
%arg2 : index) { - scf.for %i0 = %arg0 to %arg1 step %arg2 { + emitc.for %i0 = %arg0 to %arg1 step %arg2 { %0 = emitc.call "f"() : () -> i32 } return @@ -28,11 +28,21 @@ func.func @test_for_yield() { %s0 = arith.constant 0 : i32 %p0 = arith.constant 1.0 : f32 - %result:2 = scf.for %iter = %start to %stop step %step iter_args(%si = %s0, %pi = %p0) -> (i32, f32) { - %sn = emitc.call "add"(%si, %iter) : (i32, index) -> i32 - %pn = emitc.call "mul"(%pi, %iter) : (f32, index) -> f32 - scf.yield %sn, %pn : i32, f32 + %0 = "emitc.variable"() <{value = #emitc.opaque<"">}> : () -> i32 + %1 = "emitc.variable"() <{value = #emitc.opaque<"">}> : () -> f32 + %2 = "emitc.variable"() <{value = #emitc.opaque<"">}> : () -> i32 + %3 = "emitc.variable"() <{value = #emitc.opaque<"">}> : () -> f32 + emitc.assign %s0 : i32 to %2 : i32 + emitc.assign %p0 : f32 to %3 : f32 + emitc.for %iter = %start to %stop step %step { + %sn = emitc.call "add"(%2, %iter) : (i32, index) -> i32 + %pn = emitc.call "mul"(%3, %iter) : (f32, index) -> f32 + emitc.assign %sn : i32 to %2 : i32 + emitc.assign %pn : f32 to %3 : f32 + emitc.yield } + emitc.assign %2 : i32 to %0 : i32 + emitc.assign %3 : f32 to %1 : f32 return } @@ -44,8 +54,10 @@ func.func @test_for_yield() { // CPP-DEFAULT-NEXT: float [[P0:[^ ]*]] = (float)1.000000000e+00; // CPP-DEFAULT-NEXT: int32_t [[SE:[^ ]*]]; // CPP-DEFAULT-NEXT: float [[PE:[^ ]*]]; -// CPP-DEFAULT-NEXT: int32_t [[SI:[^ ]*]] = [[S0]]; -// CPP-DEFAULT-NEXT: float [[PI:[^ ]*]] = [[P0]]; +// CPP-DEFAULT-NEXT: int32_t [[SI:[^ ]*]]; +// CPP-DEFAULT-NEXT: float [[PI:[^ ]*]]; +// CPP-DEFAULT-NEXT: [[SI:[^ ]*]] = [[S0]]; +// CPP-DEFAULT-NEXT: [[PI:[^ ]*]] = [[P0]]; // CPP-DEFAULT-NEXT: for (size_t [[ITER:[^ ]*]] = [[START]]; [[ITER]] < [[STOP]]; [[ITER]] += [[STEP]]) { // CPP-DEFAULT-NEXT: int32_t [[SN:[^ ]*]] = add([[SI]], [[ITER]]); // CPP-DEFAULT-NEXT: float [[PN:[^ ]*]] = mul([[PI]], [[ITER]]); @@ -64,6 +76,8 @@ func.func @test_for_yield() { // CPP-DECLTOP-NEXT: float [[P0:[^ ]*]]; // CPP-DECLTOP-NEXT: int32_t [[SE:[^ ]*]]; // CPP-DECLTOP-NEXT: float [[PE:[^ ]*]]; +// CPP-DECLTOP-NEXT: int32_t [[SI:[^ ]*]]; +// CPP-DECLTOP-NEXT: float [[PI:[^ ]*]]; // CPP-DECLTOP-NEXT: int32_t [[SN:[^ ]*]]; // CPP-DECLTOP-NEXT: float [[PN:[^ ]*]]; // CPP-DECLTOP-NEXT: [[START]] = 0; @@ -71,8 +85,12 @@ func.func @test_for_yield() { // CPP-DECLTOP-NEXT: [[STEP]] = 1; // CPP-DECLTOP-NEXT: [[S0]] = 0; // CPP-DECLTOP-NEXT: [[P0]] = (float)1.000000000e+00; -// CPP-DECLTOP-NEXT: int32_t [[SI:[^ ]*]] = [[S0]]; -// CPP-DECLTOP-NEXT: float [[PI:[^ ]*]] = [[P0]]; +// CPP-DECLTOP-NEXT: ; +// CPP-DECLTOP-NEXT: ; +// CPP-DECLTOP-NEXT: ; +// CPP-DECLTOP-NEXT: ; +// CPP-DECLTOP-NEXT: [[SI:[^ ]*]] = [[S0]]; +// CPP-DECLTOP-NEXT: [[PI:[^ ]*]] = [[P0]]; // CPP-DECLTOP-NEXT: for (size_t [[ITER:[^ ]*]] = [[START]]; [[ITER]] < [[STOP]]; [[ITER]] += [[STEP]]) { // CPP-DECLTOP-NEXT: [[SN]] = add([[SI]], [[ITER]]); // CPP-DECLTOP-NEXT: [[PN]] = mul([[PI]], [[ITER]]); @@ -91,14 +109,24 @@ func.func @test_for_yield_2() { %s0 = emitc.literal "0" : i32 %p0 = emitc.literal "M_PI" : f32 - %result:2 = scf.for %iter = %start to %stop step %step iter_args(%si = %s0, %pi = %p0) -> (i32, f32) { - %sn = emitc.call "add"(%si, %iter) : (i32, index) -> i32 - %pn = emitc.call "mul"(%pi, %iter) : (f32, index) -> f32 - scf.yield %sn, %pn : i32, f32 + %0 = "emitc.variable"() <{value = #emitc.opaque<"">}> : () -> i32 + %1 = "emitc.variable"() <{value = #emitc.opaque<"">}> : () -> f32 + %2 = "emitc.variable"() <{value = #emitc.opaque<"">}> : () -> i32 + %3 = 
"emitc.variable"() <{value = #emitc.opaque<"">}> : () -> f32 + emitc.assign %s0 : i32 to %2 : i32 + emitc.assign %p0 : f32 to %3 : f32 + emitc.for %iter = %start to %stop step %step { + %sn = emitc.call "add"(%2, %iter) : (i32, index) -> i32 + %pn = emitc.call "mul"(%3, %iter) : (f32, index) -> f32 + emitc.assign %sn : i32 to %2 : i32 + emitc.assign %pn : f32 to %3 : f32 + emitc.yield } + emitc.assign %2 : i32 to %0 : i32 + emitc.assign %3 : f32 to %1 : f32 return } // CPP-DEFAULT: void test_for_yield_2() { -// CPP-DEFAULT: float{{.*}}= M_PI +// CPP-DEFAULT: {{.*}}= M_PI // CPP-DEFAULT: for (size_t [[IN:.*]] = 0; [[IN]] < 10; [[IN]] += 1) { From f118d474eb64c29aff848fe60d1a49fbd310dcbe Mon Sep 17 00:00:00 2001 From: Alexander Richardson Date: Thu, 26 Oct 2023 07:08:58 -0700 Subject: [PATCH 072/877] [AMDGPU] Use alloca address space in rewrite-out-arguments.ll (#70269) This is needed for the transform to fire with a correct data layout. Pre-commiting this change to keep the diff of D141060 smaller. --- .../CodeGen/AMDGPU/rewrite-out-arguments.ll | 652 +++++++++--------- 1 file changed, 327 insertions(+), 325 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-out-arguments.ll b/llvm/test/CodeGen/AMDGPU/rewrite-out-arguments.ll index 72d0053693b78..0d35ba8e1161e 100644 --- a/llvm/test/CodeGen/AMDGPU/rewrite-out-arguments.ll +++ b/llvm/test/CodeGen/AMDGPU/rewrite-out-arguments.ll @@ -1,119 +1,121 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-rewrite-out-arguments < %s | FileCheck %s - +; Temporarily add an explicit datalayout until https://reviews.llvm.org/D141060 lands +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target triple = "amdgcn-amd-amdhsa" define void @no_ret_blocks() #0 { unreachable } -define void @void_one_out_arg_i32_no_use(ptr %val) #0 { +define void @void_one_out_arg_i32_no_use(ptr addrspace(5) %val) #0 { ret void } -define void @skip_byval_arg(ptr byval(i32) %val) #0 { - store i32 0, ptr %val +define void @skip_byval_arg(ptr addrspace(5) byval(i32) %val) #0 { + store i32 0, ptr addrspace(5) %val ret void } -define void @skip_optnone(ptr byval(i32) %val) #1 { - store i32 0, ptr %val +define void @skip_optnone(ptr addrspace(5) byval(i32) %val) #1 { + store i32 0, ptr addrspace(5) %val ret void } -define void @skip_volatile(ptr byval(i32) %val) #0 { - store volatile i32 0, ptr %val +define void @skip_volatile(ptr addrspace(5) byval(i32) %val) #0 { + store volatile i32 0, ptr addrspace(5) %val ret void } -define void @skip_atomic(ptr byval(i32) %val) #0 { - store atomic i32 0, ptr %val seq_cst, align 4 +define void @skip_atomic(ptr addrspace(5) byval(i32) %val) #0 { + store atomic i32 0, ptr addrspace(5) %val seq_cst, align 4 ret void } -define void @skip_store_pointer_val(ptr %val) #0 { - store ptr %val, ptr poison +define void @skip_store_pointer_val(ptr addrspace(5) %val) #0 { + store ptr addrspace(5) %val, ptr poison ret void } -define void @skip_store_gep(ptr %val) #0 { - %gep = getelementptr inbounds i32, ptr %val, i32 1 - store i32 0, ptr %gep +define void @skip_store_gep(ptr addrspace(5) %val) #0 { + %gep = getelementptr inbounds i32, ptr addrspace(5) %val, i32 1 + store i32 0, ptr addrspace(5) %gep ret void } -define void @skip_sret(ptr 
---
 .../CodeGen/AMDGPU/rewrite-out-arguments.ll   | 652 +++++++++---------
 1 file changed, 327 insertions(+), 325 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-out-arguments.ll b/llvm/test/CodeGen/AMDGPU/rewrite-out-arguments.ll
index 72d0053693b78..0d35ba8e1161e 100644
--- a/llvm/test/CodeGen/AMDGPU/rewrite-out-arguments.ll
+++ b/llvm/test/CodeGen/AMDGPU/rewrite-out-arguments.ll
@@ -1,119 +1,121 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs
 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-rewrite-out-arguments < %s | FileCheck %s
-
+; Temporarily add an explicit datalayout until https://reviews.llvm.org/D141060 lands
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
+target triple = "amdgcn-amd-amdhsa"
 
 define void @no_ret_blocks() #0 {
   unreachable
 }
 
-define void @void_one_out_arg_i32_no_use(ptr %val) #0 {
+define void @void_one_out_arg_i32_no_use(ptr addrspace(5) %val) #0 {
   ret void
 }
 
-define void @skip_byval_arg(ptr byval(i32) %val) #0 {
-  store i32 0, ptr %val
+define void @skip_byval_arg(ptr addrspace(5) byval(i32) %val) #0 {
+  store i32 0, ptr addrspace(5) %val
   ret void
 }
 
-define void @skip_optnone(ptr byval(i32) %val) #1 {
-  store i32 0, ptr %val
+define void @skip_optnone(ptr addrspace(5) byval(i32) %val) #1 {
+  store i32 0, ptr addrspace(5) %val
   ret void
 }
 
-define void @skip_volatile(ptr byval(i32) %val) #0 {
-  store volatile i32 0, ptr %val
+define void @skip_volatile(ptr addrspace(5) byval(i32) %val) #0 {
+  store volatile i32 0, ptr addrspace(5) %val
   ret void
 }
 
-define void @skip_atomic(ptr byval(i32) %val) #0 {
-  store atomic i32 0, ptr %val seq_cst, align 4
+define void @skip_atomic(ptr addrspace(5) byval(i32) %val) #0 {
+  store atomic i32 0, ptr addrspace(5) %val seq_cst, align 4
  ret void
 }
 
-define void @skip_store_pointer_val(ptr %val) #0 {
-  store ptr %val, ptr poison
+define void @skip_store_pointer_val(ptr addrspace(5) %val) #0 {
+  store ptr addrspace(5) %val, ptr poison
   ret void
 }
 
-define void @skip_store_gep(ptr %val) #0 {
-  %gep = getelementptr inbounds i32, ptr %val, i32 1
-  store i32 0, ptr %gep
+define void @skip_store_gep(ptr addrspace(5) %val) #0 {
+  %gep = getelementptr inbounds i32, ptr addrspace(5) %val, i32 1
+  store i32 0, ptr addrspace(5) %gep
   ret void
 }
 
-define void @skip_sret(ptr sret(i32) %sret, ptr %out) #0 {
-  store i32 1, ptr %sret
-  store i32 0, ptr %out
+define void @skip_sret(ptr addrspace(5) sret(i32) %sret, ptr addrspace(5) %out) #0 {
+  store i32 1, ptr addrspace(5) %sret
+  store i32 0, ptr addrspace(5) %out
   ret void
 }
 
-define void @void_one_out_arg_i32_1_use(ptr %val) #0 {
-  store i32 0, ptr %val
+define void @void_one_out_arg_i32_1_use(ptr addrspace(5) %val) #0 {
+  store i32 0, ptr addrspace(5) %val
   ret void
 }
 
-define void @void_one_out_arg_i32_1_use_align(ptr align 8 %val) #0 {
-  store i32 0, ptr %val, align 8
+define void @void_one_out_arg_i32_1_use_align(ptr addrspace(5) align 8 %val) #0 {
+  store i32 0, ptr addrspace(5) %val, align 8
   ret void
 }
 
-define void @void_one_out_arg_i32_2_use(i1 %arg0, ptr %val) #0 {
+define void @void_one_out_arg_i32_2_use(i1 %arg0, ptr addrspace(5) %val) #0 {
   br i1 %arg0, label %ret0, label %ret1
 
 ret0:
-  store i32 0, ptr %val
+  store i32 0, ptr addrspace(5) %val
   ret void
 
ret1:
-  store i32 9, ptr %val
+  store i32 9, ptr addrspace(5) %val
   ret void
 }
 
 declare void @may.clobber()
 
-define void @void_one_out_arg_i32_2_stores(ptr %val) #0 {
-  store i32 0, ptr %val
-  store i32 1, ptr %val
+define void @void_one_out_arg_i32_2_stores(ptr addrspace(5) %val) #0 {
+  store i32 0, ptr addrspace(5) %val
+  store i32 1, ptr addrspace(5) %val
   ret void
 }
 
-define void @void_one_out_arg_i32_2_stores_clobber(ptr %val) #0 {
-  store i32 0, ptr %val
+define void @void_one_out_arg_i32_2_stores_clobber(ptr addrspace(5) %val) #0 {
+  store i32 0, ptr addrspace(5) %val
   call void @may.clobber()
-  store i32 1, ptr %val
+  store i32 1, ptr addrspace(5) %val
   ret void
 }
 
-define void @void_one_out_arg_i32_call_may_clobber(ptr %val) #0 {
-  store i32 0, ptr %val
+define void @void_one_out_arg_i32_call_may_clobber(ptr addrspace(5) %val) #0 {
+  store i32 0, ptr addrspace(5) %val
   call void @may.clobber()
   ret void
 }
 
-define void @void_one_out_arg_i32_pre_call_may_clobber(ptr %val) #0 {
+define void @void_one_out_arg_i32_pre_call_may_clobber(ptr addrspace(5) %val) #0 {
   call void @may.clobber()
-  store i32 0, ptr %val
+  store i32 0, ptr addrspace(5) %val
   ret void
 }
 
-define void @void_one_out_arg_i32_reload(ptr %val) #0 {
-  store i32 0, ptr %val
-  %load = load i32, ptr %val, align 4
+define void @void_one_out_arg_i32_reload(ptr addrspace(5) %val) #0 {
+  store i32 0, ptr addrspace(5) %val
+  %load = load i32, ptr addrspace(5) %val, align 4
   ret void
 }
 
-define void @void_one_out_arg_i32_store_in_different_block(ptr %out) #0 {
+define void @void_one_out_arg_i32_store_in_different_block(ptr addrspace(5) %out) #0 {
   %load = load i32, ptr addrspace(1) poison
-  store i32 0, ptr %out
+  store i32 0, ptr addrspace(5) %out
   br label %ret
 
@@ -121,20 +123,20 @@ ret:
 }
 
-define void @unused_out_arg_one_branch(i1 %arg0, ptr %val) #0 {
+define void @unused_out_arg_one_branch(i1 %arg0, ptr addrspace(5) %val) #0 {
   br i1 %arg0, label %ret0, label %ret1
 
 ret0:
   ret void
 
 ret1:
-  store i32 9, ptr %val
+  store i32 9, ptr addrspace(5) %val
   ret void
 }
 
-define void @void_one_out_arg_v2i32_1_use(ptr %val) #0 {
-  store <2 x i32> , ptr %val
+define void @void_one_out_arg_v2i32_1_use(ptr addrspace(5) %val) #0 {
+  store <2 x i32> , ptr addrspace(5) %val
   ret void
 }
 
@@ -142,50 +144,50 @@ define void @void_one_out_arg_v2i32_1_use(ptr %val) #0 {
 ; Normally this is split into element accesses which we don't handle.
-define void @void_one_out_arg_struct_1_use(ptr %out) #0 { - store %struct { i32 9, i8 99, float 4.0 }, ptr %out +define void @void_one_out_arg_struct_1_use(ptr addrspace(5) %out) #0 { + store %struct { i32 9, i8 99, float 4.0 }, ptr addrspace(5) %out ret void } -define i32 @i32_one_out_arg_i32_1_use(ptr %val) #0 { - store i32 24, ptr %val +define i32 @i32_one_out_arg_i32_1_use(ptr addrspace(5) %val) #0 { + store i32 24, ptr addrspace(5) %val ret i32 9 } -define void @unused_different_type(ptr %arg0, ptr nocapture %arg1) #0 { - store float 4.0, ptr %arg1, align 4 +define void @unused_different_type(ptr addrspace(5) %arg0, ptr addrspace(5) nocapture %arg1) #0 { + store float 4.0, ptr addrspace(5) %arg1, align 4 ret void } -define void @multiple_same_return_noalias(ptr noalias %out0, ptr noalias %out1) #0 { - store i32 1, ptr %out0, align 4 - store i32 2, ptr %out1, align 4 +define void @multiple_same_return_noalias(ptr addrspace(5) noalias %out0, ptr addrspace(5) noalias %out1) #0 { + store i32 1, ptr addrspace(5) %out0, align 4 + store i32 2, ptr addrspace(5) %out1, align 4 ret void } -define void @multiple_same_return_mayalias(ptr %out0, ptr %out1) #0 { - store i32 1, ptr %out0, align 4 - store i32 2, ptr %out1, align 4 +define void @multiple_same_return_mayalias(ptr addrspace(5) %out0, ptr addrspace(5) %out1) #0 { + store i32 1, ptr addrspace(5) %out0, align 4 + store i32 2, ptr addrspace(5) %out1, align 4 ret void } -define void @multiple_same_return_mayalias_order(ptr %out0, ptr %out1) #0 { - store i32 2, ptr %out1, align 4 - store i32 1, ptr %out0, align 4 +define void @multiple_same_return_mayalias_order(ptr addrspace(5) %out0, ptr addrspace(5) %out1) #0 { + store i32 2, ptr addrspace(5) %out1, align 4 + store i32 1, ptr addrspace(5) %out0, align 4 ret void } ; Currently this fails to convert because the store won't be found if ; it isn't in the same block as the return. -define i32 @store_in_entry_block(i1 %arg0, ptr %out) #0 { +define i32 @store_in_entry_block(i1 %arg0, ptr addrspace(5) %out) #0 { entry: %val0 = load i32, ptr addrspace(1) poison - store i32 %val0, ptr %out + store i32 %val0, ptr addrspace(5) %out br i1 %arg0, label %if, label %endif if: @@ -198,8 +200,8 @@ endif: } -define i1 @i1_one_out_arg_i32_1_use(ptr %val) #0 { - store i32 24, ptr %val +define i1 @i1_one_out_arg_i32_1_use(ptr addrspace(5) %val) #0 { + store i32 24, ptr addrspace(5) %val ret i1 true } @@ -207,20 +209,20 @@ define i1 @i1_one_out_arg_i32_1_use(ptr %val) #0 { ; incompatible with struct return types. 
-define zeroext i1 @i1_zeroext_one_out_arg_i32_1_use(ptr %val) #0 { - store i32 24, ptr %val +define zeroext i1 @i1_zeroext_one_out_arg_i32_1_use(ptr addrspace(5) %val) #0 { + store i32 24, ptr addrspace(5) %val ret i1 true } -define signext i1 @i1_signext_one_out_arg_i32_1_use(ptr %val) #0 { - store i32 24, ptr %val +define signext i1 @i1_signext_one_out_arg_i32_1_use(ptr addrspace(5) %val) #0 { + store i32 24, ptr addrspace(5) %val ret i1 true } -define noalias ptr addrspace(1) @p1i32_noalias_one_out_arg_i32_1_use(ptr %val) #0 { - store i32 24, ptr %val +define noalias ptr addrspace(1) @p1i32_noalias_one_out_arg_i32_1_use(ptr addrspace(5) %val) #0 { + store i32 24, ptr addrspace(5) %val ret ptr addrspace(1) null } @@ -229,74 +231,74 @@ define void @void_one_out_non_private_arg_i32_1_use(ptr addrspace(1) %val) #0 { ret void } -define void @func_ptr_type(ptr %out) #0 { +define void @func_ptr_type(ptr addrspace(5) %out) #0 { %func = load ptr, ptr poison - store ptr %func, ptr %out + store ptr %func, ptr addrspace(5) %out ret void } -define void @bitcast_func_ptr_type(ptr %out) #0 { +define void @bitcast_func_ptr_type(ptr addrspace(5) %out) #0 { %func = load ptr, ptr poison - store ptr %func, ptr %out + store ptr %func, ptr addrspace(5) %out ret void } -define void @out_arg_small_array(ptr %val) #0 { - store [4 x i32] [i32 0, i32 1, i32 2, i32 3], ptr %val +define void @out_arg_small_array(ptr addrspace(5) %val) #0 { + store [4 x i32] [i32 0, i32 1, i32 2, i32 3], ptr addrspace(5) %val ret void } -define void @out_arg_large_array(ptr %val) #0 { - store [17 x i32] zeroinitializer, ptr %val +define void @out_arg_large_array(ptr addrspace(5) %val) #0 { + store [17 x i32] zeroinitializer, ptr addrspace(5) %val ret void } -define <16 x i32> @num_regs_return_limit(ptr %out, i32 %val) #0 { +define <16 x i32> @num_regs_return_limit(ptr addrspace(5) %out, i32 %val) #0 { %load = load volatile <16 x i32>, ptr addrspace(1) poison - store i32 %val, ptr %out + store i32 %val, ptr addrspace(5) %out ret <16 x i32> %load } -define [15 x i32] @num_regs_reach_limit(ptr %out, i32 %val) #0 { +define [15 x i32] @num_regs_reach_limit(ptr addrspace(5) %out, i32 %val) #0 { %load = load volatile [15 x i32], ptr addrspace(1) poison - store i32 %val, ptr %out + store i32 %val, ptr addrspace(5) %out ret [15 x i32] %load } -define [15 x i32] @num_regs_reach_limit_leftover(ptr %out0, ptr %out1, i32 %val0) #0 { +define [15 x i32] @num_regs_reach_limit_leftover(ptr addrspace(5) %out0, ptr addrspace(5) %out1, i32 %val0) #0 { %load0 = load volatile [15 x i32], ptr addrspace(1) poison %load1 = load volatile i32, ptr addrspace(1) poison - store i32 %val0, ptr %out0 - store i32 %load1, ptr %out1 + store i32 %val0, ptr addrspace(5) %out0 + store i32 %load1, ptr addrspace(5) %out1 ret [15 x i32] %load0 } -define void @preserve_debug_info(i32 %arg0, ptr %val) #0 !dbg !5 { +define void @preserve_debug_info(i32 %arg0, ptr addrspace(5) %val) #0 !dbg !5 { call void @may.clobber(), !dbg !10 - store i32 %arg0, ptr %val, !dbg !11 + store i32 %arg0, ptr addrspace(5) %val, !dbg !11 ret void, !dbg !12 } -define void @preserve_metadata(i32 %arg0, ptr %val) #0 !kernel_arg_access_qual !13 { +define void @preserve_metadata(i32 %arg0, ptr addrspace(5) %val) #0 !kernel_arg_access_qual !13 { call void @may.clobber() - store i32 %arg0, ptr %val + store i32 %arg0, ptr addrspace(5) %val ret void } ; Clang emits this pattern for 3-vectors for some reason. 
-define void @bitcast_pointer_v4i32_v3i32(ptr %out) #0 { +define void @bitcast_pointer_v4i32_v3i32(ptr addrspace(5) %out) #0 { %load = load volatile <4 x i32>, ptr addrspace(1) poison - store <4 x i32> %load, ptr %out + store <4 x i32> %load, ptr addrspace(5) %out ret void } -define void @bitcast_pointer_v4i32_v3f32(ptr %out) #0 { +define void @bitcast_pointer_v4i32_v3f32(ptr addrspace(5) %out) #0 { %load = load volatile <4 x i32>, ptr addrspace(1) poison - store <4 x i32> %load, ptr %out + store <4 x i32> %load, ptr addrspace(5) %out ret void } @@ -305,21 +307,21 @@ define void @bitcast_pointer_v4i32_v3f32(ptr %out) #0 { ; casts. -define void @bitcast_pointer_i32_f32(ptr %out) #0 { +define void @bitcast_pointer_i32_f32(ptr addrspace(5) %out) #0 { %load = load volatile i32, ptr addrspace(1) poison - store i32 %load, ptr %out + store i32 %load, ptr addrspace(5) %out ret void } -define void @bitcast_pointer_i32_f16(ptr %out) #0 { +define void @bitcast_pointer_i32_f16(ptr addrspace(5) %out) #0 { %load = load volatile i32, ptr addrspace(1) poison - store i32 %load, ptr %out + store i32 %load, ptr addrspace(5) %out ret void } -define void @bitcast_pointer_f16_i32(ptr %out) #0 { +define void @bitcast_pointer_f16_i32(ptr addrspace(5) %out) #0 { %load = load volatile half, ptr addrspace(1) poison - store half %load, ptr %out + store half %load, ptr addrspace(5) %out ret void } @@ -330,80 +332,80 @@ define void @bitcast_pointer_f16_i32(ptr %out) #0 { %struct.v4f32 = type { <4 x float> } -define void @bitcast_struct_v3f32_v3f32(ptr %out, <3 x float> %value) #0 { +define void @bitcast_struct_v3f32_v3f32(ptr addrspace(5) %out, <3 x float> %value) #0 { %extractVec = shufflevector <3 x float> %value, <3 x float> poison, <4 x i32> - store <4 x float> %extractVec, ptr %out, align 16 + store <4 x float> %extractVec, ptr addrspace(5) %out, align 16 ret void } -define void @bitcast_struct_v3f32_v3i32(ptr %out, <3 x i32> %value) #0 { +define void @bitcast_struct_v3f32_v3i32(ptr addrspace(5) %out, <3 x i32> %value) #0 { %extractVec = shufflevector <3 x i32> %value, <3 x i32> poison, <4 x i32> - store <4 x i32> %extractVec, ptr %out, align 16 + store <4 x i32> %extractVec, ptr addrspace(5) %out, align 16 ret void } -define void @bitcast_struct_v4f32_v4f32(ptr %out, <4 x float> %value) #0 { - store <4 x float> %value, ptr %out, align 16 +define void @bitcast_struct_v4f32_v4f32(ptr addrspace(5) %out, <4 x float> %value) #0 { + store <4 x float> %value, ptr addrspace(5) %out, align 16 ret void } -define void @bitcast_struct_v3f32_v4i32(ptr %out, <4 x i32> %value) #0 { - store <4 x i32> %value, ptr %out, align 16 +define void @bitcast_struct_v3f32_v4i32(ptr addrspace(5) %out, <4 x i32> %value) #0 { + store <4 x i32> %value, ptr addrspace(5) %out, align 16 ret void } -define void @bitcast_struct_v4f32_v3f32(ptr %out, <3 x float> %value) #0 { +define void @bitcast_struct_v4f32_v3f32(ptr addrspace(5) %out, <3 x float> %value) #0 { %extractVec = shufflevector <3 x float> %value, <3 x float> poison, <4 x i32> - store <4 x float> %extractVec, ptr %out, align 16 + store <4 x float> %extractVec, ptr addrspace(5) %out, align 16 ret void } -define void @bitcast_struct_v3f32_v2f32(ptr %out, <2 x float> %value) #0 { - store <2 x float> %value, ptr %out, align 8 +define void @bitcast_struct_v3f32_v2f32(ptr addrspace(5) %out, <2 x float> %value) #0 { + store <2 x float> %value, ptr addrspace(5) %out, align 8 ret void } -define void @bitcast_struct_v3f32_f32_v3f32(ptr %out, <3 x float> %value) #0 { +define void 
@bitcast_struct_v3f32_f32_v3f32(ptr addrspace(5) %out, <3 x float> %value) #0 { %extractVec = shufflevector <3 x float> %value, <3 x float> poison, <4 x i32> - store <4 x float> %extractVec, ptr %out, align 16 + store <4 x float> %extractVec, ptr addrspace(5) %out, align 16 ret void } -define void @bitcast_struct_v3f32_f32_v4f32(ptr %out, <4 x float> %value) #0 { - store <4 x float> %value, ptr %out, align 16 +define void @bitcast_struct_v3f32_f32_v4f32(ptr addrspace(5) %out, <4 x float> %value) #0 { + store <4 x float> %value, ptr addrspace(5) %out, align 16 ret void } -define void @bitcast_struct_i128_v4f32(ptr %out, <4 x float> %value) #0 { - store <4 x float> %value, ptr %out, align 16 +define void @bitcast_struct_i128_v4f32(ptr addrspace(5) %out, <4 x float> %value) #0 { + store <4 x float> %value, ptr addrspace(5) %out, align 16 ret void } -define void @bitcast_array_v4i32_v4f32(ptr %out, [4 x float] %value) #0 { - store [4 x float] %value, ptr %out, align 4 +define void @bitcast_array_v4i32_v4f32(ptr addrspace(5) %out, [4 x float] %value) #0 { + store [4 x float] %value, ptr addrspace(5) %out, align 4 ret void } -define void @multi_return_bitcast_struct_v3f32_v3f32(i1 %cond, ptr %out, <3 x float> %value) #0 { +define void @multi_return_bitcast_struct_v3f32_v3f32(i1 %cond, ptr addrspace(5) %out, <3 x float> %value) #0 { entry: br i1 %cond, label %ret0, label %ret1 ret0: %extractVec = shufflevector <3 x float> %value, <3 x float> poison, <4 x i32> - store <4 x float> %extractVec, ptr %out, align 16 + store <4 x float> %extractVec, ptr addrspace(5) %out, align 16 ret void ret1: %load = load <4 x float>, ptr addrspace(1) poison - store <4 x float> %load, ptr %out, align 16 + store <4 x float> %load, ptr addrspace(5) %out, align 16 ret void } -define void @bitcast_v3f32_struct_v3f32(ptr %out, %struct.v3f32 %value) #0 { - store %struct.v3f32 %value, ptr %out, align 4 +define void @bitcast_v3f32_struct_v3f32(ptr addrspace(5) %out, %struct.v3f32 %value) #0 { + store %struct.v3f32 %value, ptr addrspace(5) %out, align 4 ret void } @@ -435,82 +437,82 @@ attributes #2 = { alwaysinline nounwind } ; ; ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_no_use -; CHECK-SAME: (ptr [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr addrspace(5) [[VAL:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@skip_byval_arg -; CHECK-SAME: (ptr byval(i32) [[VAL:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: store i32 0, ptr [[VAL]], align 4 +; CHECK-SAME: (ptr addrspace(5) byval(i32) [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: store i32 0, ptr addrspace(5) [[VAL]], align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@skip_optnone -; CHECK-SAME: (ptr byval(i32) [[VAL:%.*]]) #[[ATTR1:[0-9]+]] { -; CHECK-NEXT: store i32 0, ptr [[VAL]], align 4 +; CHECK-SAME: (ptr addrspace(5) byval(i32) [[VAL:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: store i32 0, ptr addrspace(5) [[VAL]], align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@skip_volatile -; CHECK-SAME: (ptr byval(i32) [[VAL:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: store volatile i32 0, ptr [[VAL]], align 4 +; CHECK-SAME: (ptr addrspace(5) byval(i32) [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: store volatile i32 0, ptr addrspace(5) [[VAL]], align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@skip_atomic -; CHECK-SAME: (ptr byval(i32) [[VAL:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: store atomic i32 0, ptr [[VAL]] seq_cst, align 4 +; CHECK-SAME: (ptr addrspace(5) byval(i32) [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: store atomic 
i32 0, ptr addrspace(5) [[VAL]] seq_cst, align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@skip_store_pointer_val -; CHECK-SAME: (ptr [[VAL:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: store ptr [[VAL]], ptr poison, align 8 +; CHECK-SAME: (ptr addrspace(5) [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: store ptr addrspace(5) [[VAL]], ptr poison, align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@skip_store_gep -; CHECK-SAME: (ptr [[VAL:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[VAL]], i32 1 -; CHECK-NEXT: store i32 0, ptr [[GEP]], align 4 +; CHECK-SAME: (ptr addrspace(5) [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr addrspace(5) [[VAL]], i32 1 +; CHECK-NEXT: store i32 0, ptr addrspace(5) [[GEP]], align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@skip_sret -; CHECK-SAME: (ptr sret(i32) [[SRET:%.*]], ptr [[OUT:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: store i32 1, ptr [[SRET]], align 4 -; CHECK-NEXT: store i32 0, ptr [[OUT]], align 4 +; CHECK-SAME: (ptr addrspace(5) sret(i32) [[SRET:%.*]], ptr addrspace(5) [[OUT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: store i32 1, ptr addrspace(5) [[SRET]], align 4 +; CHECK-NEXT: store i32 0, ptr addrspace(5) [[OUT]], align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_1_use.body -; CHECK-SAME: (ptr [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr addrspace(5) [[VAL:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: ret [[VOID_ONE_OUT_ARG_I32_1_USE:%.*]] zeroinitializer ; ; ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_1_use -; CHECK-SAME: (ptr [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] { -; CHECK-NEXT: [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_I32_1_USE:%.*]] @void_one_out_arg_i32_1_use.body(ptr poison) +; CHECK-SAME: (ptr addrspace(5) [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_I32_1_USE:%.*]] @void_one_out_arg_i32_1_use.body(ptr addrspace(5) poison) ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[VOID_ONE_OUT_ARG_I32_1_USE]] [[TMP2]], 0 -; CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP0]], align 4 +; CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(5) [[TMP0]], align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_1_use_align.body -; CHECK-SAME: (ptr align 8 [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr addrspace(5) align 8 [[VAL:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: ret [[VOID_ONE_OUT_ARG_I32_1_USE_ALIGN:%.*]] zeroinitializer ; ; ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_1_use_align -; CHECK-SAME: (ptr align 8 [[TMP0:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_I32_1_USE_ALIGN:%.*]] @void_one_out_arg_i32_1_use_align.body(ptr poison) +; CHECK-SAME: (ptr addrspace(5) align 8 [[TMP0:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_I32_1_USE_ALIGN:%.*]] @void_one_out_arg_i32_1_use_align.body(ptr addrspace(5) poison) ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[VOID_ONE_OUT_ARG_I32_1_USE_ALIGN]] [[TMP2]], 0 -; CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP0]], align 8 +; CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(5) [[TMP0]], align 8 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_2_use.body -; CHECK-SAME: (i1 [[ARG0:%.*]], ptr [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (i1 [[ARG0:%.*]], ptr addrspace(5) [[VAL:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: br i1 [[ARG0]], label [[RET0:%.*]], label [[RET1:%.*]] ; CHECK: ret0: ; CHECK-NEXT: ret [[VOID_ONE_OUT_ARG_I32_2_USE:%.*]] zeroinitializer @@ -519,192 
+521,192 @@ attributes #2 = { alwaysinline nounwind } ; ; ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_2_use -; CHECK-SAME: (i1 [[TMP0:%.*]], ptr [[TMP1:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP3:%.*]] = call [[VOID_ONE_OUT_ARG_I32_2_USE:%.*]] @void_one_out_arg_i32_2_use.body(i1 [[TMP0]], ptr poison) +; CHECK-SAME: (i1 [[TMP0:%.*]], ptr addrspace(5) [[TMP1:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP3:%.*]] = call [[VOID_ONE_OUT_ARG_I32_2_USE:%.*]] @void_one_out_arg_i32_2_use.body(i1 [[TMP0]], ptr addrspace(5) poison) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[VOID_ONE_OUT_ARG_I32_2_USE]] [[TMP3]], 0 -; CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP1]], align 4 +; CHECK-NEXT: store i32 [[TMP4]], ptr addrspace(5) [[TMP1]], align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_2_stores.body -; CHECK-SAME: (ptr [[VAL:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: store i32 0, ptr [[VAL]], align 4 +; CHECK-SAME: (ptr addrspace(5) [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: store i32 0, ptr addrspace(5) [[VAL]], align 4 ; CHECK-NEXT: ret [[VOID_ONE_OUT_ARG_I32_2_STORES:%.*]] { i32 1 } ; ; ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_2_stores -; CHECK-SAME: (ptr [[TMP0:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_I32_2_STORES:%.*]] @void_one_out_arg_i32_2_stores.body(ptr poison) +; CHECK-SAME: (ptr addrspace(5) [[TMP0:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_I32_2_STORES:%.*]] @void_one_out_arg_i32_2_stores.body(ptr addrspace(5) poison) ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[VOID_ONE_OUT_ARG_I32_2_STORES]] [[TMP2]], 0 -; CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP0]], align 4 +; CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(5) [[TMP0]], align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_2_stores_clobber.body -; CHECK-SAME: (ptr [[VAL:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: store i32 0, ptr [[VAL]], align 4 +; CHECK-SAME: (ptr addrspace(5) [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: store i32 0, ptr addrspace(5) [[VAL]], align 4 ; CHECK-NEXT: call void @may.clobber() ; CHECK-NEXT: ret [[VOID_ONE_OUT_ARG_I32_2_STORES_CLOBBER:%.*]] { i32 1 } ; ; ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_2_stores_clobber -; CHECK-SAME: (ptr [[TMP0:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_I32_2_STORES_CLOBBER:%.*]] @void_one_out_arg_i32_2_stores_clobber.body(ptr poison) +; CHECK-SAME: (ptr addrspace(5) [[TMP0:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_I32_2_STORES_CLOBBER:%.*]] @void_one_out_arg_i32_2_stores_clobber.body(ptr addrspace(5) poison) ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[VOID_ONE_OUT_ARG_I32_2_STORES_CLOBBER]] [[TMP2]], 0 -; CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP0]], align 4 +; CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(5) [[TMP0]], align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_call_may_clobber -; CHECK-SAME: (ptr [[VAL:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: store i32 0, ptr [[VAL]], align 4 +; CHECK-SAME: (ptr addrspace(5) [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: store i32 0, ptr addrspace(5) [[VAL]], align 4 ; CHECK-NEXT: call void @may.clobber() ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_pre_call_may_clobber.body -; CHECK-SAME: (ptr [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr addrspace(5) [[VAL:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: call void @may.clobber() ; CHECK-NEXT: ret [[VOID_ONE_OUT_ARG_I32_PRE_CALL_MAY_CLOBBER:%.*]] 
zeroinitializer ; ; ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_pre_call_may_clobber -; CHECK-SAME: (ptr [[TMP0:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_I32_PRE_CALL_MAY_CLOBBER:%.*]] @void_one_out_arg_i32_pre_call_may_clobber.body(ptr poison) +; CHECK-SAME: (ptr addrspace(5) [[TMP0:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_I32_PRE_CALL_MAY_CLOBBER:%.*]] @void_one_out_arg_i32_pre_call_may_clobber.body(ptr addrspace(5) poison) ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[VOID_ONE_OUT_ARG_I32_PRE_CALL_MAY_CLOBBER]] [[TMP2]], 0 -; CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP0]], align 4 +; CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(5) [[TMP0]], align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_reload -; CHECK-SAME: (ptr [[VAL:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: store i32 0, ptr [[VAL]], align 4 -; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[VAL]], align 4 +; CHECK-SAME: (ptr addrspace(5) [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: store i32 0, ptr addrspace(5) [[VAL]], align 4 +; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(5) [[VAL]], align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_store_in_different_block -; CHECK-SAME: (ptr [[OUT:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr addrspace(5) [[OUT:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) poison, align 4 -; CHECK-NEXT: store i32 0, ptr [[OUT]], align 4 +; CHECK-NEXT: store i32 0, ptr addrspace(5) [[OUT]], align 4 ; CHECK-NEXT: br label [[RET:%.*]] ; CHECK: ret: ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@unused_out_arg_one_branch -; CHECK-SAME: (i1 [[ARG0:%.*]], ptr [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (i1 [[ARG0:%.*]], ptr addrspace(5) [[VAL:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: br i1 [[ARG0]], label [[RET0:%.*]], label [[RET1:%.*]] ; CHECK: ret0: ; CHECK-NEXT: ret void ; CHECK: ret1: -; CHECK-NEXT: store i32 9, ptr [[VAL]], align 4 +; CHECK-NEXT: store i32 9, ptr addrspace(5) [[VAL]], align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_v2i32_1_use.body -; CHECK-SAME: (ptr [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr addrspace(5) [[VAL:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: ret [[VOID_ONE_OUT_ARG_V2I32_1_USE:%.*]] { <2 x i32> } ; ; ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_v2i32_1_use -; CHECK-SAME: (ptr [[TMP0:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_V2I32_1_USE:%.*]] @void_one_out_arg_v2i32_1_use.body(ptr poison) +; CHECK-SAME: (ptr addrspace(5) [[TMP0:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_V2I32_1_USE:%.*]] @void_one_out_arg_v2i32_1_use.body(ptr addrspace(5) poison) ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[VOID_ONE_OUT_ARG_V2I32_1_USE]] [[TMP2]], 0 -; CHECK-NEXT: store <2 x i32> [[TMP3]], ptr [[TMP0]], align 8 +; CHECK-NEXT: store <2 x i32> [[TMP3]], ptr addrspace(5) [[TMP0]], align 8 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_struct_1_use.body -; CHECK-SAME: (ptr [[OUT:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr addrspace(5) [[OUT:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: ret [[VOID_ONE_OUT_ARG_STRUCT_1_USE:%.*]] { [[STRUCT:%.*]] { i32 9, i8 99, float 4.000000e+00 } } ; ; ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_struct_1_use -; CHECK-SAME: (ptr [[TMP0:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_STRUCT_1_USE:%.*]] @void_one_out_arg_struct_1_use.body(ptr poison) +; CHECK-SAME: (ptr 
addrspace(5) [[TMP0:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_STRUCT_1_USE:%.*]] @void_one_out_arg_struct_1_use.body(ptr addrspace(5) poison) ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[VOID_ONE_OUT_ARG_STRUCT_1_USE]] [[TMP2]], 0 -; CHECK-NEXT: store [[STRUCT:%.*]] [[TMP3]], ptr [[TMP0]], align 4 +; CHECK-NEXT: store [[STRUCT:%.*]] [[TMP3]], ptr addrspace(5) [[TMP0]], align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@i32_one_out_arg_i32_1_use.body -; CHECK-SAME: (ptr [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr addrspace(5) [[VAL:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: ret [[I32_ONE_OUT_ARG_I32_1_USE:%.*]] { i32 9, i32 24 } ; ; ; CHECK-LABEL: define {{[^@]+}}@i32_one_out_arg_i32_1_use -; CHECK-SAME: (ptr [[TMP0:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP2:%.*]] = call [[I32_ONE_OUT_ARG_I32_1_USE:%.*]] @i32_one_out_arg_i32_1_use.body(ptr poison) +; CHECK-SAME: (ptr addrspace(5) [[TMP0:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP2:%.*]] = call [[I32_ONE_OUT_ARG_I32_1_USE:%.*]] @i32_one_out_arg_i32_1_use.body(ptr addrspace(5) poison) ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[I32_ONE_OUT_ARG_I32_1_USE]] [[TMP2]], 1 -; CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP0]], align 4 +; CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(5) [[TMP0]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[I32_ONE_OUT_ARG_I32_1_USE]] [[TMP2]], 0 ; CHECK-NEXT: ret i32 [[TMP4]] ; ; ; CHECK-LABEL: define {{[^@]+}}@unused_different_type.body -; CHECK-SAME: (ptr [[ARG0:%.*]], ptr nocapture [[ARG1:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr addrspace(5) [[ARG0:%.*]], ptr addrspace(5) nocapture [[ARG1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: ret [[UNUSED_DIFFERENT_TYPE:%.*]] { float 4.000000e+00 } ; ; ; CHECK-LABEL: define {{[^@]+}}@unused_different_type -; CHECK-SAME: (ptr [[TMP0:%.*]], ptr nocapture [[TMP1:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP3:%.*]] = call [[UNUSED_DIFFERENT_TYPE:%.*]] @unused_different_type.body(ptr [[TMP0]], ptr poison) +; CHECK-SAME: (ptr addrspace(5) [[TMP0:%.*]], ptr addrspace(5) nocapture [[TMP1:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP3:%.*]] = call [[UNUSED_DIFFERENT_TYPE:%.*]] @unused_different_type.body(ptr addrspace(5) [[TMP0]], ptr addrspace(5) poison) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[UNUSED_DIFFERENT_TYPE]] [[TMP3]], 0 -; CHECK-NEXT: store float [[TMP4]], ptr [[TMP1]], align 4 +; CHECK-NEXT: store float [[TMP4]], ptr addrspace(5) [[TMP1]], align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@multiple_same_return_noalias.body -; CHECK-SAME: (ptr noalias [[OUT0:%.*]], ptr noalias [[OUT1:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr addrspace(5) noalias [[OUT0:%.*]], ptr addrspace(5) noalias [[OUT1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: ret [[MULTIPLE_SAME_RETURN_NOALIAS:%.*]] { i32 1, i32 2 } ; ; ; CHECK-LABEL: define {{[^@]+}}@multiple_same_return_noalias -; CHECK-SAME: (ptr noalias [[TMP0:%.*]], ptr noalias [[TMP1:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP3:%.*]] = call [[MULTIPLE_SAME_RETURN_NOALIAS:%.*]] @multiple_same_return_noalias.body(ptr poison, ptr poison) +; CHECK-SAME: (ptr addrspace(5) noalias [[TMP0:%.*]], ptr addrspace(5) noalias [[TMP1:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP3:%.*]] = call [[MULTIPLE_SAME_RETURN_NOALIAS:%.*]] @multiple_same_return_noalias.body(ptr addrspace(5) poison, ptr addrspace(5) poison) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[MULTIPLE_SAME_RETURN_NOALIAS]] [[TMP3]], 0 -; CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP0]], align 4 +; CHECK-NEXT: store i32 [[TMP4]], ptr addrspace(5) [[TMP0]], align 4 ; CHECK-NEXT: 
[[TMP5:%.*]] = extractvalue [[MULTIPLE_SAME_RETURN_NOALIAS]] [[TMP3]], 1 -; CHECK-NEXT: store i32 [[TMP5]], ptr [[TMP1]], align 4 +; CHECK-NEXT: store i32 [[TMP5]], ptr addrspace(5) [[TMP1]], align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@multiple_same_return_mayalias.body -; CHECK-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr addrspace(5) [[OUT0:%.*]], ptr addrspace(5) [[OUT1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: ret [[MULTIPLE_SAME_RETURN_MAYALIAS:%.*]] { i32 2, i32 1 } ; ; ; CHECK-LABEL: define {{[^@]+}}@multiple_same_return_mayalias -; CHECK-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP3:%.*]] = call [[MULTIPLE_SAME_RETURN_MAYALIAS:%.*]] @multiple_same_return_mayalias.body(ptr poison, ptr poison) +; CHECK-SAME: (ptr addrspace(5) [[TMP0:%.*]], ptr addrspace(5) [[TMP1:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP3:%.*]] = call [[MULTIPLE_SAME_RETURN_MAYALIAS:%.*]] @multiple_same_return_mayalias.body(ptr addrspace(5) poison, ptr addrspace(5) poison) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[MULTIPLE_SAME_RETURN_MAYALIAS]] [[TMP3]], 0 -; CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP0]], align 4 +; CHECK-NEXT: store i32 [[TMP4]], ptr addrspace(5) [[TMP0]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[MULTIPLE_SAME_RETURN_MAYALIAS]] [[TMP3]], 1 -; CHECK-NEXT: store i32 [[TMP5]], ptr [[TMP1]], align 4 +; CHECK-NEXT: store i32 [[TMP5]], ptr addrspace(5) [[TMP1]], align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@multiple_same_return_mayalias_order.body -; CHECK-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr addrspace(5) [[OUT0:%.*]], ptr addrspace(5) [[OUT1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: ret [[MULTIPLE_SAME_RETURN_MAYALIAS_ORDER:%.*]] { i32 1, i32 2 } ; ; ; CHECK-LABEL: define {{[^@]+}}@multiple_same_return_mayalias_order -; CHECK-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP3:%.*]] = call [[MULTIPLE_SAME_RETURN_MAYALIAS_ORDER:%.*]] @multiple_same_return_mayalias_order.body(ptr poison, ptr poison) +; CHECK-SAME: (ptr addrspace(5) [[TMP0:%.*]], ptr addrspace(5) [[TMP1:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP3:%.*]] = call [[MULTIPLE_SAME_RETURN_MAYALIAS_ORDER:%.*]] @multiple_same_return_mayalias_order.body(ptr addrspace(5) poison, ptr addrspace(5) poison) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[MULTIPLE_SAME_RETURN_MAYALIAS_ORDER]] [[TMP3]], 0 -; CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP0]], align 4 +; CHECK-NEXT: store i32 [[TMP4]], ptr addrspace(5) [[TMP0]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[MULTIPLE_SAME_RETURN_MAYALIAS_ORDER]] [[TMP3]], 1 -; CHECK-NEXT: store i32 [[TMP5]], ptr [[TMP1]], align 4 +; CHECK-NEXT: store i32 [[TMP5]], ptr addrspace(5) [[TMP1]], align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@store_in_entry_block -; CHECK-SAME: (i1 [[ARG0:%.*]], ptr [[OUT:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (i1 [[ARG0:%.*]], ptr addrspace(5) [[OUT:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[VAL0:%.*]] = load i32, ptr addrspace(1) poison, align 4 -; CHECK-NEXT: store i32 [[VAL0]], ptr [[OUT]], align 4 +; CHECK-NEXT: store i32 [[VAL0]], ptr addrspace(5) [[OUT]], align 4 ; CHECK-NEXT: br i1 [[ARG0]], label [[IF:%.*]], label [[ENDIF:%.*]] ; CHECK: if: ; CHECK-NEXT: [[VAL1:%.*]] = load i32, ptr addrspace(1) poison, align 4 @@ -715,57 +717,57 @@ attributes #2 = { alwaysinline nounwind } ; ; ; CHECK-LABEL: define {{[^@]+}}@i1_one_out_arg_i32_1_use.body -; CHECK-SAME: (ptr [[VAL:%.*]]) #[[ATTR0]] { 
+; CHECK-SAME: (ptr addrspace(5) [[VAL:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: ret [[I1_ONE_OUT_ARG_I32_1_USE:%.*]] { i1 true, i32 24 } ; ; ; CHECK-LABEL: define {{[^@]+}}@i1_one_out_arg_i32_1_use -; CHECK-SAME: (ptr [[TMP0:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP2:%.*]] = call [[I1_ONE_OUT_ARG_I32_1_USE:%.*]] @i1_one_out_arg_i32_1_use.body(ptr poison) +; CHECK-SAME: (ptr addrspace(5) [[TMP0:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP2:%.*]] = call [[I1_ONE_OUT_ARG_I32_1_USE:%.*]] @i1_one_out_arg_i32_1_use.body(ptr addrspace(5) poison) ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[I1_ONE_OUT_ARG_I32_1_USE]] [[TMP2]], 1 -; CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP0]], align 4 +; CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(5) [[TMP0]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[I1_ONE_OUT_ARG_I32_1_USE]] [[TMP2]], 0 ; CHECK-NEXT: ret i1 [[TMP4]] ; ; ; CHECK-LABEL: define {{[^@]+}}@i1_zeroext_one_out_arg_i32_1_use.body -; CHECK-SAME: (ptr [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr addrspace(5) [[VAL:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: ret [[I1_ZEROEXT_ONE_OUT_ARG_I32_1_USE:%.*]] { i1 true, i32 24 } ; ; ; CHECK-LABEL: define {{[^@]+}}@i1_zeroext_one_out_arg_i32_1_use -; CHECK-SAME: (ptr [[TMP0:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP2:%.*]] = call [[I1_ZEROEXT_ONE_OUT_ARG_I32_1_USE:%.*]] @i1_zeroext_one_out_arg_i32_1_use.body(ptr poison) +; CHECK-SAME: (ptr addrspace(5) [[TMP0:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP2:%.*]] = call [[I1_ZEROEXT_ONE_OUT_ARG_I32_1_USE:%.*]] @i1_zeroext_one_out_arg_i32_1_use.body(ptr addrspace(5) poison) ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[I1_ZEROEXT_ONE_OUT_ARG_I32_1_USE]] [[TMP2]], 1 -; CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP0]], align 4 +; CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(5) [[TMP0]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[I1_ZEROEXT_ONE_OUT_ARG_I32_1_USE]] [[TMP2]], 0 ; CHECK-NEXT: ret i1 [[TMP4]] ; ; ; CHECK-LABEL: define {{[^@]+}}@i1_signext_one_out_arg_i32_1_use.body -; CHECK-SAME: (ptr [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr addrspace(5) [[VAL:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: ret [[I1_SIGNEXT_ONE_OUT_ARG_I32_1_USE:%.*]] { i1 true, i32 24 } ; ; ; CHECK-LABEL: define {{[^@]+}}@i1_signext_one_out_arg_i32_1_use -; CHECK-SAME: (ptr [[TMP0:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP2:%.*]] = call [[I1_SIGNEXT_ONE_OUT_ARG_I32_1_USE:%.*]] @i1_signext_one_out_arg_i32_1_use.body(ptr poison) +; CHECK-SAME: (ptr addrspace(5) [[TMP0:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP2:%.*]] = call [[I1_SIGNEXT_ONE_OUT_ARG_I32_1_USE:%.*]] @i1_signext_one_out_arg_i32_1_use.body(ptr addrspace(5) poison) ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[I1_SIGNEXT_ONE_OUT_ARG_I32_1_USE]] [[TMP2]], 1 -; CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP0]], align 4 +; CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(5) [[TMP0]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[I1_SIGNEXT_ONE_OUT_ARG_I32_1_USE]] [[TMP2]], 0 ; CHECK-NEXT: ret i1 [[TMP4]] ; ; ; CHECK-LABEL: define {{[^@]+}}@p1i32_noalias_one_out_arg_i32_1_use.body -; CHECK-SAME: (ptr [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr addrspace(5) [[VAL:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: ret [[P1I32_NOALIAS_ONE_OUT_ARG_I32_1_USE:%.*]] { ptr addrspace(1) null, i32 24 } ; ; ; CHECK-LABEL: define {{[^@]+}}@p1i32_noalias_one_out_arg_i32_1_use -; CHECK-SAME: (ptr [[TMP0:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP2:%.*]] = call [[P1I32_NOALIAS_ONE_OUT_ARG_I32_1_USE:%.*]] @p1i32_noalias_one_out_arg_i32_1_use.body(ptr poison) +; CHECK-SAME: (ptr addrspace(5) [[TMP0:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP2:%.*]] 
= call [[P1I32_NOALIAS_ONE_OUT_ARG_I32_1_USE:%.*]] @p1i32_noalias_one_out_arg_i32_1_use.body(ptr addrspace(5) poison) ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[P1I32_NOALIAS_ONE_OUT_ARG_I32_1_USE]] [[TMP2]], 1 -; CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP0]], align 4 +; CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(5) [[TMP0]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[P1I32_NOALIAS_ONE_OUT_ARG_I32_1_USE]] [[TMP2]], 0 ; CHECK-NEXT: ret ptr addrspace(1) [[TMP4]] ; @@ -777,63 +779,63 @@ attributes #2 = { alwaysinline nounwind } ; ; ; CHECK-LABEL: define {{[^@]+}}@func_ptr_type.body -; CHECK-SAME: (ptr [[OUT:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr addrspace(5) [[OUT:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[FUNC:%.*]] = load ptr, ptr poison, align 8 ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[FUNC_PTR_TYPE:%.*]] poison, ptr [[FUNC]], 0 ; CHECK-NEXT: ret [[FUNC_PTR_TYPE]] [[TMP1]] ; ; ; CHECK-LABEL: define {{[^@]+}}@func_ptr_type -; CHECK-SAME: (ptr [[TMP0:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP2:%.*]] = call [[FUNC_PTR_TYPE:%.*]] @func_ptr_type.body(ptr poison) +; CHECK-SAME: (ptr addrspace(5) [[TMP0:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP2:%.*]] = call [[FUNC_PTR_TYPE:%.*]] @func_ptr_type.body(ptr addrspace(5) poison) ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[FUNC_PTR_TYPE]] [[TMP2]], 0 -; CHECK-NEXT: store ptr [[TMP3]], ptr [[TMP0]], align 8 +; CHECK-NEXT: store ptr [[TMP3]], ptr addrspace(5) [[TMP0]], align 8 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_func_ptr_type.body -; CHECK-SAME: (ptr [[OUT:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr addrspace(5) [[OUT:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[FUNC:%.*]] = load ptr, ptr poison, align 8 ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_FUNC_PTR_TYPE:%.*]] poison, ptr [[FUNC]], 0 ; CHECK-NEXT: ret [[BITCAST_FUNC_PTR_TYPE]] [[TMP1]] ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_func_ptr_type -; CHECK-SAME: (ptr [[TMP0:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP2:%.*]] = call [[BITCAST_FUNC_PTR_TYPE:%.*]] @bitcast_func_ptr_type.body(ptr poison) +; CHECK-SAME: (ptr addrspace(5) [[TMP0:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP2:%.*]] = call [[BITCAST_FUNC_PTR_TYPE:%.*]] @bitcast_func_ptr_type.body(ptr addrspace(5) poison) ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[BITCAST_FUNC_PTR_TYPE]] [[TMP2]], 0 -; CHECK-NEXT: store ptr [[TMP3]], ptr [[TMP0]], align 8 +; CHECK-NEXT: store ptr [[TMP3]], ptr addrspace(5) [[TMP0]], align 8 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@out_arg_small_array.body -; CHECK-SAME: (ptr [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr addrspace(5) [[VAL:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: ret [[OUT_ARG_SMALL_ARRAY:%.*]] { [4 x i32] [i32 0, i32 1, i32 2, i32 3] } ; ; ; CHECK-LABEL: define {{[^@]+}}@out_arg_small_array -; CHECK-SAME: (ptr [[TMP0:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP2:%.*]] = call [[OUT_ARG_SMALL_ARRAY:%.*]] @out_arg_small_array.body(ptr poison) +; CHECK-SAME: (ptr addrspace(5) [[TMP0:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP2:%.*]] = call [[OUT_ARG_SMALL_ARRAY:%.*]] @out_arg_small_array.body(ptr addrspace(5) poison) ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[OUT_ARG_SMALL_ARRAY]] [[TMP2]], 0 -; CHECK-NEXT: store [4 x i32] [[TMP3]], ptr [[TMP0]], align 4 +; CHECK-NEXT: store [4 x i32] [[TMP3]], ptr addrspace(5) [[TMP0]], align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@out_arg_large_array -; CHECK-SAME: (ptr [[VAL:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: store [17 x i32] zeroinitializer, ptr [[VAL]], align 4 +; CHECK-SAME: (ptr addrspace(5) [[VAL:%.*]]) 
#[[ATTR0]] { +; CHECK-NEXT: store [17 x i32] zeroinitializer, ptr addrspace(5) [[VAL]], align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@num_regs_return_limit -; CHECK-SAME: (ptr [[OUT:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr addrspace(5) [[OUT:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[LOAD:%.*]] = load volatile <16 x i32>, ptr addrspace(1) poison, align 64 -; CHECK-NEXT: store i32 [[VAL]], ptr [[OUT]], align 4 +; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(5) [[OUT]], align 4 ; CHECK-NEXT: ret <16 x i32> [[LOAD]] ; ; ; CHECK-LABEL: define {{[^@]+}}@num_regs_reach_limit.body -; CHECK-SAME: (ptr [[OUT:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr addrspace(5) [[OUT:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[LOAD:%.*]] = load volatile [15 x i32], ptr addrspace(1) poison, align 4 ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[NUM_REGS_REACH_LIMIT:%.*]] poison, [15 x i32] [[LOAD]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertvalue [[NUM_REGS_REACH_LIMIT]] [[TMP1]], i32 [[VAL]], 1 @@ -841,16 +843,16 @@ attributes #2 = { alwaysinline nounwind } ; ; ; CHECK-LABEL: define {{[^@]+}}@num_regs_reach_limit -; CHECK-SAME: (ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP3:%.*]] = call [[NUM_REGS_REACH_LIMIT:%.*]] @num_regs_reach_limit.body(ptr poison, i32 [[TMP1]]) +; CHECK-SAME: (ptr addrspace(5) [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP3:%.*]] = call [[NUM_REGS_REACH_LIMIT:%.*]] @num_regs_reach_limit.body(ptr addrspace(5) poison, i32 [[TMP1]]) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[NUM_REGS_REACH_LIMIT]] [[TMP3]], 1 -; CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP0]], align 4 +; CHECK-NEXT: store i32 [[TMP4]], ptr addrspace(5) [[TMP0]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[NUM_REGS_REACH_LIMIT]] [[TMP3]], 0 ; CHECK-NEXT: ret [15 x i32] [[TMP5]] ; ; ; CHECK-LABEL: define {{[^@]+}}@num_regs_reach_limit_leftover.body -; CHECK-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], i32 [[VAL0:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr addrspace(5) [[OUT0:%.*]], ptr addrspace(5) [[OUT1:%.*]], i32 [[VAL0:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[LOAD0:%.*]] = load volatile [15 x i32], ptr addrspace(1) poison, align 4 ; CHECK-NEXT: [[LOAD1:%.*]] = load volatile i32, ptr addrspace(1) poison, align 4 ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[NUM_REGS_REACH_LIMIT_LEFTOVER:%.*]] poison, [15 x i32] [[LOAD0]], 0 @@ -860,267 +862,267 @@ attributes #2 = { alwaysinline nounwind } ; ; ; CHECK-LABEL: define {{[^@]+}}@num_regs_reach_limit_leftover -; CHECK-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]], i32 [[TMP2:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP4:%.*]] = call [[NUM_REGS_REACH_LIMIT_LEFTOVER:%.*]] @num_regs_reach_limit_leftover.body(ptr poison, ptr poison, i32 [[TMP2]]) +; CHECK-SAME: (ptr addrspace(5) [[TMP0:%.*]], ptr addrspace(5) [[TMP1:%.*]], i32 [[TMP2:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP4:%.*]] = call [[NUM_REGS_REACH_LIMIT_LEFTOVER:%.*]] @num_regs_reach_limit_leftover.body(ptr addrspace(5) poison, ptr addrspace(5) poison, i32 [[TMP2]]) ; CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[NUM_REGS_REACH_LIMIT_LEFTOVER]] [[TMP4]], 1 -; CHECK-NEXT: store i32 [[TMP5]], ptr [[TMP0]], align 4 +; CHECK-NEXT: store i32 [[TMP5]], ptr addrspace(5) [[TMP0]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = extractvalue [[NUM_REGS_REACH_LIMIT_LEFTOVER]] [[TMP4]], 2 -; CHECK-NEXT: store i32 [[TMP6]], ptr [[TMP1]], align 4 +; CHECK-NEXT: store i32 [[TMP6]], ptr addrspace(5) [[TMP1]], align 4 ; CHECK-NEXT: [[TMP7:%.*]] = extractvalue 
[[NUM_REGS_REACH_LIMIT_LEFTOVER]] [[TMP4]], 0 ; CHECK-NEXT: ret [15 x i32] [[TMP7]] ; ; ; CHECK-LABEL: define {{[^@]+}}@preserve_debug_info.body -; CHECK-SAME: (i32 [[ARG0:%.*]], ptr [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (i32 [[ARG0:%.*]], ptr addrspace(5) [[VAL:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: call void @may.clobber(), !dbg [[DBG5:![0-9]+]] ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[PRESERVE_DEBUG_INFO:%.*]] poison, i32 [[ARG0]], 0, !dbg [[DBG11:![0-9]+]] ; CHECK-NEXT: ret [[PRESERVE_DEBUG_INFO]] [[TMP1]], !dbg [[DBG11]] ; ; ; CHECK-LABEL: define {{[^@]+}}@preserve_debug_info -; CHECK-SAME: (i32 [[TMP0:%.*]], ptr [[TMP1:%.*]]) #[[ATTR2]] !dbg [[DBG6:![0-9]+]] { -; CHECK-NEXT: [[TMP3:%.*]] = call [[PRESERVE_DEBUG_INFO:%.*]] @preserve_debug_info.body(i32 [[TMP0]], ptr poison) +; CHECK-SAME: (i32 [[TMP0:%.*]], ptr addrspace(5) [[TMP1:%.*]]) #[[ATTR2]] !dbg [[DBG6:![0-9]+]] { +; CHECK-NEXT: [[TMP3:%.*]] = call [[PRESERVE_DEBUG_INFO:%.*]] @preserve_debug_info.body(i32 [[TMP0]], ptr addrspace(5) poison) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[PRESERVE_DEBUG_INFO]] [[TMP3]], 0 -; CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP1]], align 4 +; CHECK-NEXT: store i32 [[TMP4]], ptr addrspace(5) [[TMP1]], align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@preserve_metadata.body -; CHECK-SAME: (i32 [[ARG0:%.*]], ptr [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (i32 [[ARG0:%.*]], ptr addrspace(5) [[VAL:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: call void @may.clobber() ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[PRESERVE_METADATA:%.*]] poison, i32 [[ARG0]], 0 ; CHECK-NEXT: ret [[PRESERVE_METADATA]] [[TMP1]] ; ; ; CHECK-LABEL: define {{[^@]+}}@preserve_metadata -; CHECK-SAME: (i32 [[TMP0:%.*]], ptr [[TMP1:%.*]]) #[[ATTR2]] !kernel_arg_access_qual !12 { -; CHECK-NEXT: [[TMP3:%.*]] = call [[PRESERVE_METADATA:%.*]] @preserve_metadata.body(i32 [[TMP0]], ptr poison) +; CHECK-SAME: (i32 [[TMP0:%.*]], ptr addrspace(5) [[TMP1:%.*]]) #[[ATTR2]] !kernel_arg_access_qual !12 { +; CHECK-NEXT: [[TMP3:%.*]] = call [[PRESERVE_METADATA:%.*]] @preserve_metadata.body(i32 [[TMP0]], ptr addrspace(5) poison) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[PRESERVE_METADATA]] [[TMP3]], 0 -; CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP1]], align 4 +; CHECK-NEXT: store i32 [[TMP4]], ptr addrspace(5) [[TMP1]], align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_v4i32_v3i32.body -; CHECK-SAME: (ptr [[OUT:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr addrspace(5) [[OUT:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[LOAD:%.*]] = load volatile <4 x i32>, ptr addrspace(1) poison, align 16 ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_POINTER_V4I32_V3I32:%.*]] poison, <4 x i32> [[LOAD]], 0 ; CHECK-NEXT: ret [[BITCAST_POINTER_V4I32_V3I32]] [[TMP1]] ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_v4i32_v3i32 -; CHECK-SAME: (ptr [[TMP0:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP2:%.*]] = call [[BITCAST_POINTER_V4I32_V3I32:%.*]] @bitcast_pointer_v4i32_v3i32.body(ptr poison) +; CHECK-SAME: (ptr addrspace(5) [[TMP0:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP2:%.*]] = call [[BITCAST_POINTER_V4I32_V3I32:%.*]] @bitcast_pointer_v4i32_v3i32.body(ptr addrspace(5) poison) ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[BITCAST_POINTER_V4I32_V3I32]] [[TMP2]], 0 -; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[TMP0]], align 16 +; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr addrspace(5) [[TMP0]], align 16 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_v4i32_v3f32.body -; CHECK-SAME: (ptr [[OUT:%.*]]) #[[ATTR0]] { +; 
CHECK-SAME: (ptr addrspace(5) [[OUT:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[LOAD:%.*]] = load volatile <4 x i32>, ptr addrspace(1) poison, align 16 ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_POINTER_V4I32_V3F32:%.*]] poison, <4 x i32> [[LOAD]], 0 ; CHECK-NEXT: ret [[BITCAST_POINTER_V4I32_V3F32]] [[TMP1]] ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_v4i32_v3f32 -; CHECK-SAME: (ptr [[TMP0:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP2:%.*]] = call [[BITCAST_POINTER_V4I32_V3F32:%.*]] @bitcast_pointer_v4i32_v3f32.body(ptr poison) +; CHECK-SAME: (ptr addrspace(5) [[TMP0:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP2:%.*]] = call [[BITCAST_POINTER_V4I32_V3F32:%.*]] @bitcast_pointer_v4i32_v3f32.body(ptr addrspace(5) poison) ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[BITCAST_POINTER_V4I32_V3F32]] [[TMP2]], 0 -; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[TMP0]], align 16 +; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr addrspace(5) [[TMP0]], align 16 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_i32_f32.body -; CHECK-SAME: (ptr [[OUT:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr addrspace(5) [[OUT:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[LOAD:%.*]] = load volatile i32, ptr addrspace(1) poison, align 4 ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_POINTER_I32_F32:%.*]] poison, i32 [[LOAD]], 0 ; CHECK-NEXT: ret [[BITCAST_POINTER_I32_F32]] [[TMP1]] ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_i32_f32 -; CHECK-SAME: (ptr [[TMP0:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP2:%.*]] = call [[BITCAST_POINTER_I32_F32:%.*]] @bitcast_pointer_i32_f32.body(ptr poison) +; CHECK-SAME: (ptr addrspace(5) [[TMP0:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP2:%.*]] = call [[BITCAST_POINTER_I32_F32:%.*]] @bitcast_pointer_i32_f32.body(ptr addrspace(5) poison) ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[BITCAST_POINTER_I32_F32]] [[TMP2]], 0 -; CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP0]], align 4 +; CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(5) [[TMP0]], align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_i32_f16.body -; CHECK-SAME: (ptr [[OUT:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr addrspace(5) [[OUT:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[LOAD:%.*]] = load volatile i32, ptr addrspace(1) poison, align 4 ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_POINTER_I32_F16:%.*]] poison, i32 [[LOAD]], 0 ; CHECK-NEXT: ret [[BITCAST_POINTER_I32_F16]] [[TMP1]] ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_i32_f16 -; CHECK-SAME: (ptr [[TMP0:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP2:%.*]] = call [[BITCAST_POINTER_I32_F16:%.*]] @bitcast_pointer_i32_f16.body(ptr poison) +; CHECK-SAME: (ptr addrspace(5) [[TMP0:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP2:%.*]] = call [[BITCAST_POINTER_I32_F16:%.*]] @bitcast_pointer_i32_f16.body(ptr addrspace(5) poison) ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[BITCAST_POINTER_I32_F16]] [[TMP2]], 0 -; CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP0]], align 4 +; CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(5) [[TMP0]], align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_f16_i32.body -; CHECK-SAME: (ptr [[OUT:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr addrspace(5) [[OUT:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[LOAD:%.*]] = load volatile half, ptr addrspace(1) poison, align 2 ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_POINTER_F16_I32:%.*]] poison, half [[LOAD]], 0 ; CHECK-NEXT: ret [[BITCAST_POINTER_F16_I32]] [[TMP1]] ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_f16_i32 -; CHECK-SAME: (ptr [[TMP0:%.*]]) #[[ATTR2]] 
{ -; CHECK-NEXT: [[TMP2:%.*]] = call [[BITCAST_POINTER_F16_I32:%.*]] @bitcast_pointer_f16_i32.body(ptr poison) +; CHECK-SAME: (ptr addrspace(5) [[TMP0:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP2:%.*]] = call [[BITCAST_POINTER_F16_I32:%.*]] @bitcast_pointer_f16_i32.body(ptr addrspace(5) poison) ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[BITCAST_POINTER_F16_I32]] [[TMP2]], 0 -; CHECK-NEXT: store half [[TMP3]], ptr [[TMP0]], align 2 +; CHECK-NEXT: store half [[TMP3]], ptr addrspace(5) [[TMP0]], align 2 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v3f32.body -; CHECK-SAME: (ptr [[OUT:%.*]], <3 x float> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr addrspace(5) [[OUT:%.*]], <3 x float> [[VALUE:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x float> [[VALUE]], <3 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_V3F32:%.*]] poison, <4 x float> [[EXTRACTVEC]], 0 ; CHECK-NEXT: ret [[BITCAST_STRUCT_V3F32_V3F32]] [[TMP1]] ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v3f32 -; CHECK-SAME: (ptr [[TMP0:%.*]], <3 x float> [[TMP1:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_V3F32:%.*]] @bitcast_struct_v3f32_v3f32.body(ptr poison, <3 x float> [[TMP1]]) +; CHECK-SAME: (ptr addrspace(5) [[TMP0:%.*]], <3 x float> [[TMP1:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_V3F32:%.*]] @bitcast_struct_v3f32_v3f32.body(ptr addrspace(5) poison, <3 x float> [[TMP1]]) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_V3F32_V3F32]] [[TMP3]], 0 -; CHECK-NEXT: store <4 x float> [[TMP4]], ptr [[TMP0]], align 16 +; CHECK-NEXT: store <4 x float> [[TMP4]], ptr addrspace(5) [[TMP0]], align 16 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v3i32.body -; CHECK-SAME: (ptr [[OUT:%.*]], <3 x i32> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr addrspace(5) [[OUT:%.*]], <3 x i32> [[VALUE:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i32> [[VALUE]], <3 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_V3I32:%.*]] poison, <4 x i32> [[EXTRACTVEC]], 0 ; CHECK-NEXT: ret [[BITCAST_STRUCT_V3F32_V3I32]] [[TMP1]] ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v3i32 -; CHECK-SAME: (ptr [[TMP0:%.*]], <3 x i32> [[TMP1:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_V3I32:%.*]] @bitcast_struct_v3f32_v3i32.body(ptr poison, <3 x i32> [[TMP1]]) +; CHECK-SAME: (ptr addrspace(5) [[TMP0:%.*]], <3 x i32> [[TMP1:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_V3I32:%.*]] @bitcast_struct_v3f32_v3i32.body(ptr addrspace(5) poison, <3 x i32> [[TMP1]]) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_V3F32_V3I32]] [[TMP3]], 0 -; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP0]], align 16 +; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr addrspace(5) [[TMP0]], align 16 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v4f32_v4f32.body -; CHECK-SAME: (ptr [[OUT:%.*]], <4 x float> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr addrspace(5) [[OUT:%.*]], <4 x float> [[VALUE:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V4F32_V4F32:%.*]] poison, <4 x float> [[VALUE]], 0 ; CHECK-NEXT: ret [[BITCAST_STRUCT_V4F32_V4F32]] [[TMP1]] ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v4f32_v4f32 -; CHECK-SAME: (ptr [[TMP0:%.*]], <4 x float> [[TMP1:%.*]]) 
#[[ATTR2]] { -; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_V4F32_V4F32:%.*]] @bitcast_struct_v4f32_v4f32.body(ptr poison, <4 x float> [[TMP1]]) +; CHECK-SAME: (ptr addrspace(5) [[TMP0:%.*]], <4 x float> [[TMP1:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_V4F32_V4F32:%.*]] @bitcast_struct_v4f32_v4f32.body(ptr addrspace(5) poison, <4 x float> [[TMP1]]) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_V4F32_V4F32]] [[TMP3]], 0 -; CHECK-NEXT: store <4 x float> [[TMP4]], ptr [[TMP0]], align 16 +; CHECK-NEXT: store <4 x float> [[TMP4]], ptr addrspace(5) [[TMP0]], align 16 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v4i32.body -; CHECK-SAME: (ptr [[OUT:%.*]], <4 x i32> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr addrspace(5) [[OUT:%.*]], <4 x i32> [[VALUE:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_V4I32:%.*]] poison, <4 x i32> [[VALUE]], 0 ; CHECK-NEXT: ret [[BITCAST_STRUCT_V3F32_V4I32]] [[TMP1]] ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v4i32 -; CHECK-SAME: (ptr [[TMP0:%.*]], <4 x i32> [[TMP1:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_V4I32:%.*]] @bitcast_struct_v3f32_v4i32.body(ptr poison, <4 x i32> [[TMP1]]) +; CHECK-SAME: (ptr addrspace(5) [[TMP0:%.*]], <4 x i32> [[TMP1:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_V4I32:%.*]] @bitcast_struct_v3f32_v4i32.body(ptr addrspace(5) poison, <4 x i32> [[TMP1]]) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_V3F32_V4I32]] [[TMP3]], 0 -; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP0]], align 16 +; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr addrspace(5) [[TMP0]], align 16 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v4f32_v3f32.body -; CHECK-SAME: (ptr [[OUT:%.*]], <3 x float> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr addrspace(5) [[OUT:%.*]], <3 x float> [[VALUE:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x float> [[VALUE]], <3 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V4F32_V3F32:%.*]] poison, <4 x float> [[EXTRACTVEC]], 0 ; CHECK-NEXT: ret [[BITCAST_STRUCT_V4F32_V3F32]] [[TMP1]] ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v4f32_v3f32 -; CHECK-SAME: (ptr [[TMP0:%.*]], <3 x float> [[TMP1:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_V4F32_V3F32:%.*]] @bitcast_struct_v4f32_v3f32.body(ptr poison, <3 x float> [[TMP1]]) +; CHECK-SAME: (ptr addrspace(5) [[TMP0:%.*]], <3 x float> [[TMP1:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_V4F32_V3F32:%.*]] @bitcast_struct_v4f32_v3f32.body(ptr addrspace(5) poison, <3 x float> [[TMP1]]) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_V4F32_V3F32]] [[TMP3]], 0 -; CHECK-NEXT: store <4 x float> [[TMP4]], ptr [[TMP0]], align 16 +; CHECK-NEXT: store <4 x float> [[TMP4]], ptr addrspace(5) [[TMP0]], align 16 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v2f32.body -; CHECK-SAME: (ptr [[OUT:%.*]], <2 x float> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr addrspace(5) [[OUT:%.*]], <2 x float> [[VALUE:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_V2F32:%.*]] poison, <2 x float> [[VALUE]], 0 ; CHECK-NEXT: ret [[BITCAST_STRUCT_V3F32_V2F32]] [[TMP1]] ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v2f32 -; CHECK-SAME: (ptr [[TMP0:%.*]], <2 x float> 
[[TMP1:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_V2F32:%.*]] @bitcast_struct_v3f32_v2f32.body(ptr poison, <2 x float> [[TMP1]]) +; CHECK-SAME: (ptr addrspace(5) [[TMP0:%.*]], <2 x float> [[TMP1:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_V2F32:%.*]] @bitcast_struct_v3f32_v2f32.body(ptr addrspace(5) poison, <2 x float> [[TMP1]]) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_V3F32_V2F32]] [[TMP3]], 0 -; CHECK-NEXT: store <2 x float> [[TMP4]], ptr [[TMP0]], align 8 +; CHECK-NEXT: store <2 x float> [[TMP4]], ptr addrspace(5) [[TMP0]], align 8 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_f32_v3f32.body -; CHECK-SAME: (ptr [[OUT:%.*]], <3 x float> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr addrspace(5) [[OUT:%.*]], <3 x float> [[VALUE:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x float> [[VALUE]], <3 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_F32_V3F32:%.*]] poison, <4 x float> [[EXTRACTVEC]], 0 ; CHECK-NEXT: ret [[BITCAST_STRUCT_V3F32_F32_V3F32]] [[TMP1]] ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_f32_v3f32 -; CHECK-SAME: (ptr [[TMP0:%.*]], <3 x float> [[TMP1:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_F32_V3F32:%.*]] @bitcast_struct_v3f32_f32_v3f32.body(ptr poison, <3 x float> [[TMP1]]) +; CHECK-SAME: (ptr addrspace(5) [[TMP0:%.*]], <3 x float> [[TMP1:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_F32_V3F32:%.*]] @bitcast_struct_v3f32_f32_v3f32.body(ptr addrspace(5) poison, <3 x float> [[TMP1]]) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_V3F32_F32_V3F32]] [[TMP3]], 0 -; CHECK-NEXT: store <4 x float> [[TMP4]], ptr [[TMP0]], align 16 +; CHECK-NEXT: store <4 x float> [[TMP4]], ptr addrspace(5) [[TMP0]], align 16 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_f32_v4f32.body -; CHECK-SAME: (ptr [[OUT:%.*]], <4 x float> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr addrspace(5) [[OUT:%.*]], <4 x float> [[VALUE:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_F32_V4F32:%.*]] poison, <4 x float> [[VALUE]], 0 ; CHECK-NEXT: ret [[BITCAST_STRUCT_V3F32_F32_V4F32]] [[TMP1]] ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_f32_v4f32 -; CHECK-SAME: (ptr [[TMP0:%.*]], <4 x float> [[TMP1:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_F32_V4F32:%.*]] @bitcast_struct_v3f32_f32_v4f32.body(ptr poison, <4 x float> [[TMP1]]) +; CHECK-SAME: (ptr addrspace(5) [[TMP0:%.*]], <4 x float> [[TMP1:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_F32_V4F32:%.*]] @bitcast_struct_v3f32_f32_v4f32.body(ptr addrspace(5) poison, <4 x float> [[TMP1]]) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_V3F32_F32_V4F32]] [[TMP3]], 0 -; CHECK-NEXT: store <4 x float> [[TMP4]], ptr [[TMP0]], align 16 +; CHECK-NEXT: store <4 x float> [[TMP4]], ptr addrspace(5) [[TMP0]], align 16 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_i128_v4f32.body -; CHECK-SAME: (ptr [[OUT:%.*]], <4 x float> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr addrspace(5) [[OUT:%.*]], <4 x float> [[VALUE:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_I128_V4F32:%.*]] poison, <4 x float> [[VALUE]], 0 ; CHECK-NEXT: ret [[BITCAST_STRUCT_I128_V4F32]] [[TMP1]] ; ; ; CHECK-LABEL: 
define {{[^@]+}}@bitcast_struct_i128_v4f32 -; CHECK-SAME: (ptr [[TMP0:%.*]], <4 x float> [[TMP1:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_I128_V4F32:%.*]] @bitcast_struct_i128_v4f32.body(ptr poison, <4 x float> [[TMP1]]) +; CHECK-SAME: (ptr addrspace(5) [[TMP0:%.*]], <4 x float> [[TMP1:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_I128_V4F32:%.*]] @bitcast_struct_i128_v4f32.body(ptr addrspace(5) poison, <4 x float> [[TMP1]]) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_I128_V4F32]] [[TMP3]], 0 -; CHECK-NEXT: store <4 x float> [[TMP4]], ptr [[TMP0]], align 16 +; CHECK-NEXT: store <4 x float> [[TMP4]], ptr addrspace(5) [[TMP0]], align 16 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_array_v4i32_v4f32.body -; CHECK-SAME: (ptr [[OUT:%.*]], [4 x float] [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr addrspace(5) [[OUT:%.*]], [4 x float] [[VALUE:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_ARRAY_V4I32_V4F32:%.*]] poison, [4 x float] [[VALUE]], 0 ; CHECK-NEXT: ret [[BITCAST_ARRAY_V4I32_V4F32]] [[TMP1]] ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_array_v4i32_v4f32 -; CHECK-SAME: (ptr [[TMP0:%.*]], [4 x float] [[TMP1:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_ARRAY_V4I32_V4F32:%.*]] @bitcast_array_v4i32_v4f32.body(ptr poison, [4 x float] [[TMP1]]) +; CHECK-SAME: (ptr addrspace(5) [[TMP0:%.*]], [4 x float] [[TMP1:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_ARRAY_V4I32_V4F32:%.*]] @bitcast_array_v4i32_v4f32.body(ptr addrspace(5) poison, [4 x float] [[TMP1]]) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[BITCAST_ARRAY_V4I32_V4F32]] [[TMP3]], 0 -; CHECK-NEXT: store [4 x float] [[TMP4]], ptr [[TMP0]], align 4 +; CHECK-NEXT: store [4 x float] [[TMP4]], ptr addrspace(5) [[TMP0]], align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@multi_return_bitcast_struct_v3f32_v3f32.body -; CHECK-SAME: (i1 [[COND:%.*]], ptr [[OUT:%.*]], <3 x float> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (i1 [[COND:%.*]], ptr addrspace(5) [[OUT:%.*]], <3 x float> [[VALUE:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[COND]], label [[RET0:%.*]], label [[RET1:%.*]] ; CHECK: ret0: @@ -1134,23 +1136,23 @@ attributes #2 = { alwaysinline nounwind } ; ; ; CHECK-LABEL: define {{[^@]+}}@multi_return_bitcast_struct_v3f32_v3f32 -; CHECK-SAME: (i1 [[TMP0:%.*]], ptr [[TMP1:%.*]], <3 x float> [[TMP2:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP4:%.*]] = call [[MULTI_RETURN_BITCAST_STRUCT_V3F32_V3F32:%.*]] @multi_return_bitcast_struct_v3f32_v3f32.body(i1 [[TMP0]], ptr poison, <3 x float> [[TMP2]]) +; CHECK-SAME: (i1 [[TMP0:%.*]], ptr addrspace(5) [[TMP1:%.*]], <3 x float> [[TMP2:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP4:%.*]] = call [[MULTI_RETURN_BITCAST_STRUCT_V3F32_V3F32:%.*]] @multi_return_bitcast_struct_v3f32_v3f32.body(i1 [[TMP0]], ptr addrspace(5) poison, <3 x float> [[TMP2]]) ; CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[MULTI_RETURN_BITCAST_STRUCT_V3F32_V3F32]] [[TMP4]], 0 -; CHECK-NEXT: store <4 x float> [[TMP5]], ptr [[TMP1]], align 16 +; CHECK-NEXT: store <4 x float> [[TMP5]], ptr addrspace(5) [[TMP1]], align 16 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_v3f32_struct_v3f32.body -; CHECK-SAME: (ptr [[OUT:%.*]], [[STRUCT_V3F32:%.*]] [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr addrspace(5) [[OUT:%.*]], [[STRUCT_V3F32:%.*]] [[VALUE:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_V3F32_STRUCT_V3F32:%.*]] 
poison, [[STRUCT_V3F32]] [[VALUE]], 0
; CHECK-NEXT:    ret [[BITCAST_V3F32_STRUCT_V3F32]] [[TMP1]]
;
;
; CHECK-LABEL: define {{[^@]+}}@bitcast_v3f32_struct_v3f32
-; CHECK-SAME: (ptr [[TMP0:%.*]], [[STRUCT_V3F32:%.*]] [[TMP1:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT:    [[TMP3:%.*]] = call [[BITCAST_V3F32_STRUCT_V3F32:%.*]] @bitcast_v3f32_struct_v3f32.body(ptr poison, [[STRUCT_V3F32]] [[TMP1]])
+; CHECK-SAME: (ptr addrspace(5) [[TMP0:%.*]], [[STRUCT_V3F32:%.*]] [[TMP1:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT:    [[TMP3:%.*]] = call [[BITCAST_V3F32_STRUCT_V3F32:%.*]] @bitcast_v3f32_struct_v3f32.body(ptr addrspace(5) poison, [[STRUCT_V3F32]] [[TMP1]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[BITCAST_V3F32_STRUCT_V3F32]] [[TMP3]], 0
-; CHECK-NEXT:    store [[STRUCT_V3F32]] [[TMP4]], ptr [[TMP0]], align 16
+; CHECK-NEXT:    store [[STRUCT_V3F32]] [[TMP4]], ptr addrspace(5) [[TMP0]], align 16
 ; CHECK-NEXT:    ret void
 ;

From ba11d314a67638454989d4e0aebae64145d1a8ac Mon Sep 17 00:00:00 2001
From: Timm Baeder
Date: Thu, 26 Oct 2023 16:15:29 +0200
Subject: [PATCH 073/877] [clang][Interp] Only diagnose null field access in
 constant contexts (#69223)

Looks like this should work as long as we don't dereference the value.
---
 clang/lib/AST/Interp/Interp.cpp   |  2 +-
 clang/lib/AST/Interp/Interp.h     |  2 +-
 clang/lib/AST/Interp/Pointer.h    | 28 +++++++++++++++++++++-----
 clang/test/AST/Interp/c.c         | 14 +++++++++++++
 clang/test/AST/Interp/records.cpp | 33 +++++++++++++++++++++++++++++++
 5 files changed, 72 insertions(+), 7 deletions(-)

diff --git a/clang/lib/AST/Interp/Interp.cpp b/clang/lib/AST/Interp/Interp.cpp
index 31d43b6010c18..1ebbadc375f38 100644
--- a/clang/lib/AST/Interp/Interp.cpp
+++ b/clang/lib/AST/Interp/Interp.cpp
@@ -216,7 +216,7 @@ bool CheckLive(InterpState &S, CodePtr OpPC, const Pointer &Ptr,
 }
 
 bool CheckDummy(InterpState &S, CodePtr OpPC, const Pointer &Ptr) {
-  return !Ptr.isDummy();
+  return !Ptr.isZero() && !Ptr.isDummy();
 }
 
 bool CheckNull(InterpState &S, CodePtr OpPC, const Pointer &Ptr,
diff --git a/clang/lib/AST/Interp/Interp.h b/clang/lib/AST/Interp/Interp.h
index 3e1b4f32e8b69..7dd415d6e4605 100644
--- a/clang/lib/AST/Interp/Interp.h
+++ b/clang/lib/AST/Interp/Interp.h
@@ -1155,7 +1155,7 @@ inline bool GetPtrGlobal(InterpState &S, CodePtr OpPC, uint32_t I) {
 /// 2) Pushes Pointer.atField(Off) on the stack
 inline bool GetPtrField(InterpState &S, CodePtr OpPC, uint32_t Off) {
   const Pointer &Ptr = S.Stk.pop<Pointer>();
-  if (!CheckNull(S, OpPC, Ptr, CSK_Field))
+  if (S.inConstantContext() && !CheckNull(S, OpPC, Ptr, CSK_Field))
     return false;
   if (!CheckExtern(S, OpPC, Ptr))
     return false;
diff --git a/clang/lib/AST/Interp/Pointer.h b/clang/lib/AST/Interp/Pointer.h
index b371b306fe7a7..843bcad16b5d1 100644
--- a/clang/lib/AST/Interp/Pointer.h
+++ b/clang/lib/AST/Interp/Pointer.h
@@ -199,7 +199,10 @@ class Pointer {
   bool isField() const { return Base != 0 && Base != RootPtrMark; }
 
   /// Accessor for information about the declaration site.
-  const Descriptor *getDeclDesc() const { return Pointee->Desc; }
+  const Descriptor *getDeclDesc() const {
+    assert(Pointee);
+    return Pointee->Desc;
+  }
   SourceLocation getDeclLoc() const { return getDeclDesc()->getLocation(); }
 
   /// Returns a pointer to the object of which this pointer is a field.
@@ -296,11 +299,17 @@ class Pointer {
   bool isUnion() const;
 
   /// Checks if the storage is extern.
-  bool isExtern() const { return Pointee->isExtern(); }
+  bool isExtern() const { return Pointee && Pointee->isExtern(); }
   /// Checks if the storage is static.
-  bool isStatic() const { return Pointee->isStatic(); }
+  bool isStatic() const {
+    assert(Pointee);
+    return Pointee->isStatic();
+  }
   /// Checks if the storage is temporary.
-  bool isTemporary() const { return Pointee->isTemporary(); }
+  bool isTemporary() const {
+    assert(Pointee);
+    return Pointee->isTemporary();
+  }
   /// Checks if the storage is a static temporary.
   bool isStaticTemporary() const { return isStatic() && isTemporary(); }
 
@@ -323,7 +332,10 @@ class Pointer {
   }
 
   /// Returns the declaration ID.
-  std::optional<unsigned> getDeclID() const { return Pointee->getDeclID(); }
+  std::optional<unsigned> getDeclID() const {
+    assert(Pointee);
+    return Pointee->getDeclID();
+  }
 
   /// Returns the byte offset from the start.
   unsigned getByteOffset() const {
@@ -351,6 +363,8 @@ class Pointer {
 
   /// Checks if the index is one past end.
   bool isOnePastEnd() const {
+    if (!Pointee)
+      return false;
     return isElementPastEnd() || getSize() == getOffset();
   }
 
@@ -360,6 +374,7 @@ class Pointer {
   /// Dereferences the pointer, if it's live.
   template <typename T> T &deref() const {
     assert(isLive() && "Invalid pointer");
+    assert(Pointee);
     if (isArrayRoot())
       return *reinterpret_cast<T *>(Pointee->rawData() + Base +
                                     sizeof(InitMapPtr));
@@ -370,6 +385,7 @@ class Pointer {
   /// Dereferences a primitive element.
   template <typename T> T &elem(unsigned I) const {
     assert(I < getNumElems());
+    assert(Pointee);
     return reinterpret_cast<T *>(Pointee->data() + sizeof(InitMapPtr))[I];
   }
 
@@ -431,12 +447,14 @@ class Pointer {
   /// Returns a descriptor at a given offset.
   InlineDescriptor *getDescriptor(unsigned Offset) const {
     assert(Offset != 0 && "Not a nested pointer");
+    assert(Pointee);
     return reinterpret_cast<InlineDescriptor *>(Pointee->rawData() + Offset) - 1;
   }
 
   /// Returns a reference to the InitMapPtr which stores the initialization map.
   InitMapPtr &getInitMap() const {
+    assert(Pointee);
     return *reinterpret_cast<InitMapPtr *>(Pointee->rawData() + Base);
   }
 
diff --git a/clang/test/AST/Interp/c.c b/clang/test/AST/Interp/c.c
index e8aa8b8599f21..6bfcded0a7864 100644
--- a/clang/test/AST/Interp/c.c
+++ b/clang/test/AST/Interp/c.c
@@ -3,6 +3,8 @@
 // RUN: %clang_cc1 -verify=ref -std=c11 %s
 // RUN: %clang_cc1 -pedantic -verify=pedantic-ref -std=c11 %s
 
+typedef __INTPTR_TYPE__ intptr_t;
+
 _Static_assert(1, "");
 _Static_assert(0 != 1, "");
 _Static_assert(1.0 == 1.0, ""); // pedantic-ref-warning {{not an integer constant expression}} \
@@ -67,3 +69,15 @@ _Static_assert(&Test50 != (void*)0, ""); // ref-warning {{always true}} \
                                          // expected-warning {{always true}} \
                                          // pedantic-expected-warning {{always true}} \
                                          // pedantic-expected-warning {{is a GNU extension}}
+
+struct y {int x,y;};
+int a2[(intptr_t)&((struct y*)0)->y]; // expected-warning {{folded to constant array}} \
+                                      // pedantic-expected-warning {{folded to constant array}} \
+                                      // ref-warning {{folded to constant array}} \
+                                      // pedantic-ref-warning {{folded to constant array}}
+
+const struct y *yy = (struct y*)0;
+const intptr_t L = (intptr_t)(&(yy->y)); // expected-error {{not a compile-time constant}} \
+                                         // pedantic-expected-error {{not a compile-time constant}} \
+                                         // ref-error {{not a compile-time constant}} \
+                                         // pedantic-ref-error {{not a compile-time constant}}
diff --git a/clang/test/AST/Interp/records.cpp b/clang/test/AST/Interp/records.cpp
index e899e37915f03..280eaf34898ce 100644
--- a/clang/test/AST/Interp/records.cpp
+++ b/clang/test/AST/Interp/records.cpp
@@ -1102,3 +1102,36 @@ namespace DelegatingConstructors {
   static_assert(d4.a == 10, "");
   static_assert(d4.b == 12, "");
 }
+
+namespace AccessOnNullptr {
+  struct F {
+    int a;
+  };
+
constexpr int a() { // expected-error {{never produces a constant expression}} \ + // ref-error {{never produces a constant expression}} + F *f = nullptr; + + f->a = 0; // expected-note 2{{cannot access field of null pointer}} \ + // ref-note 2{{cannot access field of null pointer}} + return f->a; + } + static_assert(a() == 0, ""); // expected-error {{not an integral constant expression}} \ + // expected-note {{in call to 'a()'}} \ + // ref-error {{not an integral constant expression}} \ + // ref-note {{in call to 'a()'}} + + constexpr int a2() { // expected-error {{never produces a constant expression}} \ + // ref-error {{never produces a constant expression}} + F *f = nullptr; + + + const int *a = &(f->a); // expected-note 2{{cannot access field of null pointer}} \ + // ref-note 2{{cannot access field of null pointer}} + return f->a; + } + static_assert(a2() == 0, ""); // expected-error {{not an integral constant expression}} \ + // expected-note {{in call to 'a2()'}} \ + // ref-error {{not an integral constant expression}} \ + // ref-note {{in call to 'a2()'}} +} From aaabf50d521550c0f6c0b5c8623450eb56f485f5 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 26 Oct 2023 15:33:51 +0100 Subject: [PATCH 074/877] [AArch64] Regenerate tests to show missing constant comments --- .../test/CodeGen/AArch64/arm64-abi-varargs.ll | 40 +++++++++---------- .../CodeGen/AArch64/arm64-build-vector.ll | 12 +++--- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll b/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll index d943afb23c03b..1b22514a59d60 100644 --- a/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll +++ b/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll @@ -64,37 +64,37 @@ define i32 @main() nounwind ssp { ; CHECK: ; %bb.0: ; CHECK-NEXT: sub sp, sp, #96 ; CHECK-NEXT: stp x29, x30, [sp, #80] ; 16-byte Folded Spill -; CHECK-NEXT: mov w9, #1 -; CHECK-NEXT: mov w8, #2 +; CHECK-NEXT: mov w9, #1 ; =0x1 +; CHECK-NEXT: mov w8, #2 ; =0x2 ; CHECK-NEXT: stp w8, w9, [sp, #72] -; CHECK-NEXT: mov w9, #3 -; CHECK-NEXT: mov w8, #4 +; CHECK-NEXT: mov w9, #3 ; =0x3 +; CHECK-NEXT: mov w8, #4 ; =0x4 ; CHECK-NEXT: stp w8, w9, [sp, #64] -; CHECK-NEXT: mov w9, #5 -; CHECK-NEXT: mov w8, #6 +; CHECK-NEXT: mov w9, #5 ; =0x5 +; CHECK-NEXT: mov w8, #6 ; =0x6 ; CHECK-NEXT: stp w8, w9, [sp, #56] -; CHECK-NEXT: mov w9, #7 -; CHECK-NEXT: mov w8, #8 +; CHECK-NEXT: mov w9, #7 ; =0x7 +; CHECK-NEXT: mov w8, #8 ; =0x8 ; CHECK-NEXT: stp w8, w9, [sp, #48] -; CHECK-NEXT: mov w8, #9 -; CHECK-NEXT: mov w9, #10 +; CHECK-NEXT: mov w8, #9 ; =0x9 +; CHECK-NEXT: mov w9, #10 ; =0xa ; CHECK-NEXT: stp w9, w8, [sp, #40] -; CHECK-NEXT: mov w10, #11 -; CHECK-NEXT: mov w11, #12 +; CHECK-NEXT: mov w10, #11 ; =0xb +; CHECK-NEXT: mov w11, #12 ; =0xc ; CHECK-NEXT: stp w11, w10, [sp, #32] ; CHECK-NEXT: stp x10, x11, [sp, #16] ; CHECK-NEXT: str x9, [sp, #8] ; CHECK-NEXT: str w8, [sp] ; CHECK-NEXT: add x0, sp, #76 -; CHECK-NEXT: mov w1, #2 -; CHECK-NEXT: mov w2, #3 -; CHECK-NEXT: mov w3, #4 -; CHECK-NEXT: mov w4, #5 -; CHECK-NEXT: mov w5, #6 -; CHECK-NEXT: mov w6, #7 -; CHECK-NEXT: mov w7, #8 +; CHECK-NEXT: mov w1, #2 ; =0x2 +; CHECK-NEXT: mov w2, #3 ; =0x3 +; CHECK-NEXT: mov w3, #4 ; =0x4 +; CHECK-NEXT: mov w4, #5 ; =0x5 +; CHECK-NEXT: mov w5, #6 ; =0x6 +; CHECK-NEXT: mov w6, #7 ; =0x7 +; CHECK-NEXT: mov w7, #8 ; =0x8 ; CHECK-NEXT: bl _fn9 -; CHECK-NEXT: mov w0, #0 +; CHECK-NEXT: mov w0, #0 ; =0x0 ; CHECK-NEXT: ldp x29, x30, [sp, #80] ; 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 ; 
CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/arm64-build-vector.ll b/llvm/test/CodeGen/AArch64/arm64-build-vector.ll index f9f57e662d6ae..68c56d765cbb9 100644 --- a/llvm/test/CodeGen/AArch64/arm64-build-vector.ll +++ b/llvm/test/CodeGen/AArch64/arm64-build-vector.ll @@ -24,7 +24,7 @@ define <4 x float> @foo(float %a, float %b, float %c, float %d) nounwind { define <8 x i16> @build_all_zero(<8 x i16> %a) #1 { ; CHECK-LABEL: build_all_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #44672 +; CHECK-NEXT: mov w8, #44672 // =0xae80 ; CHECK-NEXT: fmov s1, w8 ; CHECK-NEXT: mul v0.8h, v0.8h, v1.8h ; CHECK-NEXT: ret @@ -56,7 +56,7 @@ define <8 x i16> @concat_2_build_vector(<4 x i16> %in0) { define void @widen_f16_build_vector(ptr %addr) { ; CHECK-LABEL: widen_f16_build_vector: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #13294 +; CHECK-NEXT: mov w8, #13294 // =0x33ee ; CHECK-NEXT: movk w8, #13294, lsl #16 ; CHECK-NEXT: str w8, [x0] ; CHECK-NEXT: ret @@ -68,7 +68,7 @@ define void @widen_f16_build_vector(ptr %addr) { define <1 x i64> @single_element_vector_i64(<1 x i64> %arg) { ; CHECK-LABEL: single_element_vector_i64: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: mov w8, #1 // =0x1 ; CHECK-NEXT: fmov d1, x8 ; CHECK-NEXT: add d0, d0, d1 ; CHECK-NEXT: ret @@ -94,7 +94,7 @@ define <1 x double> @convert_single_fp_vector_constant(i1 %cmp) { ; CHECK-LABEL: convert_single_fp_vector_constant: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: tst w0, #0x1 -; CHECK-NEXT: mov x8, #4607182418800017408 +; CHECK-NEXT: mov x8, #4607182418800017408 // =0x3ff0000000000000 ; CHECK-NEXT: csetm x9, ne ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: fmov d1, x9 @@ -120,7 +120,7 @@ define <2 x double> @poszero_v2f64(<2 x double> %a) { define <2 x double> @negzero_v2f64(<2 x double> %a) { ; CHECK-LABEL: negzero_v2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-9223372036854775808 +; CHECK-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000 ; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: fmul v0.2d, v0.2d, v1.2d ; CHECK-NEXT: ret @@ -141,7 +141,7 @@ define <1 x double> @poszero_v1f64(<1 x double> %a) { define <1 x double> @negzero_v1f64(<1 x double> %a) { ; CHECK-LABEL: negzero_v1f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-9223372036854775808 +; CHECK-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000 ; CHECK-NEXT: fmov d1, x8 ; CHECK-NEXT: fmul d0, d0, d1 ; CHECK-NEXT: ret From 13a349425b55f4322ef4410cd6f54587aa80f1d0 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 26 Oct 2023 15:35:09 +0100 Subject: [PATCH 075/877] [AArch64] Regenerate addr-of-ret-addr.ll --- llvm/test/CodeGen/AArch64/addr-of-ret-addr.ll | 62 ++++++++++++++----- 1 file changed, 47 insertions(+), 15 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/addr-of-ret-addr.ll b/llvm/test/CodeGen/AArch64/addr-of-ret-addr.ll index 2de708d66f59f..b6ec9eb569436 100644 --- a/llvm/test/CodeGen/AArch64/addr-of-ret-addr.ll +++ b/llvm/test/CodeGen/AArch64/addr-of-ret-addr.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ; RUN: llc < %s -frame-pointer=all -mtriple=arm64-windows | FileCheck %s ; Test generated from C code: @@ -15,18 +16,58 @@ declare void @llvm.va_start(ptr) declare ptr @llvm.addressofreturnaddress() define dso_local ptr @"foo"() { +; CHECK-LABEL: foo: +; CHECK: .seh_proc foo +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill +; CHECK-NEXT: .seh_save_fplr_x 16 +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: .seh_set_fp +; CHECK-NEXT: .seh_endprologue +; CHECK-NEXT: add x0, x29, #8 +; CHECK-NEXT: .seh_startepilogue +; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .seh_save_fplr_x 16 +; CHECK-NEXT: .seh_endepilogue +; CHECK-NEXT: ret +; CHECK-NEXT: .seh_endfunclet +; CHECK-NEXT: .seh_endproc entry: %0 = call ptr @llvm.addressofreturnaddress() ret ptr %0 - -; CHECK-LABEL: foo -; CHECK: stp x29, x30, [sp, #-16]! -; CHECK: mov x29, sp -; CHECK: add x0, x29, #8 -; CHECK: ldp x29, x30, [sp], #16 } define dso_local i32 @"bar"(ptr %x, ...) { +; CHECK-LABEL: bar: +; CHECK: .seh_proc bar +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #96 +; CHECK-NEXT: .seh_stackalloc 96 +; CHECK-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: .seh_save_fplr 16 +; CHECK-NEXT: add x29, sp, #16 +; CHECK-NEXT: .seh_add_fp 16 +; CHECK-NEXT: .seh_endprologue +; CHECK-NEXT: add x9, x29, #24 +; CHECK-NEXT: mov x8, x0 +; CHECK-NEXT: stp x1, x2, [x29, #24] +; CHECK-NEXT: stp x9, x0, [sp] +; CHECK-NEXT: add x0, x29, #24 +; CHECK-NEXT: add x1, x29, #8 +; CHECK-NEXT: stp x3, x4, [x29, #40] +; CHECK-NEXT: stp x5, x6, [x29, #56] +; CHECK-NEXT: str x7, [x29, #72] +; CHECK-NEXT: blr x8 +; CHECK-NEXT: add w0, w0, #1 +; CHECK-NEXT: .seh_startepilogue +; CHECK-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: .seh_save_fplr 16 +; CHECK-NEXT: add sp, sp, #96 +; CHECK-NEXT: .seh_stackalloc 96 +; CHECK-NEXT: .seh_endepilogue +; CHECK-NEXT: ret +; CHECK-NEXT: .seh_endfunclet +; CHECK-NEXT: .seh_endproc entry: %x.addr = alloca ptr, align 8 %y = alloca ptr, align 8 @@ -38,13 +79,4 @@ entry: %call = call i32 %0(ptr %2, ptr %1) %add = add nsw i32 %call, 1 ret i32 %add - -; CHECK-LABEL: bar -; CHECK: sub sp, sp, #96 -; CHECK: stp x29, x30, [sp, #16] -; CHECK: add x29, sp, #16 -; CHECK: stp x1, x2, [x29, #24] -; CHECK: add x1, x29, #8 -; CHECK: ldp x29, x30, [sp, #16] -; CHECK: add sp, sp, #96 } From 53096f910ca874cf446417d0bf551c5d63e917b2 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Thu, 26 Oct 2023 07:36:43 -0700 Subject: [PATCH 076/877] [lldb] Try to fix build after d1556e5efbf0 Getting lots of `error: unknown type name 'uint64_t'` and `uint32_t` in this file on Linux. --- lldb/include/lldb/Target/RegisterFlags.h | 1 + 1 file changed, 1 insertion(+) diff --git a/lldb/include/lldb/Target/RegisterFlags.h b/lldb/include/lldb/Target/RegisterFlags.h index 7c5b97c2265fd..a088981918cb3 100644 --- a/lldb/include/lldb/Target/RegisterFlags.h +++ b/lldb/include/lldb/Target/RegisterFlags.h @@ -9,6 +9,7 @@ #ifndef LLDB_TARGET_REGISTERFLAGS_H #define LLDB_TARGET_REGISTERFLAGS_H +#include #include #include From 4afe550ba677ca8998f548a80c832ee0f940cca6 Mon Sep 17 00:00:00 2001 From: Kristof Beyls Date: Thu, 26 Oct 2023 16:38:32 +0200 Subject: [PATCH 077/877] [Security Group] add github names of security group members. (#69304) Also drop phabricator names as we no longer use phabricator. 
--------- Co-authored-by: Andy Kaylor --- llvm/docs/Security.rst | 45 ++++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/llvm/docs/Security.rst b/llvm/docs/Security.rst index 2729541facbfe..61ba9aafaa394 100644 --- a/llvm/docs/Security.rst +++ b/llvm/docs/Security.rst @@ -31,28 +31,31 @@ Group Composition Security Group Members ---------------------- -The members of the group represent a wide cross-section of the community, and meet the criteria for inclusion below. The list is in the format `* ${full_name} (${affiliation}) [${phabricator_username}]`. If a phabricator username for an individual isn't available, the brackets will be empty. - -* Ahmed Bougacha (Apple) [ab] -* Andy Kaylor (Intel) [andykaylor] -* Artur Pilipenko (Azul Systems Inc) [apilipenko] -* Boovaragavan Dasarathan (Nvidia) [mrragava] -* Dimitry Andric (individual; FreeBSD) [dim] -* Ed Maste (individual; FreeBSD) [emaste] -* George Burgess IV (Google) [george.burgess.iv] -* Josh Stone (Red Hat; Rust) [cuviper] +The members of the group represent a wide cross-section of the community, and +meet the criteria for inclusion below. The list is in the format +`* ${full_name} (${affiliation}) [${github_username}]`. If a github +username for an individual isn't available, the brackets will be empty. + +* Ahmed Bougacha (Apple) [@ahmedbougacha] +* Andy Kaylor (Intel) [@andykaylor] +* Artur Pilipenko (Azul Systems Inc) [] +* Boovaragavan Dasarathan (Nvidia) [@mrragava] +* Dimitry Andric (individual; FreeBSD) [@DimitryAndric] +* Ed Maste (individual; FreeBSD) [@emaste] +* George Burgess IV (Google) [@gburgessiv] +* Josh Stone (Red Hat; Rust) [@cuviper] * Kate McInnes (Apple) [] -* Kristof Beyls (ARM) [kristof.beyls] -* Matthew Riley (Google) [mattdr] -* Nikhil Gupta (Nvidia) [nikhgupt] -* Oliver Hunt (Apple) [ojhunt] -* Paul Robinson (Sony) [probinson] -* Peter Smith (ARM) [peter.smith] -* Pietro Albini (Ferrous Systems; Rust) [pietroalbini] -* Serge Guelton (Mozilla) [serge-sans-paille] -* Sergey Maslov (Intel) [smaslov-intel] -* Shayne Hiet-Block (Microsoft) [Shayne] -* Tim Penge (Sony) [tpenge] +* Kristof Beyls (ARM) [@kbeyls] +* Matthew Riley (Google) [@mmdriley] +* Nikhil Gupta (Nvidia) [] +* Oliver Hunt (Apple) [@ojhunt] +* Paul Robinson (Sony) [@pogo59] +* Peter Smith (ARM) [@smithp35] +* Pietro Albini (Ferrous Systems; Rust) [@pietroalbini] +* Serge Guelton (Mozilla) [@serge-sans-paille] +* Sergey Maslov (Intel) [@smaslov-intel] +* Shayne Hiet-Block (Microsoft) [@GreatKeeper] +* Tim Penge (Sony) [] Criteria -------- From 7caff73e38038a98b837566eaf0e2e50754e2443 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Thu, 26 Oct 2023 15:39:10 +0100 Subject: [PATCH 078/877] [AMDGPU] Assert that we can find subregs in copyPhysReg. NFC. (#70332) This helped to catch a codegen failure caused by #69703. 
MachineVerifier did not complain about this malformed COPY either before regalloc: %9:vreg_64 = COPY %17:vgpr_32 Or after regalloc: renamable $vgpr0_vgpr1 = COPY renamable $vgpr2, implicit $exec But we can at least catch the problem when copyPhysReg tries to expand it into 32-bit register moves and fails to find suitable source registers: $vgpr0 = V_MOV_B32_e32 $noreg, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr2 $vgpr1 = V_MOV_B32_e32 $noreg, implicit $exec, implicit $vgpr2, implicit $exec --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 51 ++++++++++++++------------ 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index ffcd415a66648..62f5a17635cee 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -742,23 +742,27 @@ static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB, for (unsigned Idx = 0; Idx < BaseIndices.size(); ++Idx) { int16_t SubIdx = BaseIndices[Idx]; - Register Reg = RI.getSubReg(DestReg, SubIdx); + Register DestSubReg = RI.getSubReg(DestReg, SubIdx); + Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx); + assert(DestSubReg && SrcSubReg && "Failed to find subregs!"); unsigned Opcode = AMDGPU::S_MOV_B32; // Is SGPR aligned? If so try to combine with next. - Register Src = RI.getSubReg(SrcReg, SubIdx); - bool AlignedDest = ((Reg - AMDGPU::SGPR0) % 2) == 0; - bool AlignedSrc = ((Src - AMDGPU::SGPR0) % 2) == 0; + bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0; + bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0; if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.size())) { // Can use SGPR64 copy unsigned Channel = RI.getChannelFromSubReg(SubIdx); SubIdx = RI.getSubRegFromChannel(Channel, 2); + DestSubReg = RI.getSubReg(DestReg, SubIdx); + SrcSubReg = RI.getSubReg(SrcReg, SubIdx); + assert(DestSubReg && SrcSubReg && "Failed to find subregs!"); Opcode = AMDGPU::S_MOV_B64; Idx++; } - LastMI = BuildMI(MBB, I, DL, TII.get(Opcode), RI.getSubReg(DestReg, SubIdx)) - .addReg(RI.getSubReg(SrcReg, SubIdx)) + LastMI = BuildMI(MBB, I, DL, TII.get(Opcode), DestSubReg) + .addReg(SrcSubReg) .addReg(SrcReg, RegState::Implicit); if (!FirstMI) @@ -1098,6 +1102,9 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, SubIdx = SubIndices[Idx]; else SubIdx = SubIndices[SubIndices.size() - Idx - 1]; + Register DestSubReg = RI.getSubReg(DestReg, SubIdx); + Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx); + assert(DestSubReg && SrcSubReg && "Failed to find subregs!"); bool IsFirstSubreg = Idx == 0; bool UseKill = CanKillSuperReg && Idx == SubIndices.size() - 1; @@ -1105,30 +1112,26 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (Opcode == AMDGPU::INSTRUCTION_LIST_END) { Register ImpDefSuper = IsFirstSubreg ? 
Register(DestReg) : Register(); Register ImpUseSuper = SrcReg; - indirectCopyToAGPR(*this, MBB, MI, DL, RI.getSubReg(DestReg, SubIdx), - RI.getSubReg(SrcReg, SubIdx), UseKill, *RS, Overlap, - ImpDefSuper, ImpUseSuper); + indirectCopyToAGPR(*this, MBB, MI, DL, DestSubReg, SrcSubReg, UseKill, + *RS, Overlap, ImpDefSuper, ImpUseSuper); } else if (Opcode == AMDGPU::V_PK_MOV_B32) { - Register DstSubReg = RI.getSubReg(DestReg, SubIdx); - Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx); MachineInstrBuilder MIB = - BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), DstSubReg) - .addImm(SISrcMods::OP_SEL_1) - .addReg(SrcSubReg) - .addImm(SISrcMods::OP_SEL_0 | SISrcMods::OP_SEL_1) - .addReg(SrcSubReg) - .addImm(0) // op_sel_lo - .addImm(0) // op_sel_hi - .addImm(0) // neg_lo - .addImm(0) // neg_hi - .addImm(0) // clamp - .addReg(SrcReg, getKillRegState(UseKill) | RegState::Implicit); + BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), DestSubReg) + .addImm(SISrcMods::OP_SEL_1) + .addReg(SrcSubReg) + .addImm(SISrcMods::OP_SEL_0 | SISrcMods::OP_SEL_1) + .addReg(SrcSubReg) + .addImm(0) // op_sel_lo + .addImm(0) // op_sel_hi + .addImm(0) // neg_lo + .addImm(0) // neg_hi + .addImm(0) // clamp + .addReg(SrcReg, getKillRegState(UseKill) | RegState::Implicit); if (IsFirstSubreg) MIB.addReg(DestReg, RegState::Define | RegState::Implicit); } else { MachineInstrBuilder Builder = - BuildMI(MBB, MI, DL, get(Opcode), RI.getSubReg(DestReg, SubIdx)) - .addReg(RI.getSubReg(SrcReg, SubIdx)); + BuildMI(MBB, MI, DL, get(Opcode), DestSubReg).addReg(SrcSubReg); if (IsFirstSubreg) Builder.addReg(DestReg, RegState::Define | RegState::Implicit); From 3c58e53041fcdeae36ef5ca1e0683e0f1f16bf69 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Thu, 26 Oct 2023 15:42:09 +0100 Subject: [PATCH 079/877] [AMDGPU] Use const reference in SIInstrInfo::buildExtractSubReg. NFC. 
--- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 20 +++++++------------- llvm/lib/Target/AMDGPU/SIInstrInfo.h | 13 ++++++------- 2 files changed, 13 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 62f5a17635cee..327f8988ac2f1 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -5416,13 +5416,10 @@ void SIInstrInfo::legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const { MO.ChangeToRegister(Reg, false); } -unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI, - MachineRegisterInfo &MRI, - MachineOperand &SuperReg, - const TargetRegisterClass *SuperRC, - unsigned SubIdx, - const TargetRegisterClass *SubRC) - const { +unsigned SIInstrInfo::buildExtractSubReg( + MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, + const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, + unsigned SubIdx, const TargetRegisterClass *SubRC) const { MachineBasicBlock *MBB = MI->getParent(); DebugLoc DL = MI->getDebugLoc(); Register SubReg = MRI.createVirtualRegister(SubRC); @@ -5449,12 +5446,9 @@ unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI, } MachineOperand SIInstrInfo::buildExtractSubRegOrImm( - MachineBasicBlock::iterator MII, - MachineRegisterInfo &MRI, - MachineOperand &Op, - const TargetRegisterClass *SuperRC, - unsigned SubIdx, - const TargetRegisterClass *SubRC) const { + MachineBasicBlock::iterator MII, MachineRegisterInfo &MRI, + const MachineOperand &Op, const TargetRegisterClass *SuperRC, + unsigned SubIdx, const TargetRegisterClass *SubRC) const { if (Op.isImm()) { if (SubIdx == AMDGPU::sub0) return MachineOperand::CreateImm(static_cast<int32_t>(Op.getImm())); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index a64cf0244e4c0..e6c64d909d3ee 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -102,16 +102,15 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { public: unsigned buildExtractSubReg(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, - MachineOperand &SuperReg, + const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const; - MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI, - MachineRegisterInfo &MRI, - MachineOperand &SuperReg, - const TargetRegisterClass *SuperRC, - unsigned SubIdx, - const TargetRegisterClass *SubRC) const; + MachineOperand buildExtractSubRegOrImm( + MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, + const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, + unsigned SubIdx, const TargetRegisterClass *SubRC) const; + private: void swapOperands(MachineInstr &Inst) const; From 35baff8b6ac4630d28db91d6b481d6cd5910931e Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 26 Oct 2023 08:02:10 -0700 Subject: [PATCH 080/877] [AMDGPU] Correct assert that incorrectly chained multiple == operators. (#70291) I believe this assert was trying to check that 3 variables were equal to 0. I think it instead got interpreted as ((DSWCount == DSWWithPermCount) == DSWWithSharedVMEMCount) == 0 I guess (DSWCount == DSWWithPermCount) was true because both counts were 0. Then true got compared to DSWWithSharedVMEMCount, and since DSWWithSharedVMEMCount is 0, that compare was false. And then that false compared equal to the final 0.
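To make the misparse concrete, here is a minimal standalone C++ sketch (illustrative only, not part of the patch; the counter values are invented for the example):

  #include <cassert>

  int main() {
    unsigned DSWCount = 7, DSWWithPermCount = 7, DSWWithSharedVMEMCount = 0;
    // Chained == is left-associative, so the old condition parses as shown:
    //   (7 == 7) -> true; (true == 0) -> false; (false == 0) -> true.
    bool Chained =
        ((DSWCount == DSWWithPermCount) == DSWWithSharedVMEMCount) == 0;
    // The intended condition compares each counter to zero separately.
    bool Intended = DSWCount == 0 && DSWWithPermCount == 0 &&
                    DSWWithSharedVMEMCount == 0;
    // The old assert would pass for these nonzero counters; the fixed one
    // would fire.
    assert(Chained && !Intended);
    return 0;
  }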
--- llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp index c67a21c639fc0..0b2bb98738be2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp @@ -1121,8 +1121,8 @@ void MFMASmallGemmSingleWaveOpt::applyIGLPStrategy( unsigned MFMACount = 0; unsigned DSRCount = 0; - assert((IsPostRA || - DSWCount == DSWWithPermCount == DSWWithSharedVMEMCount == 0) && + assert((IsPostRA || (DSWCount == 0 && DSWWithPermCount == 0 && + DSWWithSharedVMEMCount == 0)) && "DSWCounters should be zero in pre-RA scheduling!"); SmallVector<SUnit *, 10> DSWithPerms; for (auto &SU : DAG->SUnits) { From 2d0ac85b6693eb6e25b4120c0e4e224c42a84462 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 25 Oct 2023 22:15:10 -0700 Subject: [PATCH 081/877] [X86] Fix gcc warning about mix of enumeral and non-enumeral types. NFC --- llvm/lib/Target/X86/X86ISelLowering.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 93db31e03e116..6411f27da0776 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -3297,21 +3297,22 @@ unsigned X86TargetLowering::preferedOpcodeForCmpEqPiecesOfOperand( // If the current setup has imm64 mask, then inverse will have // at least imm32 mask (or be zext i32 -> i64). if (VT == MVT::i64) - return AndMask->getSignificantBits() > 32 ? ISD::SRL : ShiftOpc; + return AndMask->getSignificantBits() > 32 ? (unsigned)ISD::SRL + : ShiftOpc; // We can only benefit if req at least 7-bit for the mask. We // don't want to replace shl of 1,2,3 as they can be implemented // with lea/add. - return ShiftOrRotateAmt.uge(7) ? ISD::SRL : ShiftOpc; + return ShiftOrRotateAmt.uge(7) ? (unsigned)ISD::SRL : ShiftOpc; } if (VT == MVT::i64) // Keep exactly 32-bit imm64, this is zext i32 -> i64 which is // extremely efficient. - return AndMask->getSignificantBits() > 33 ? ISD::SHL : ShiftOpc; + return AndMask->getSignificantBits() > 33 ? (unsigned)ISD::SHL : ShiftOpc; // Keep small shifts as shl so we can generate add/lea. - return ShiftOrRotateAmt.ult(7) ? ISD::SHL : ShiftOpc; + return ShiftOrRotateAmt.ult(7) ? (unsigned)ISD::SHL : ShiftOpc; } // We prefer rotate for vectors of if we won't get a zext mask with SRL From 78941e1eedb121344e0d969458ea85598cd749df Mon Sep 17 00:00:00 2001 From: hassnaaHamdi Date: Thu, 26 Oct 2023 16:09:59 +0100 Subject: [PATCH 082/877] [llvm][AArch64][Assembly]: Add FP8 instructions assembly and disassembly.
(#69632) This patch adds the feature flag FP8 and the assembly/disassembly for the following instructions of NEON, SVE2 and SME2: * NEON Instructions: + Advanced SIMD two-register miscellaneous: - F1CVTL, F1CVTL2, F2CVTL, F2CVTL2 - BF1CVTL, BF1CVTL2, BF2CVTL, BF2CVTL2 + Advanced SIMD three-register extension: - FCVTN, FCVTN2 (FP32 to FP8) - FCVTN (FP16 to FP8) + Advanced SIMD three same: - FSCALE * SVE2 Instructions: + Downconvert instructions: - FCVTN_Z2Z_HtoB - FCVTNB_Z2Z_StoB - BFCVTN_Z2Z_HtoB - FCVTNT_Z2Z_StoB + Upconvert instructions: - F1CVT_ZZ, F2CVT_ZZ - BF1CVT_ZZ, BF2CVT_ZZ - F1CVTLT_ZZ, F2CVTLT_ZZ - BF1CVTLT_ZZ, BF2CVTLT_ZZ * SME2 Instructions: - F1CVT_2ZZ, F2CVT_2ZZ - BF1CVT_2ZZ, BF2CVT_2ZZ - F1CVTL_2ZZ, F2CVTL_2ZZ - BF1CVTL_2ZZ, BF2CVTL_2ZZ - FCVT_Z2Z_HtoB, BFCVT_Z2Z_HtoB - FCVT_Z4Z - FCVTN_Z4Z - FSCALE_2ZZ, FSCALE_4ZZ - FSCALE_2Z2Z, FSCALE_4Z4Z That is according to this documentation: https://developer.arm.com/documentation/ddi0602/2023-09 --- .../llvm/TargetParser/AArch64TargetParser.h | 2 + llvm/lib/Target/AArch64/AArch64.td | 3 + .../lib/Target/AArch64/AArch64InstrFormats.td | 53 +++ llvm/lib/Target/AArch64/AArch64InstrInfo.td | 15 + .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 39 +- .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 21 ++ .../AArch64/AsmParser/AArch64AsmParser.cpp | 1 + llvm/lib/Target/AArch64/SMEInstrFormats.td | 41 +- llvm/lib/Target/AArch64/SVEInstrFormats.td | 43 +++ .../MC/AArch64/FP8/directive-arch-negative.s | 7 + llvm/test/MC/AArch64/FP8/directive-arch.s | 7 + .../FP8/miscellaneous-fp8-diagnostics.s | 84 +++++ llvm/test/MC/AArch64/FP8/miscellaneous-fp8.s | 355 ++++++++++++++++++ .../MC/AArch64/FP8_SME2/cvt-diagnostics.s | 87 +++++ llvm/test/MC/AArch64/FP8_SME2/cvt.s | 157 ++++++++ .../MC/AArch64/FP8_SME2/fscale-diagnostics.c | 62 +++ llvm/test/MC/AArch64/FP8_SME2/fscale.s | 160 ++++++++ .../MC/AArch64/FP8_SVE2/fcvt-diagnostics.s | 131 +++++++ llvm/test/MC/AArch64/FP8_SVE2/fcvt.s | 237 ++++++++++++ .../MC/AArch64/FP8_SVE2/fcvtn-diagnostics.s | 70 ++++ llvm/test/MC/AArch64/FP8_SVE2/fcvtn.s | 125 ++++++ .../test/MC/AArch64/SVE2/fcvtnt-diagnostics.s | 4 +- .../TargetParser/TargetParserTest.cpp | 4 +- 23 files changed, 1686 insertions(+), 22 deletions(-) create mode 100644 llvm/test/MC/AArch64/FP8/directive-arch-negative.s create mode 100644 llvm/test/MC/AArch64/FP8/directive-arch.s create mode 100644 llvm/test/MC/AArch64/FP8/miscellaneous-fp8-diagnostics.s create mode 100644 llvm/test/MC/AArch64/FP8/miscellaneous-fp8.s create mode 100644 llvm/test/MC/AArch64/FP8_SME2/cvt-diagnostics.s create mode 100644 llvm/test/MC/AArch64/FP8_SME2/cvt.s create mode 100644 llvm/test/MC/AArch64/FP8_SME2/fscale-diagnostics.c create mode 100644 llvm/test/MC/AArch64/FP8_SME2/fscale.s create mode 100644 llvm/test/MC/AArch64/FP8_SVE2/fcvt-diagnostics.s create mode 100644 llvm/test/MC/AArch64/FP8_SVE2/fcvt.s create mode 100644 llvm/test/MC/AArch64/FP8_SVE2/fcvtn-diagnostics.s create mode 100644 llvm/test/MC/AArch64/FP8_SVE2/fcvtn.s diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h index bf14473f133fa..8ff2947794251 100644 --- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h +++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h @@ -160,6 +160,7 @@ enum ArchExtKind : unsigned { AEK_ITE = 56, // FEAT_ITE AEK_GCS = 57, // FEAT_GCS AEK_FPMR = 58, // FEAT_FPMR + AEK_FP8 = 59, // FEAT_FP8 AEK_NUM_EXTENSIONS }; using ExtensionBitset = Bitset<AEK_NUM_EXTENSIONS>; @@ -269,6 +270,7 @@ inline constexpr ExtensionInfo
Extensions[] = { {"wfxt", AArch64::AEK_NONE, {}, {}, FEAT_WFXT, "+wfxt", 550}, {"gcs", AArch64::AEK_GCS, "+gcs", "-gcs", FEAT_INIT, "", 0}, {"fpmr", AArch64::AEK_FPMR, "+fpmr", "-fpmr", FEAT_INIT, "", 0}, + {"fp8", AArch64::AEK_FP8, "+fp8", "-fp8", FEAT_INIT, "+fpmr", 0}, // Special cases {"none", AArch64::AEK_NONE, {}, {}, FEAT_INIT, "", ExtensionInfo::MaxFMVPriority}, }; diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index ced1d43892036..0c0fa82ffe93c 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -130,6 +130,9 @@ def FeatureSVE : SubtargetFeature<"sve", "HasSVE", "true", def FeatureFPMR : SubtargetFeature<"fpmr", "HasFPMR", "true", "Enable FPMR Register (FEAT_FPMR)">; +def FeatureFP8 : SubtargetFeature<"fp8", "HasFP8", "true", + "Enable FP8 instructions (FEAT_FP8)">; + // This flag is currently still labeled as Experimental, but when fully // implemented this should tell the compiler to use the zeroing pseudos to // benefit from the reverse instructions (e.g. SUB vs SUBR) if the inactive diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index e5dbfa404b3c6..a48bf77a774b7 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -6056,6 +6056,49 @@ multiclass SIMDThreeSameVectorFML<bit U, bit b13, bits<3> size, string asm, } +// FP8 assembly/disassembly classes + +//---------------------------------------------------------------------------- +// FP8 Advanced SIMD three-register extension +//---------------------------------------------------------------------------- +class BaseSIMDThreeVectors<bit Q, bit U, bits<2> size, bits<4> op, + RegisterOperand regtype1, + RegisterOperand regtype2, string asm, + string kind1, string kind2> + : I<(outs regtype1:$Rd), (ins regtype2:$Rn, regtype2:$Rm), asm, + "\t$Rd" # kind1 # ", $Rn" # kind2 # ", $Rm" # kind2, "", []>, Sched<[]> { bits<5> Rd; bits<5> Rn; bits<5> Rm; let Inst{31} = 0; let Inst{30} = Q; let Inst{29} = U; let Inst{28-24} = 0b01110; let Inst{23-22} = size; let Inst{21} = 0b0; let Inst{20-16} = Rm; let Inst{15} = 0b1; let Inst{14-11} = op; let Inst{10} = 0b1; let Inst{9-5} = Rn; let Inst{4-0} = Rd; +} + + +// FCVTN (FP16 to FP8) +multiclass SIMDThreeSameSizeVectorCvt<string asm> { + def v8f8 : BaseSIMDThreeVectors<0b0, 0b0, 0b01, 0b1110, V64, V64, asm, ".8b",".4h">; + def v16f8 : BaseSIMDThreeVectors<0b1, 0b0, 0b01, 0b1110, V128, V128, asm, ".16b", ".8h">; +} + +// TODO : Create v16f8 value type +// FCVTN, FCVTN2 (FP32 to FP8) +multiclass SIMDThreeVectorCvt<string asm> { + def v8f8 : BaseSIMDThreeVectors<0b0, 0b0, 0b00, 0b1110, V64, V128, asm, ".8b", ".4s">; + def 2v16f8 : BaseSIMDThreeSameVectorDot<0b1, 0b0, 0b00, 0b1110, asm#2, ".16b", ".4s", + V128, v16i8, v4f32, null_frag>; +} +//---------------------------------------------------------------------------- // AdvSIMD two register vector instructions.
//---------------------------------------------------------------------------- @@ -6479,6 +6522,16 @@ multiclass SIMDMixedTwoVector<bit U, bits<5> opc, string asm, (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; } +//---------------------------------------------------------------------------- +// FP8 Advanced SIMD two-register miscellaneous +//---------------------------------------------------------------------------- +multiclass SIMDMixedTwoVectorFP8<bits<2>sz, string asm> { + def v8f16 : BaseSIMDMixedTwoVector<0b0, 0b1, sz, 0b10111, V64, V128, + asm, ".8h", ".8b", []>; + def 2v8f16 : BaseSIMDMixedTwoVector<0b1, 0b1, sz, 0b10111, V128, V128, + asm#2, ".8h", ".16b", []>; +} + class BaseSIMDCmpTwoVector<bit Q, bit U, bits<2> size, bits<2> size2, bits<5> opcode, RegisterOperand regtype, string asm, string kind, string zero, ValueType dty, diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 382d3956f105f..6f616b2798435 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -162,6 +162,8 @@ def HasSME2p1 : Predicate<"Subtarget->hasSME2p1()">, AssemblerPredicateWithAll<(all_of FeatureSME2p1), "sme2p1">; def HasFPMR : Predicate<"Subtarget->hasFPMR()">, AssemblerPredicateWithAll<(all_of FeatureFPMR), "fpmr">; +def HasFP8 : Predicate<"Subtarget->hasFP8()">, + AssemblerPredicateWithAll<(all_of FeatureFP8), "fp8">; // A subset of SVE(2) instructions are legal in Streaming SVE execution mode, // they should be enabled if either has been specified. @@ -173,6 +175,10 @@ def HasSVE2orSME : Predicate<"Subtarget->hasSVE2() || Subtarget->hasSME()">, AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME), "sve2 or sme">; +def HasSVE2orSME2 + : Predicate<"Subtarget->hasSVE2() || Subtarget->hasSME2()">, + AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME2), + "sve2 or sme2">; def HasSVE2p1_or_HasSME : Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME()">, AssemblerPredicateWithAll<(any_of FeatureSME, FeatureSVE2p1), "sme or sve2p1">; @@ -9249,6 +9255,15 @@ let Predicates = [HasD128] in { } } +let Predicates = [HasFP8] in { + defm F1CVTL : SIMDMixedTwoVectorFP8<0b00, "f1cvtl">; + defm F2CVTL : SIMDMixedTwoVectorFP8<0b01, "f2cvtl">; + defm BF1CVTL : SIMDMixedTwoVectorFP8<0b10, "bf1cvtl">; + defm BF2CVTL : SIMDMixedTwoVectorFP8<0b11, "bf2cvtl">; + defm FCVTN_F16_F8 : SIMDThreeSameSizeVectorCvt<"fcvtn">; + defm FCVTN_F32_F8 : SIMDThreeVectorCvt<"fcvtn">; + defm FSCALE : SIMDThreeSameVectorFP<0b1, 0b1, 0b111, "fscale", null_frag>; +} // End let Predicates = [HasFP8] include "AArch64InstrAtomics.td" include "AArch64SVEInstrInfo.td" diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index 2685f2e3c8108..cdc79ec3ae344 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -330,14 +330,14 @@ defm UMLSL_VG4_M4ZZ : sme2_int_mla_long_array_vg4_single<"umlsl", 0b11, int_aar defm UMLSL_VG2_M2Z2Z : sme2_int_mla_long_array_vg2_multi<"umlsl", 0b11, int_aarch64_sme_umlsl_vg2x2>; defm UMLSL_VG4_M4Z4Z : sme2_int_mla_long_array_vg4_multi<"umlsl", 0b11, int_aarch64_sme_umlsl_vg2x4>; -defm FCVT_Z2Z_StoH : sme2_cvt_vg2_single<"fcvt", 0b0000, nxv8f16, nxv4f32, int_aarch64_sve_fcvt_x2>; -defm FCVTN_Z2Z_StoH : sme2_cvt_vg2_single<"fcvtn", 0b0001, nxv8f16, nxv4f32, int_aarch64_sve_fcvtn_x2>; -defm BFCVT_Z2Z_StoH : sme2_cvt_vg2_single<"bfcvt", 0b1000, nxv8bf16, nxv4f32, int_aarch64_sve_bfcvt_x2>; -defm BFCVTN_Z2Z_StoH :
sme2_cvt_vg2_single<"bfcvtn", 0b1001, nxv8bf16, nxv4f32, int_aarch64_sve_bfcvtn_x2>; - -defm SQCVT_Z2Z_StoH : sme2_cvt_vg2_single<"sqcvt", 0b0110, nxv8i16, nxv4i32, int_aarch64_sve_sqcvt_x2>; -defm UQCVT_Z2Z_StoH : sme2_cvt_vg2_single<"uqcvt", 0b0111, nxv8i16, nxv4i32, int_aarch64_sve_uqcvt_x2>; -defm SQCVTU_Z2Z_StoH : sme2_cvt_vg2_single<"sqcvtu", 0b1110, nxv8i16, nxv4i32, int_aarch64_sve_sqcvtu_x2>; +defm FCVT_Z2Z_StoH : sme2_cvt_vg2_single<"fcvt", 0b00000, nxv8f16, nxv4f32, int_aarch64_sve_fcvt_x2>; +defm FCVTN_Z2Z_StoH : sme2_cvt_vg2_single<"fcvtn", 0b00001, nxv8f16, nxv4f32, int_aarch64_sve_fcvtn_x2>; +defm BFCVT_Z2Z_StoH : sme2_cvt_vg2_single<"bfcvt", 0b10000, nxv8bf16, nxv4f32, int_aarch64_sve_bfcvt_x2>; +defm BFCVTN_Z2Z_StoH : sme2_cvt_vg2_single<"bfcvtn", 0b10001, nxv8bf16, nxv4f32, int_aarch64_sve_bfcvtn_x2>; + +defm SQCVT_Z2Z_StoH : sme2_cvt_vg2_single<"sqcvt", 0b00110, nxv8i16, nxv4i32, int_aarch64_sve_sqcvt_x2>; +defm UQCVT_Z2Z_StoH : sme2_cvt_vg2_single<"uqcvt", 0b00111, nxv8i16, nxv4i32, int_aarch64_sve_uqcvt_x2>; +defm SQCVTU_Z2Z_StoH : sme2_cvt_vg2_single<"sqcvtu", 0b10110, nxv8i16, nxv4i32, int_aarch64_sve_sqcvtu_x2>; defm SQCVT_Z4Z : sme2_int_cvt_vg4_single<"sqcvt", 0b000, int_aarch64_sve_sqcvt_x4>; defm UQCVT_Z4Z : sme2_int_cvt_vg4_single<"uqcvt", 0b001, int_aarch64_sve_uqcvt_x4>; defm SQCVTU_Z4Z : sme2_int_cvt_vg4_single<"sqcvtu", 0b100, int_aarch64_sve_sqcvtu_x4>; @@ -855,3 +855,26 @@ defm BFCLAMP_VG4_4ZZZ: sme2p1_bfclamp_vector_vg4_multi<"bfclamp">; defm BFMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"bfmopa", 0b1, 0b0, 0b11, ZPR16>; defm BFMOPS_MPPZZ_H : sme2p1_fmop_tile_fp16<"bfmops", 0b1, 0b1, 0b11, ZPR16>; } + +let Predicates = [HasSME2, HasFP8] in { +defm F1CVT_2ZZ_BtoH : sme2p1_fp8_cvt_vector_vg2_single<"f1cvt", 0b00, 0b0>; +defm F1CVTL_2ZZ_BtoH : sme2p1_fp8_cvt_vector_vg2_single<"f1cvtl", 0b00, 0b1>; +defm BF1CVT_2ZZ_BtoH : sme2p1_fp8_cvt_vector_vg2_single<"bf1cvt", 0b01, 0b0>; +defm BF1CVTL_2ZZ_BtoH : sme2p1_fp8_cvt_vector_vg2_single<"bf1cvtl", 0b01, 0b1>; +defm F2CVT_2ZZ_BtoH : sme2p1_fp8_cvt_vector_vg2_single<"f2cvt", 0b10, 0b0>; +defm F2CVTL_2ZZ_BtoH : sme2p1_fp8_cvt_vector_vg2_single<"f2cvtl", 0b10, 0b1>; +defm BF2CVT_2ZZ_BtoH : sme2p1_fp8_cvt_vector_vg2_single<"bf2cvt", 0b11, 0b0>; +defm BF2CVTL_2ZZ_BtoH : sme2p1_fp8_cvt_vector_vg2_single<"bf2cvtl", 0b11, 0b1>; + +defm FCVT_Z2Z_HtoB : sme2_fp8_cvt_vg2_single<"fcvt", 0b0>; +defm BFCVT_Z2Z_HtoB : sme2_fp8_cvt_vg2_single<"bfcvt", 0b1>; +defm FCVT_Z4Z_StoB : sme2_fp8_cvt_vg4_single<"fcvt", 0b0>; +defm FCVTN_Z4Z_StoB : sme2_fp8_cvt_vg4_single<"fcvtn", 0b1>; + +defm FSCALE_2ZZ : sme2_fp_sve_destructive_vector_vg2_single<"fscale", 0b0011000>; +defm FSCALE_4ZZ : sme2_fp_sve_destructive_vector_vg4_single<"fscale", 0b0011000>; +defm FSCALE_2Z2Z : sme2_fp_sve_destructive_vector_vg2_multi<"fscale", 0b0011000>; +defm FSCALE_4Z4Z : sme2_fp_sve_destructive_vector_vg4_multi<"fscale", 0b0011000>; + +} // [HasSME2, HasFP8] + diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index d599ac4689e5c..002d5d28fcf8d 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -4002,3 +4002,24 @@ defm UZPQ1_ZZZ : sve2p1_permute_vec_elems_q<0b010, "uzpq1">; defm UZPQ2_ZZZ : sve2p1_permute_vec_elems_q<0b011, "uzpq2">; defm TBLQ_ZZZ : sve2p1_tblq<"tblq">; } // End HasSVE2p1_or_HasSME2p1 + +//===----------------------------------------------------------------------===// +// SVE2 FP8 instructions 
+//===----------------------------------------------------------------------===// +let Predicates = [HasSVE2orSME2, HasFP8] in { +// FP8 upconvert +defm F1CVT_ZZ : sve2_fp8_cvt_single<0b0, 0b00, "f1cvt">; +defm F2CVT_ZZ : sve2_fp8_cvt_single<0b0, 0b01, "f2cvt">; +defm BF1CVT_ZZ : sve2_fp8_cvt_single<0b0, 0b10, "bf1cvt">; +defm BF2CVT_ZZ : sve2_fp8_cvt_single<0b0, 0b11, "bf2cvt">; +defm F1CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b00, "f1cvtlt">; +defm F2CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b01, "f2cvtlt">; +defm BF1CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b10, "bf1cvtlt">; +defm BF2CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b11, "bf2cvtlt">; + +// FP8 downconvert +defm FCVTN_Z2Z_HtoB : sve2_fp8_down_cvt_single<0b00, "fcvtn", ZZ_h_mul_r>; +defm FCVTNB_Z2Z_StoB : sve2_fp8_down_cvt_single<0b01, "fcvtnb", ZZ_s_mul_r>; +defm BFCVTN_Z2Z_HtoB : sve2_fp8_down_cvt_single<0b10, "bfcvtn", ZZ_h_mul_r>; +defm FCVTNT_Z2Z_StoB : sve2_fp8_down_cvt_single<0b11, "fcvtnt", ZZ_s_mul_r>; +} // End HasSVE2orSME2, HasFP8 \ No newline at end of file diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 6e70deec3f890..36e34fdc07e7c 100644 --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -3639,6 +3639,7 @@ static const struct Extension { {"ssbs", {AArch64::FeatureSSBS}}, {"tme", {AArch64::FeatureTME}}, {"fpmr", {AArch64::FeatureFPMR}}, + {"fp8", {AArch64::FeatureFP8}}, }; static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) { diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 823115c7d0250..d8b44c68fbdee 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -2161,15 +2161,16 @@ multiclass sme2_frint_vector_vg4_multi op> { mnemonic>; } -class sme2_cvt_vg2_single<string mnemonic, bits<4> op> - : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn), +class sme2_cvt_vg2_single<string mnemonic, bits<5> op, + RegisterOperand first_ty, RegisterOperand second_ty> + : I<(outs first_ty:$Zd), (ins second_ty:$Zn), mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> { bits<4> Zn; bits<5> Zd; let Inst{31-23} = 0b110000010; - let Inst{22} = op{3}; - let Inst{21-18} = 0b1000; - let Inst{17-16} = op{2-1}; + let Inst{22} = op{4}; + let Inst{21-19} = 0b100; + let Inst{18-16} = op{3-1}; let Inst{15-10} = 0b111000; let Inst{9-6} = Zn; let Inst{5} = op{0}; @@ -2178,12 +2179,17 @@ class sme2_cvt_vg2_single<string mnemonic, bits<4> op> // SME2 multi-vec FP down convert two registers // SME2 multi-vec int down convert two registers -multiclass sme2_cvt_vg2_single<string mnemonic, bits<4> op, ValueType out_vt, +multiclass sme2_cvt_vg2_single<string mnemonic, bits<5> op, ValueType out_vt, ValueType in_vt, SDPatternOperator intrinsic> { - def NAME : sme2_cvt_vg2_single<mnemonic, op>; + def NAME : sme2_cvt_vg2_single<mnemonic, op, ZPR16, ZZ_s_mul_r>; def : SVE2p1_Cvt_VG2_Pat<NAME, intrinsic, out_vt, in_vt>; } +// SME2 multi-vec FP8 down convert two registers +multiclass sme2_fp8_cvt_vg2_single<string mnemonic, bit op> { + def NAME : sme2_cvt_vg2_single<mnemonic, {op, 0b1000}, ZPR8, ZZ_h_mul_r>; +} + class sme2_cvt_unpk_vector_vg2<bits<2>sz, bits<3> op, bit u, RegisterOperand first_ty, RegisterOperand second_ty, string mnemonic> : I<(outs first_ty:$Zd), (ins second_ty:$Zn), @@ -2212,7 +2218,13 @@ multiclass sme2p1_fp_cvt_vector_vg2_single<string mnemonic, bit l> { def _S : sme2_cvt_unpk_vector_vg2<0b10, 0b000, l, ZZ_s_mul_r, ZPR16, mnemonic>; } +// SME2 multi-vec FP8 up convert two registers +multiclass sme2p1_fp8_cvt_vector_vg2_single<string mnemonic, bits<2> opc, bit L> { + def _NAME : sme2_cvt_unpk_vector_vg2<opc, 0b110, L, ZZ_h_mul_r, ZPR8, mnemonic>; +} + + +class sme2_cvt_vg4_single<bit sz, bits<3> op, bits<4>op2,
RegisterOperand first_ty, RegisterOperand second_ty, string mnemonic> : I<(outs first_ty:$Zd), (ins second_ty:$Zn), mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> { @@ -2221,7 +2233,9 @@ class sme2_cvt_vg4_single<bit sz, bits<3> op, RegisterOperand first_ty, let Inst{31-24} = 0b11000001; let Inst{23} = sz; let Inst{22} = op{2}; - let Inst{21-10} = 0b110011111000; + let Inst{21-20} = 0b11; + let Inst{19-16} = op2; + let Inst{15-10} = 0b111000; let Inst{9-7} = Zn; let Inst{6-5} = op{1-0}; let Inst{4-0} = Zd; @@ -2229,13 +2243,18 @@ class sme2_cvt_vg4_single<bit sz, bits<3> op, RegisterOperand first_ty, // SME2 multi-vec int down convert four registers multiclass sme2_int_cvt_vg4_single<string mnemonic, bits<3> op, SDPatternOperator intrinsic> { - def _StoB : sme2_cvt_vg4_single<0, op, ZPR8, ZZZZ_s_mul_r, mnemonic>; - def _DtoH : sme2_cvt_vg4_single<1, op, ZPR16, ZZZZ_d_mul_r, mnemonic>; + def _StoB : sme2_cvt_vg4_single<0, op, 0b0011, ZPR8, ZZZZ_s_mul_r, mnemonic>; + def _DtoH : sme2_cvt_vg4_single<1, op, 0b0011, ZPR16, ZZZZ_d_mul_r, mnemonic>; def : SME2_Cvt_VG4_Pat<NAME # _StoB, intrinsic, nxv16i8, nxv4i32>; def : SME2_Cvt_VG4_Pat<NAME # _DtoH, intrinsic, nxv8i16, nxv2i64>; } +// SME2 multi-vec FP8 down convert four registers +multiclass sme2_fp8_cvt_vg4_single<string mnemonic, bit N> { + def _NAME : sme2_cvt_vg4_single<0b0, {0b00, N}, 0b0100, ZPR8, ZZZZ_s_mul_r, mnemonic>; +} + class sme2_unpk_vector_vg4<bits<2>sz, bit u, RegisterOperand first_ty, RegisterOperand second_ty, string mnemonic> : I<(outs first_ty:$Zd), (ins second_ty:$Zn), diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 7bb457d918821..d2f72fda3a229 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -10078,3 +10078,46 @@ multiclass sve2p1_tblq<string mnemonic> { def _S : sve2p1_permute_vec_elems_q<0b10, 0b110, mnemonic, ZPR32, Z_s>; def _D : sve2p1_permute_vec_elems_q<0b11, 0b110, mnemonic, ZPR64, Z_d>; } + +//===----------------------------------------------------------------------===// +// SVE2 FP8 Instructions +//===----------------------------------------------------------------------===// + +// FP8 upconvert +class sve2_fp8_cvt_single<bit L, bits<2> opc, string mnemonic, + ZPRRegOp dst_ty, ZPRRegOp src_ty> + : I<(outs dst_ty:$Zd), (ins src_ty:$Zn), + mnemonic, "\t$Zd, $Zn", + "", []>, Sched<[]>{ + bits<5> Zd; + bits<5> Zn; + let Inst{31-17} = 0b011001010000100; + let Inst{16} = L; + let Inst{15-12} = 0b0011; + let Inst{11-10} = opc; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; +} + +multiclass sve2_fp8_cvt_single<bit L, bits<2> opc, string mnemonic> { + def _BtoH : sve2_fp8_cvt_single<L, opc, mnemonic, ZPR16, ZPR8>; +} + +// FP8 downconvert +class sve2_fp8_down_cvt_single<bits<2> opc, string mnemonic, + ZPRRegOp dst_ty, RegisterOperand src_ty> + : I<(outs dst_ty:$Zd), (ins src_ty:$Zn), + mnemonic, "\t$Zd, $Zn", + "", []>, Sched<[]>{ + bits<5> Zd; + bits<4> Zn; + let Inst{31-12} = 0b01100101000010100011; + let Inst{11-10} = opc; + let Inst{9-6} = Zn; + let Inst{5} = 0b0; + let Inst{4-0} = Zd; +} + +multiclass sve2_fp8_down_cvt_single<bits<2> opc, string mnemonic, RegisterOperand src> { + def NAME : sve2_fp8_down_cvt_single<opc, mnemonic, ZPR8, src>; +} \ No newline at end of file diff --git a/llvm/test/MC/AArch64/FP8/directive-arch-negative.s b/llvm/test/MC/AArch64/FP8/directive-arch-negative.s new file mode 100644 index 0000000000000..cf48416d29d8a --- /dev/null +++ b/llvm/test/MC/AArch64/FP8/directive-arch-negative.s @@ -0,0 +1,7 @@ +// RUN: not llvm-mc -triple aarch64 -filetype asm -o - %s 2>&1 | FileCheck %s + +.arch armv9-a+fp8 +.arch armv9-a+nofp8 +bf1cvtl v0.8h, v0.8b +// CHECK: error: instruction requires: fp8 +// CHECK: bf1cvtl v0.8h, v0.8b diff --git a/llvm/test/MC/AArch64/FP8/directive-arch.s
b/llvm/test/MC/AArch64/FP8/directive-arch.s new file mode 100644 index 0000000000000..8857d4f0bfbe4 --- /dev/null +++ b/llvm/test/MC/AArch64/FP8/directive-arch.s @@ -0,0 +1,7 @@ +// RUN: llvm-mc -triple aarch64 -o - %s 2>&1 | FileCheck %s + +.arch armv9-a+fp8 +bf1cvtl v0.8h, v0.8b +// CHECK: bf1cvtl v0.8h, v0.8b + +.arch armv9-a+nofp8 diff --git a/llvm/test/MC/AArch64/FP8/miscellaneous-fp8-diagnostics.s b/llvm/test/MC/AArch64/FP8/miscellaneous-fp8-diagnostics.s new file mode 100644 index 0000000000000..4f79f038b6948 --- /dev/null +++ b/llvm/test/MC/AArch64/FP8/miscellaneous-fp8-diagnostics.s @@ -0,0 +1,84 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+fp8 2>&1 < %s| FileCheck %s + +// --------------------------------------------------------------------------// +// Element size extension incorrect + +bf1cvtl v0.8h, v0.8h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: bf1cvtl v0.8h, v0.8h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bf1cvtl2 v0.8h, v0.16h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid vector kind qualifier +// CHECK-NEXT: bf1cvtl2 v0.8h, v0.16h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bf2cvtl v0.8h, v0.8h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: bf2cvtl v0.8h, v0.8h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bf2cvtl2 v0.8h, v0.16h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid vector kind qualifier +// CHECK-NEXT: bf2cvtl2 v0.8h, v0.16h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +f1cvtl v0.8h, v0.8h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: f1cvtl v0.8h, v0.8h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +f1cvtl2 v0.8h, v0.16h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid vector kind qualifier +// CHECK-NEXT: f1cvtl2 v0.8h, v0.16h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +f2cvtl v0.8h, v0.8h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: f2cvtl v0.8h, v0.8h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +f2cvtl2 v0.8h, v0.16h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid vector kind qualifier +// CHECK-NEXT: f2cvtl2 v0.8h, v0.16h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fcvtn v31.8h, v31.4h, v31.4h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: fcvtn v31.8h, v31.4h, v31.4h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fcvtn v0.8s, v0.4s, v0.4s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid vector kind qualifier +// CHECK-NEXT: fcvtn v0.8s, v0.4s, v0.4s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fcvtn2 v0.16s, v0.4s, v0.4s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid vector kind qualifier +// CHECK-NEXT: fcvtn2 v0.16s, v0.4s, v0.4s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fscale v0.4h, v0.4s, v0.4s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: fscale v0.4h, v0.4s, v0.4s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fscale v0.8h, v0.8s, v0.8s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid vector kind qualifier +// CHECK-NEXT: fscale v0.8h, v0.8s, v0.8s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fscale v0.2s, v0.2h, v0.2h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: fscale v0.2s, v0.2h, v0.2h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fscale v0.4s, v31.4h, v0.4h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: fscale v0.4s, v31.4h, v0.4h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fscale v0.2d, v31.2h, v0.2h +// 
CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: fscale v0.2d, v31.2h, v0.2h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/FP8/miscellaneous-fp8.s b/llvm/test/MC/AArch64/FP8/miscellaneous-fp8.s new file mode 100644 index 0000000000000..2f1fd9b86ed84 --- /dev/null +++ b/llvm/test/MC/AArch64/FP8/miscellaneous-fp8.s @@ -0,0 +1,355 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+fp8 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+fp8 < %s \ +// RUN: | llvm-objdump -d --mattr=+fp8 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+fp8 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+fp8 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+fp8 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +/// +/// BF1CVTL instructions. +/// +bf1cvtl v0.8h, v0.8b +// CHECK-INST: bf1cvtl v0.8h, v0.8b +// CHECK-ENCODING: [0x00,0x78,0xa1,0x2e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 2ea17800 + +bf1cvtl v0.8h, v31.8b +// CHECK-INST: bf1cvtl v0.8h, v31.8b +// CHECK-ENCODING: [0xe0,0x7b,0xa1,0x2e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 2ea17be0 + +bf1cvtl v31.8h, v31.8b +// CHECK-INST: bf1cvtl v31.8h, v31.8b +// CHECK-ENCODING: [0xff,0x7b,0xa1,0x2e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 2ea17bff + +/// +/// BF1CVTL2 instructions. +/// +bf1cvtl2 v0.8h, v0.16b +// CHECK-INST: bf1cvtl2 v0.8h, v0.16b +// CHECK-ENCODING: [0x00,0x78,0xa1,0x6e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 6ea17800 + +bf1cvtl2 v0.8h, v31.16b +// CHECK-INST: bf1cvtl2 v0.8h, v31.16b +// CHECK-ENCODING: [0xe0,0x7b,0xa1,0x6e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 6ea17be0 + +bf1cvtl2 v31.8h, v31.16b +// CHECK-INST: bf1cvtl2 v31.8h, v31.16b +// CHECK-ENCODING: [0xff,0x7b,0xa1,0x6e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 6ea17bff + +/// +/// BF2CVTL instructions. +/// +bf2cvtl v0.8h, v0.8b +// CHECK-INST: bf2cvtl v0.8h, v0.8b +// CHECK-ENCODING: [0x00,0x78,0xe1,0x2e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 2ee17800 + +bf2cvtl v0.8h, v31.8b +// CHECK-INST: bf2cvtl v0.8h, v31.8b +// CHECK-ENCODING: [0xe0,0x7b,0xe1,0x2e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 2ee17be0 + +bf2cvtl v31.8h, v31.8b +// CHECK-INST: bf2cvtl v31.8h, v31.8b +// CHECK-ENCODING: [0xff,0x7b,0xe1,0x2e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 2ee17bff + +/// +/// BF2CVTL2 instructions. 
+/// +bf2cvtl2 v0.8h, v0.16b +// CHECK-INST: bf2cvtl2 v0.8h, v0.16b +// CHECK-ENCODING: [0x00,0x78,0xe1,0x6e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 6ee17800 + +bf2cvtl2 v0.8h, v31.16b +// CHECK-INST: bf2cvtl2 v0.8h, v31.16b +// CHECK-ENCODING: [0xe0,0x7b,0xe1,0x6e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 6ee17be0 + +bf2cvtl2 v31.8h, v31.16b +// CHECK-INST: bf2cvtl2 v31.8h, v31.16b +// CHECK-ENCODING: [0xff,0x7b,0xe1,0x6e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 6ee17bff + +/// +/// F1CVTL instructions. +/// +f1cvtl v0.8h, v0.8b +// CHECK-INST: f1cvtl v0.8h, v0.8b +// CHECK-ENCODING: [0x00,0x78,0x21,0x2e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 2e217800 + +f1cvtl v0.8h, v31.8b +// CHECK-INST: f1cvtl v0.8h, v31.8b +// CHECK-ENCODING: [0xe0,0x7b,0x21,0x2e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 2e217be0 + +f1cvtl v31.8h, v31.8b +// CHECK-INST: f1cvtl v31.8h, v31.8b +// CHECK-ENCODING: [0xff,0x7b,0x21,0x2e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 2e217bff + +/// +/// F1CVTL2 instructions. +/// +f1cvtl2 v0.8h, v0.16b +// CHECK-INST: f1cvtl2 v0.8h, v0.16b +// CHECK-ENCODING: [0x00,0x78,0x21,0x6e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 6e217800 + +f1cvtl2 v0.8h, v31.16b +// CHECK-INST: f1cvtl2 v0.8h, v31.16b +// CHECK-ENCODING: [0xe0,0x7b,0x21,0x6e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 6e217be0 + +f1cvtl2 v31.8h, v31.16b +// CHECK-INST: f1cvtl2 v31.8h, v31.16b +// CHECK-ENCODING: [0xff,0x7b,0x21,0x6e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 6e217bff + +/// +/// F2CVTL instructions. +/// +f2cvtl v0.8h, v0.8b +// CHECK-INST: f2cvtl v0.8h, v0.8b +// CHECK-ENCODING: [0x00,0x78,0x61,0x2e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 2e617800 + +f2cvtl v0.8h, v31.8b +// CHECK-INST: f2cvtl v0.8h, v31.8b +// CHECK-ENCODING: [0xe0,0x7b,0x61,0x2e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 2e617be0 + +f2cvtl v31.8h, v31.8b +// CHECK-INST: f2cvtl v31.8h, v31.8b +// CHECK-ENCODING: [0xff,0x7b,0x61,0x2e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 2e617bff + +/// +/// F2CVTL2 instructions. +/// +f2cvtl2 v0.8h, v0.16b +// CHECK-INST: f2cvtl2 v0.8h, v0.16b +// CHECK-ENCODING: [0x00,0x78,0x61,0x6e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 6e617800 + +f2cvtl2 v0.8h, v31.16b +// CHECK-INST: f2cvtl2 v0.8h, v31.16b +// CHECK-ENCODING: [0xe0,0x7b,0x61,0x6e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 6e617be0 + +f2cvtl2 v31.8h, v31.16b +// CHECK-INST: f2cvtl2 v31.8h, v31.16b +// CHECK-ENCODING: [0xff,0x7b,0x61,0x6e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 6e617bff + +/// +/// FCVTN instructions. 
+/// +// FP16 TO FP8 +fcvtn v31.8b, v31.4h, v31.4h +// CHECK-INST: fcvtn v31.8b, v31.4h, v31.4h +// CHECK-ENCODING: [0xff,0xf7,0x5f,0x0e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 0e5ff7ff + +fcvtn v31.8b, v0.4h, v0.4h +// CHECK-INST: fcvtn v31.8b, v0.4h, v0.4h +// CHECK-ENCODING: [0x1f,0xf4,0x40,0x0e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 0e40f41f + +fcvtn v0.8b, v0.4h, v0.4h +// CHECK-INST: fcvtn v0.8b, v0.4h, v0.4h +// CHECK-ENCODING: [0x00,0xf4,0x40,0x0e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 0e40f400 + +fcvtn v0.16b, v0.8h, v0.8h +// CHECK-INST: fcvtn v0.16b, v0.8h, v0.8h +// CHECK-ENCODING: [0x00,0xf4,0x40,0x4e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 4e40f400 + +fcvtn v31.16b, v0.8h, v0.8h +// CHECK-INST: fcvtn v31.16b, v0.8h, v0.8h +// CHECK-ENCODING: [0x1f,0xf4,0x40,0x4e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 4e40f41f + +fcvtn v31.16b, v31.8h, v31.8h +// CHECK-INST: fcvtn v31.16b, v31.8h, v31.8h +// CHECK-ENCODING: [0xff,0xf7,0x5f,0x4e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 4e5ff7ff + +// FP32 TO FP8 +fcvtn v0.8b, v0.4s, v0.4s +// CHECK-INST: fcvtn v0.8b, v0.4s, v0.4s +// CHECK-ENCODING: [0x00,0xf4,0x00,0x0e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 0e00f400 + +fcvtn v0.8b, v31.4s, v31.4s +// CHECK-INST: fcvtn v0.8b, v31.4s, v31.4s +// CHECK-ENCODING: [0xe0,0xf7,0x1f,0x0e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 0e1ff7e0 + +fcvtn v31.8b, v31.4s, v31.4s +// CHECK-INST: fcvtn v31.8b, v31.4s, v31.4s +// CHECK-ENCODING: [0xff,0xf7,0x1f,0x0e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 0e1ff7ff + +/// +/// FCVTN2 instructions. +/// + +fcvtn2 v0.16b, v0.4s, v0.4s +// CHECK-INST: fcvtn2 v0.16b, v0.4s, v0.4s +// CHECK-ENCODING: [0x00,0xf4,0x00,0x4e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 4e00f400 + +fcvtn2 v0.16b, v0.4s, v31.4s +// CHECK-INST: fcvtn2 v0.16b, v0.4s, v31.4s +// CHECK-ENCODING: [0x00,0xf4,0x1f,0x4e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 4e1ff400 + +fcvtn2 v31.16b, v31.4s, v31.4s +// CHECK-INST: fcvtn2 v31.16b, v31.4s, v31.4s +// CHECK-ENCODING: [0xff,0xf7,0x1f,0x4e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 4e1ff7ff + +/// +/// FSCALE instructions. 
+/// +fscale v0.4h, v0.4h, v0.4h +// CHECK-INST: fscale v0.4h, v0.4h, v0.4h +// CHECK-ENCODING: [0x00,0x3c,0xc0,0x2e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 2ec03c00 + +fscale v0.4h, v31.4h, v31.4h +// CHECK-INST: fscale v0.4h, v31.4h, v31.4h +// CHECK-ENCODING: [0xe0,0x3f,0xdf,0x2e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 2edf3fe0 + +fscale v31.4h, v31.4h, v31.4h +// CHECK-INST: fscale v31.4h, v31.4h, v31.4h +// CHECK-ENCODING: [0xff,0x3f,0xdf,0x2e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 2edf3fff + +fscale v0.8h, v0.8h, v0.8h +// CHECK-INST: fscale v0.8h, v0.8h, v0.8h +// CHECK-ENCODING: [0x00,0x3c,0xc0,0x6e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 6ec03c00 + +fscale v31.8h, v0.8h, v0.8h +// CHECK-INST: fscale v31.8h, v0.8h, v0.8h +// CHECK-ENCODING: [0x1f,0x3c,0xc0,0x6e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 6ec03c1f + +fscale v31.8h, v31.8h, v31.8h +// CHECK-INST: fscale v31.8h, v31.8h, v31.8h +// CHECK-ENCODING: [0xff,0x3f,0xdf,0x6e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 6edf3fff + +fscale v0.2s, v0.2s, v0.2s +// CHECK-INST: fscale v0.2s, v0.2s, v0.2s +// CHECK-ENCODING: [0x00,0xfc,0xa0,0x2e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 2ea0fc00 + +fscale v0.2s, v0.2s, v31.2s +// CHECK-INST: fscale v0.2s, v0.2s, v31.2s +// CHECK-ENCODING: [0x00,0xfc,0xbf,0x2e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 2ebffc00 + +fscale v31.2s, v31.2s, v31.2s +// CHECK-INST: fscale v31.2s, v31.2s, v31.2s +// CHECK-ENCODING: [0xff,0xff,0xbf,0x2e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 2ebfffff + +fscale v0.4s, v0.4s, v0.4s +// CHECK-INST: fscale v0.4s, v0.4s, v0.4s +// CHECK-ENCODING: [0x00,0xfc,0xa0,0x6e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 6ea0fc00 + +fscale v0.4s, v31.4s, v0.4s +// CHECK-INST: fscale v0.4s, v31.4s, v0.4s +// CHECK-ENCODING: [0xe0,0xff,0xa0,0x6e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 6ea0ffe0 + +fscale v31.4s, v31.4s, v31.4s +// CHECK-INST: fscale v31.4s, v31.4s, v31.4s +// CHECK-ENCODING: [0xff,0xff,0xbf,0x6e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 6ebfffff + +fscale v0.2d, v0.2d, v0.2d +// CHECK-INST: fscale v0.2d, v0.2d, v0.2d +// CHECK-ENCODING: [0x00,0xfc,0xe0,0x6e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 6ee0fc00 + +fscale v0.2d, v31.2d, v0.2d +// CHECK-INST: fscale v0.2d, v31.2d, v0.2d +// CHECK-ENCODING: [0xe0,0xff,0xe0,0x6e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 6ee0ffe0 + +fscale v31.2d, v31.2d, v31.2d +// CHECK-INST: fscale v31.2d, v31.2d, v31.2d +// CHECK-ENCODING: [0xff,0xff,0xff,0x6e] +// CHECK-ERROR: instruction requires: fp8 +// CHECK-UNKNOWN: 6effffff diff --git a/llvm/test/MC/AArch64/FP8_SME2/cvt-diagnostics.s b/llvm/test/MC/AArch64/FP8_SME2/cvt-diagnostics.s new file mode 100644 index 0000000000000..418ae9e2a4405 --- /dev/null +++ b/llvm/test/MC/AArch64/FP8_SME2/cvt-diagnostics.s @@ -0,0 +1,87 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+fp8 2>&1 < %s| FileCheck %s + +// --------------------------------------------------------------------------// +// Incorrect operand + +f1cvt { z0.h, z1.h }, z0 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: unknown token in expression +// CHECK-NEXT: f1cvt { z0.h, z1.h }, z0 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bf1cvt { z0, z1 }, z0.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for 
instruction +// CHECK-NEXT: bf1cvt { z0, z1 }, z0.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bf1cvtl { z0.b, z1.b }, z0.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: bf1cvtl { z0.b, z1.b }, z0.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bf2cvt { z0.h, z1.h }, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bf2cvt { z0.h, z1.h }, z0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bf2cvtl { z30.h}, z31.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: bf2cvtl { z30.h}, z31.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +f2cvt { z0, z1.h }, {z0.b} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: mismatched register size suffix +// CHECK-NEXT: f2cvt { z0, z1.h }, {z0.b} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +f2cvtl z0.h, z1.h, z0.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: f2cvtl z0.h, z1.h, z0.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fcvt z31.b, { z30.h } +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: fcvt z31.b, { z30.h } +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfcvt z0.b, { z0.b, z1.b } +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: bfcvt z0.b, { z0.b, z1.b } +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Incorrect range of vectors + +bf1cvt { z1.h, z2.h }, z0.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: bf1cvt { z1.h, z2.h }, z0.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +f1cvt { z1.h, z0.h }, z31.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: f1cvt { z1.h, z0.h }, z31.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +f1cvtl { z31.h, z0.h }, z0.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: f1cvtl { z31.h, z0.h }, z0.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fcvt z31.b, { z29.s - z0.s } +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: fcvt z31.b, { z29.s - z0.s } +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fcvtn z31.b, { z30.s - z1.s } +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: fcvtn z31.b, { z30.s - z1.s } +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fcvtn z0.b, { z31.s - z2.s } +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: fcvtn z0.b, { z31.s - z2.s } +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fcvtn z0.b, { z1.s - z4.s } +// CHECK: [[@LINE-1]]:{{[0-9]+}}: +// CHECK-NEXT: fcvtn z0.b, { z1.s - z4.s } +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/FP8_SME2/cvt.s b/llvm/test/MC/AArch64/FP8_SME2/cvt.s new file mode 100644 index 0000000000000..35539823fde22 --- /dev/null +++ b/llvm/test/MC/AArch64/FP8_SME2/cvt.s @@ -0,0 +1,157 @@ +// RUN: llvm-mc 
-triple=aarch64 -show-encoding -mattr=+sme2,+fp8 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+fp8 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2,+fp8 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+fp8 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+fp8 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2,+fp8 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +f1cvt {z0.h-z1.h}, z0.b // 11000001-00100110-11100000-00000000 +// CHECK-INST: f1cvt { z0.h, z1.h }, z0.b +// CHECK-ENCODING: [0x00,0xe0,0x26,0xc1] +// CHECK-ERROR: instruction requires: fp8 sme2 +// CHECK-UNKNOWN: c126e000 + +f1cvt {z30.h-z31.h}, z31.b // 11000001-00100110-11100011-11111110 +// CHECK-INST: f1cvt { z30.h, z31.h }, z31.b +// CHECK-ENCODING: [0xfe,0xe3,0x26,0xc1] +// CHECK-ERROR: instruction requires: fp8 sme2 +// CHECK-UNKNOWN: c126e3fe + +f1cvtl {z0.h-z1.h}, z0.b // 11000001-00100110-11100000-00000001 +// CHECK-INST: f1cvtl { z0.h, z1.h }, z0.b +// CHECK-ENCODING: [0x01,0xe0,0x26,0xc1] +// CHECK-ERROR: instruction requires: fp8 sme2 +// CHECK-UNKNOWN: c126e001 + +f1cvtl {z30.h-z31.h}, z31.b // 11000001-00100110-11100011-11111111 +// CHECK-INST: f1cvtl { z30.h, z31.h }, z31.b +// CHECK-ENCODING: [0xff,0xe3,0x26,0xc1] +// CHECK-ERROR: instruction requires: fp8 sme2 +// CHECK-UNKNOWN: c126e3ff + +bf1cvt {z0.h-z1.h}, z0.b // 11000001-01100110-11100000-00000000 +// CHECK-INST: bf1cvt { z0.h, z1.h }, z0.b +// CHECK-ENCODING: [0x00,0xe0,0x66,0xc1] +// CHECK-ERROR: instruction requires: fp8 sme2 +// CHECK-UNKNOWN: c166e000 + +bf1cvt {z30.h-z31.h}, z31.b // 11000001-01100110-11100011-11111110 +// CHECK-INST: bf1cvt { z30.h, z31.h }, z31.b +// CHECK-ENCODING: [0xfe,0xe3,0x66,0xc1] +// CHECK-ERROR: instruction requires: fp8 sme2 +// CHECK-UNKNOWN: c166e3fe + +bf1cvtl {z0.h-z1.h}, z0.b // 11000001-01100110-11100000-00000001 +// CHECK-INST: bf1cvtl { z0.h, z1.h }, z0.b +// CHECK-ENCODING: [0x01,0xe0,0x66,0xc1] +// CHECK-ERROR: instruction requires: fp8 sme2 +// CHECK-UNKNOWN: c166e001 + +bf1cvtl {z30.h-z31.h}, z31.b // 11000001-01100110-11100011-11111111 +// CHECK-INST: bf1cvtl { z30.h, z31.h }, z31.b +// CHECK-ENCODING: [0xff,0xe3,0x66,0xc1] +// CHECK-ERROR: instruction requires: fp8 sme2 +// CHECK-UNKNOWN: c166e3ff + +bf2cvt {z0.h-z1.h}, z0.b // 11000001-11100110-11100000-00000000 +// CHECK-INST: bf2cvt { z0.h, z1.h }, z0.b +// CHECK-ENCODING: [0x00,0xe0,0xe6,0xc1] +// CHECK-ERROR: instruction requires: fp8 sme2 +// CHECK-UNKNOWN: c1e6e000 + +bf2cvt {z30.h-z31.h}, z31.b // 11000001-11100110-11100011-11111110 +// CHECK-INST: bf2cvt { z30.h, z31.h }, z31.b +// CHECK-ENCODING: [0xfe,0xe3,0xe6,0xc1] +// CHECK-ERROR: instruction requires: fp8 sme2 +// CHECK-UNKNOWN: c1e6e3fe + +bf2cvtl {z0.h-z1.h}, z0.b // 11000001-11100110-11100000-00000001 +// CHECK-INST: bf2cvtl { z0.h, z1.h }, z0.b +// CHECK-ENCODING: [0x01,0xe0,0xe6,0xc1] +// CHECK-ERROR: instruction requires: fp8 sme2 +// CHECK-UNKNOWN: c1e6e001 + +bf2cvtl {z30.h-z31.h}, z31.b // 11000001-11100110-11100011-11111111 +// CHECK-INST: 
bf2cvtl { z30.h, z31.h }, z31.b +// CHECK-ENCODING: [0xff,0xe3,0xe6,0xc1] +// CHECK-ERROR: instruction requires: fp8 sme2 +// CHECK-UNKNOWN: c1e6e3ff + +f2cvt {z0.h-z1.h}, z0.b // 11000001-10100110-11100000-00000000 +// CHECK-INST: f2cvt { z0.h, z1.h }, z0.b +// CHECK-ENCODING: [0x00,0xe0,0xa6,0xc1] +// CHECK-ERROR: instruction requires: fp8 sme2 +// CHECK-UNKNOWN: c1a6e000 + +f2cvt {z30.h-z31.h}, z31.b // 11000001-10100110-11100011-11111110 +// CHECK-INST: f2cvt { z30.h, z31.h }, z31.b +// CHECK-ENCODING: [0xfe,0xe3,0xa6,0xc1] +// CHECK-ERROR: instruction requires: fp8 sme2 +// CHECK-UNKNOWN: c1a6e3fe + +f2cvtl {z0.h-z1.h}, z0.b // 11000001-10100110-11100000-00000001 +// CHECK-INST: f2cvtl { z0.h, z1.h }, z0.b +// CHECK-ENCODING: [0x01,0xe0,0xa6,0xc1] +// CHECK-ERROR: instruction requires: fp8 sme2 +// CHECK-UNKNOWN: c1a6e001 + +f2cvtl {z30.h-z31.h}, z31.b // 11000001-10100110-11100011-11111111 +// CHECK-INST: f2cvtl { z30.h, z31.h }, z31.b +// CHECK-ENCODING: [0xff,0xe3,0xa6,0xc1] +// CHECK-ERROR: instruction requires: fp8 sme2 +// CHECK-UNKNOWN: c1a6e3ff + +fcvt z0.b, {z0.h-z1.h} // 11000001-00100100-11100000-00000000 +// CHECK-INST: fcvt z0.b, { z0.h, z1.h } +// CHECK-ENCODING: [0x00,0xe0,0x24,0xc1] +// CHECK-ERROR: instruction requires: fp8 sme2 +// CHECK-UNKNOWN: c124e000 + +fcvt z31.b, {z30.h-z31.h} // 11000001-00100100-11100011-11011111 +// CHECK-INST: fcvt z31.b, { z30.h, z31.h } +// CHECK-ENCODING: [0xdf,0xe3,0x24,0xc1] +// CHECK-ERROR: instruction requires: fp8 sme2 +// CHECK-UNKNOWN: c124e3df + +fcvt z0.b, {z0.s-z3.s} // 11000001-00110100-11100000-00000000 +// CHECK-INST: fcvt z0.b, { z0.s - z3.s } +// CHECK-ENCODING: [0x00,0xe0,0x34,0xc1] +// CHECK-ERROR: instruction requires: fp8 sme2 +// CHECK-UNKNOWN: c134e000 + +fcvt z31.b, {z28.s-z31.s} // 11000001-00110100-11100011-10011111 +// CHECK-INST: fcvt z31.b, { z28.s - z31.s } +// CHECK-ENCODING: [0x9f,0xe3,0x34,0xc1] +// CHECK-ERROR: instruction requires: fp8 sme2 +// CHECK-UNKNOWN: c134e39f + +fcvtn z0.b, {z0.s-z3.s} // 11000001-00110100-11100000-00100000 +// CHECK-INST: fcvtn z0.b, { z0.s - z3.s } +// CHECK-ENCODING: [0x20,0xe0,0x34,0xc1] +// CHECK-ERROR: instruction requires: fp8 sme2 +// CHECK-UNKNOWN: c134e020 + +fcvtn z31.b, {z28.s-z31.s} // 11000001-00110100-11100011-10111111 +// CHECK-INST: fcvtn z31.b, { z28.s - z31.s } +// CHECK-ENCODING: [0xbf,0xe3,0x34,0xc1] +// CHECK-ERROR: instruction requires: fp8 sme2 +// CHECK-UNKNOWN: c134e3bf + +bfcvt z0.b, {z0.h-z1.h} // 11000001-01100100-11100000-00000000 +// CHECK-INST: bfcvt z0.b, { z0.h, z1.h } +// CHECK-ENCODING: [0x00,0xe0,0x64,0xc1] +// CHECK-ERROR: instruction requires: fp8 sme2 +// CHECK-UNKNOWN: c164e000 + +bfcvt z31.b, {z30.h-z31.h} // 11000001-01100100-11100011-11011111 +// CHECK-INST: bfcvt z31.b, { z30.h, z31.h } +// CHECK-ENCODING: [0xdf,0xe3,0x64,0xc1] +// CHECK-ERROR: instruction requires: fp8 sme2 +// CHECK-UNKNOWN: c164e3df diff --git a/llvm/test/MC/AArch64/FP8_SME2/fscale-diagnostics.c b/llvm/test/MC/AArch64/FP8_SME2/fscale-diagnostics.c new file mode 100644 index 0000000000000..b9288835a7001 --- /dev/null +++ b/llvm/test/MC/AArch64/FP8_SME2/fscale-diagnostics.c @@ -0,0 +1,62 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+fp8 2>&1 < %s| FileCheck %s + +// --------------------------------------------------------------------------// +// Incorrect operand + +fscale {z0.h-z1.h}, {z0.h-z1.h}, z0 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand +// CHECK-NEXT: fscale {z0.h-z1.h}, {z0.h-z1.h}, z0 +// CHECK-NOT: 
[[@LINE-1]]:{{[0-9]+}}: + +fscale {z0.d-z1.d}, {z0.h-z1.h}, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: fscale {z0.d-z1.d}, {z0.h-z1.h}, z0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fscale {z30.s-z31.s}, {z30.s-z31.s}, {z30 - z31} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: fscale {z30.s-z31.s}, {z30.s-z31.s}, {z30 - z31} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fscale {z0.s-z3.s}, {z0.d-z3.d}, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: fscale {z0.s-z3.s}, {z0.d-z3.d}, z0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fscale {z28.h-z31.h}, {z28-z31}, {z28.h-z31.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: fscale {z28.h-z31.h}, {z28-z31}, {z28.h-z31.h} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fscale {z28.d-z31.d}, z28.d, {z28.d-z31.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: fscale {z28.d-z31.d}, z28.d, {z28.d-z31.d} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fscale {z0.h-z1.h}, {z1.h-z4.h}, {z0.h-z1.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: fscale {z0.h-z1.h}, {z1.h-z4.h}, {z0.h-z1.h} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Incorrect range of vectors + +fscale {z0.h-z1.h}, {z1.h-z2.h}, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: fscale {z0.h-z1.h}, {z1.h-z2.h}, z0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fscale {z0.h-z1.h}, {z31.h-z0.h}, {z0.h-z1.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: fscale {z0.h-z1.h}, {z31.h-z0.h}, {z0.h-z1.h} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fscale {z2.h-z5.h}, {z0.h-z3.h}, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: fscale {z2.h-z5.h}, {z0.h-z3.h}, z0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fscale {z0.h-z3.h}, {z0.h-z3.h}, {z3.h-z6.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: fscale {z0.h-z3.h}, {z0.h-z3.h}, {z3.h-z6.h} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/FP8_SME2/fscale.s b/llvm/test/MC/AArch64/FP8_SME2/fscale.s new file mode 100644 index 0000000000000..b07bc9606ade4 --- /dev/null +++ b/llvm/test/MC/AArch64/FP8_SME2/fscale.s @@ -0,0 +1,160 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+fp8 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+fp8 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2,+fp8 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+fp8 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// 
Disassemble encoding and check the re-encoding (-show-encoding) matches. +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+fp8 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2,+fp8 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +//2X +fscale {z0.h-z1.h}, {z0.h-z1.h}, z0.h // 11000001-01100000-10100001-10000000 +// CHECK-INST: fscale { z0.h, z1.h }, { z0.h, z1.h }, z0.h +// CHECK-ENCODING: [0x80,0xa1,0x60,0xc1] +// CHECK-ERROR: instruction requires: fp8 sme2 +// CHECK-UNKNOWN: c160a180 + +fscale {z30.h-z31.h}, {z30.h-z31.h}, z15.h // 11000001-01101111-10100001-10011110 +// CHECK-INST: fscale { z30.h, z31.h }, { z30.h, z31.h }, z15.h +// CHECK-ENCODING: [0x9e,0xa1,0x6f,0xc1] +// CHECK-ERROR: instruction requires: fp8 sme2 +// CHECK-UNKNOWN: c16fa19e + +fscale {z0.s-z1.s}, {z0.s-z1.s}, z0.s // 11000001-10100000-10100001-10000000 +// CHECK-INST: fscale { z0.s, z1.s }, { z0.s, z1.s }, z0.s +// CHECK-ENCODING: [0x80,0xa1,0xa0,0xc1] +// CHECK-ERROR: instruction requires: fp8 sme2 +// CHECK-UNKNOWN: c1a0a180 + +fscale {z30.s-z31.s}, {z30.s-z31.s}, z15.s // 11000001-10101111-10100001-10011110 +// CHECK-INST: fscale { z30.s, z31.s }, { z30.s, z31.s }, z15.s +// CHECK-ENCODING: [0x9e,0xa1,0xaf,0xc1] +// CHECK-ERROR: instruction requires: fp8 sme2 +// CHECK-UNKNOWN: c1afa19e + +fscale {z0.d-z1.d}, {z0.d-z1.d}, z0.d // 11000001-11100000-10100001-10000000 +// CHECK-INST: fscale { z0.d, z1.d }, { z0.d, z1.d }, z0.d +// CHECK-ENCODING: [0x80,0xa1,0xe0,0xc1] +// CHECK-ERROR: instruction requires: fp8 sme2 +// CHECK-UNKNOWN: c1e0a180 + +fscale {z30.d-z31.d}, {z30.d-z31.d}, z15.d // 11000001-11101111-10100001-10011110 +// CHECK-INST: fscale { z30.d, z31.d }, { z30.d, z31.d }, z15.d +// CHECK-ENCODING: [0x9e,0xa1,0xef,0xc1] +// CHECK-ERROR: instruction requires: fp8 sme2 +// CHECK-UNKNOWN: c1efa19e + +fscale {z0.h-z1.h}, {z0.h-z1.h}, {z0.h-z1.h} // 11000001-01100000-10110001-10000000 +// CHECK-INST: fscale { z0.h, z1.h }, { z0.h, z1.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x80,0xb1,0x60,0xc1] +// CHECK-ERROR: instruction requires: fp8 sme2 +// CHECK-UNKNOWN: c160b180 + +fscale {z30.h-z31.h}, {z30.h-z31.h}, {z30.h-z31.h} // 11000001-01111110-10110001-10011110 +// CHECK-INST: fscale { z30.h, z31.h }, { z30.h, z31.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0x9e,0xb1,0x7e,0xc1] +// CHECK-ERROR: instruction requires: fp8 sme2 +// CHECK-UNKNOWN: c17eb19e + +fscale {z0.s-z1.s}, {z0.s-z1.s}, {z0.s-z1.s} // 11000001-10100000-10110001-10000000 +// CHECK-INST: fscale { z0.s, z1.s }, { z0.s, z1.s }, { z0.s, z1.s } +// CHECK-ENCODING: [0x80,0xb1,0xa0,0xc1] +// CHECK-ERROR: instruction requires: fp8 sme2 +// CHECK-UNKNOWN: c1a0b180 + +fscale {z30.s-z31.s}, {z30.s-z31.s}, {z30.s-z31.s} // 11000001-10111110-10110001-10011110 +// CHECK-INST: fscale { z30.s, z31.s }, { z30.s, z31.s }, { z30.s, z31.s } +// CHECK-ENCODING: [0x9e,0xb1,0xbe,0xc1] +// CHECK-ERROR: instruction requires: fp8 sme2 +// CHECK-UNKNOWN: c1beb19e + +fscale {z0.d-z1.d}, {z0.d-z1.d}, {z0.d-z1.d} // 11000001-11100000-10110001-10000000 +// CHECK-INST: fscale { z0.d, z1.d }, { z0.d, z1.d }, { z0.d, z1.d } +// CHECK-ENCODING: [0x80,0xb1,0xe0,0xc1] +// CHECK-ERROR: instruction requires: fp8 sme2 +// CHECK-UNKNOWN: c1e0b180 + +fscale {z30.d-z31.d}, {z30.d-z31.d}, {z30.d-z31.d} // 11000001-11111110-10110001-10011110 +// CHECK-INST: fscale { z30.d, z31.d }, { z30.d, z31.d }, { z30.d, z31.d } +// CHECK-ENCODING: [0x9e,0xb1,0xfe,0xc1] +// 
CHECK-ERROR: instruction requires: fp8 sme2 +// CHECK-UNKNOWN: c1feb19e + + +//4X + +fscale {z0.h-z3.h}, {z0.h-z3.h}, z0.h // 11000001-01100000-10101001-10000000 +// CHECK-INST: fscale { z0.h - z3.h }, { z0.h - z3.h }, z0.h +// CHECK-ENCODING: [0x80,0xa9,0x60,0xc1] +// CHECK-ERROR: instruction requires: fp8 sme2 +// CHECK-UNKNOWN: c160a980 + +fscale {z28.h-z31.h}, {z28.h-z31.h}, z15.h // 11000001-01101111-10101001-10011100 +// CHECK-INST: fscale { z28.h - z31.h }, { z28.h - z31.h }, z15.h +// CHECK-ENCODING: [0x9c,0xa9,0x6f,0xc1] +// CHECK-ERROR: instruction requires: fp8 sme2 +// CHECK-UNKNOWN: c16fa99c + +fscale {z0.s-z3.s}, {z0.s-z3.s}, z0.s // 11000001-10100000-10101001-10000000 +// CHECK-INST: fscale { z0.s - z3.s }, { z0.s - z3.s }, z0.s +// CHECK-ENCODING: [0x80,0xa9,0xa0,0xc1] +// CHECK-ERROR: instruction requires: fp8 sme2 +// CHECK-UNKNOWN: c1a0a980 + +fscale {z28.s-z31.s}, {z28.s-z31.s}, z15.s // 11000001-10101111-10101001-10011100 +// CHECK-INST: fscale { z28.s - z31.s }, { z28.s - z31.s }, z15.s +// CHECK-ENCODING: [0x9c,0xa9,0xaf,0xc1] +// CHECK-ERROR: instruction requires: fp8 sme2 +// CHECK-UNKNOWN: c1afa99c + +fscale {z0.d-z3.d}, {z0.d-z3.d}, z0.d // 11000001-11100000-10101001-10000000 +// CHECK-INST: fscale { z0.d - z3.d }, { z0.d - z3.d }, z0.d +// CHECK-ENCODING: [0x80,0xa9,0xe0,0xc1] +// CHECK-ERROR: instruction requires: fp8 sme2 +// CHECK-UNKNOWN: c1e0a980 + +fscale {z28.d-z31.d}, {z28.d-z31.d}, z15.d // 11000001-11101111-10101001-10011100 +// CHECK-INST: fscale { z28.d - z31.d }, { z28.d - z31.d }, z15.d +// CHECK-ENCODING: [0x9c,0xa9,0xef,0xc1] +// CHECK-ERROR: instruction requires: fp8 sme2 +// CHECK-UNKNOWN: c1efa99c + +fscale {z0.h-z3.h}, {z0.h-z3.h}, {z0.h-z3.h} // 11000001-01100000-10111001-10000000 +// CHECK-INST: fscale { z0.h - z3.h }, { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x80,0xb9,0x60,0xc1] +// CHECK-ERROR: instruction requires: fp8 sme2 +// CHECK-UNKNOWN: c160b980 + +fscale {z28.h-z31.h}, {z28.h-z31.h}, {z28.h-z31.h} // 11000001-01111100-10111001-10011100 +// CHECK-INST: fscale { z28.h - z31.h }, { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x9c,0xb9,0x7c,0xc1] +// CHECK-ERROR: instruction requires: fp8 sme2 +// CHECK-UNKNOWN: c17cb99c + +fscale {z0.s-z3.s}, {z0.s-z3.s}, {z0.s-z3.s} // 11000001-10100000-10111001-10000000 +// CHECK-INST: fscale { z0.s - z3.s }, { z0.s - z3.s }, { z0.s - z3.s } +// CHECK-ENCODING: [0x80,0xb9,0xa0,0xc1] +// CHECK-ERROR: instruction requires: fp8 sme2 +// CHECK-UNKNOWN: c1a0b980 + +fscale {z28.s-z31.s}, {z28.s-z31.s}, {z28.s-z31.s} // 11000001-10111100-10111001-10011100 +// CHECK-INST: fscale { z28.s - z31.s }, { z28.s - z31.s }, { z28.s - z31.s } +// CHECK-ENCODING: [0x9c,0xb9,0xbc,0xc1] +// CHECK-ERROR: instruction requires: fp8 sme2 +// CHECK-UNKNOWN: c1bcb99c + +fscale {z0.d-z3.d}, {z0.d-z3.d}, {z0.d-z3.d} // 11000001-11100000-10111001-10000000 +// CHECK-INST: fscale { z0.d - z3.d }, { z0.d - z3.d }, { z0.d - z3.d } +// CHECK-ENCODING: [0x80,0xb9,0xe0,0xc1] +// CHECK-ERROR: instruction requires: fp8 sme2 +// CHECK-UNKNOWN: c1e0b980 + +fscale {z28.d-z31.d}, {z28.d-z31.d}, {z28.d-z31.d} // 11000001-11111100-10111001-10011100 +// CHECK-INST: fscale { z28.d - z31.d }, { z28.d - z31.d }, { z28.d - z31.d } +// CHECK-ENCODING: [0x9c,0xb9,0xfc,0xc1] +// CHECK-ERROR: instruction requires: fp8 sme2 +// CHECK-UNKNOWN: c1fcb99c diff --git a/llvm/test/MC/AArch64/FP8_SVE2/fcvt-diagnostics.s b/llvm/test/MC/AArch64/FP8_SVE2/fcvt-diagnostics.s new file mode 100644 index 0000000000000..6ecbdaed314b3 --- 
/dev/null +++ b/llvm/test/MC/AArch64/FP8_SVE2/fcvt-diagnostics.s @@ -0,0 +1,131 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+fp8 2>&1 < %s | FileCheck %s +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+fp8 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// + +f1cvt z0.h, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: f1cvt z0.h, z0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +f1cvt z0.b, z0.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: f1cvt z0.b, z0.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +f1cvt z32.h, z0.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: f1cvt z32.h, z0.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + + +f2cvt z0.h, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: f2cvt z0.h, z0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +f2cvt z0.b, z0.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: f2cvt z0.b, z0.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +f2cvt z32.h, z0.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: f2cvt z32.h, z0.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + + +bf1cvt z0.h, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bf1cvt z0.h, z0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bf1cvt z0.b, z0.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bf1cvt z0.b, z0.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bf1cvt z32.h, z0.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: bf1cvt z32.h, z0.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + + +bf2cvt z0.h, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bf2cvt z0.h, z0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bf2cvt z0.b, z0.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bf2cvt z0.b, z0.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bf2cvt z32.h, z0.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: bf2cvt z32.h, z0.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + + +f1cvtlt z0.h, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: f1cvtlt z0.h, z0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +f1cvtlt z0.b, z0.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: f1cvtlt z0.b, z0.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +f1cvtlt z32.h, z0.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: f1cvtlt z32.h, z0.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + + +f2cvtlt z0.h, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: f2cvtlt z0.h, z0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +f2cvtlt z0.b, z0.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: f2cvtlt z0.b, z0.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +f2cvtlt z32.h, z0.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: f2cvtlt z32.h, z0.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + + +bf1cvtlt z0.h, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bf1cvtlt z0.h, z0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bf1cvtlt z0.b, z0.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: 
bf1cvtlt z0.b, z0.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bf1cvtlt z32.h, z0.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: bf1cvtlt z32.h, z0.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + + +bf2cvtlt z0.h, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bf2cvtlt z0.h, z0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bf2cvtlt z0.b, z0.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bf2cvtlt z0.b, z0.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bf2cvtlt z32.h, z0.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: bf2cvtlt z32.h, z0.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: \ No newline at end of file diff --git a/llvm/test/MC/AArch64/FP8_SVE2/fcvt.s b/llvm/test/MC/AArch64/FP8_SVE2/fcvt.s new file mode 100644 index 0000000000000..2301935db012f --- /dev/null +++ b/llvm/test/MC/AArch64/FP8_SVE2/fcvt.s @@ -0,0 +1,237 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+fp8 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+fp8 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2,+fp8 < %s \ +// RUN: | llvm-objdump -d --mattr=+sve2,+fp8 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2,+fp8 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+fp8 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2,+fp8 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +// +// F1CVT instructions +// +f1cvt z0.h, z0.b // 01100101-00001000-00110000-00000000 +// CHECK-INST: f1cvt z0.h, z0.b +// CHECK-ENCODING: [0x00,0x30,0x08,0x65] +// CHECK-ERROR: instruction requires: fp8 sve2 +// CHECK-UNKNOWN: 65083000 + +f1cvt z0.h, z31.b // 01100101-00001000-00110011-11100000 +// CHECK-INST: f1cvt z0.h, z31.b +// CHECK-ENCODING: [0xe0,0x33,0x08,0x65] +// CHECK-ERROR: instruction requires: fp8 sve2 +// CHECK-UNKNOWN: 650833e0 + +f1cvt z31.h, z0.b // 01100101-00001000-00110000-00011111 +// CHECK-INST: f1cvt z31.h, z0.b +// CHECK-ENCODING: [0x1f,0x30,0x08,0x65] +// CHECK-ERROR: instruction requires: fp8 sve2 +// CHECK-UNKNOWN: 6508301f + +f1cvt z31.h, z31.b // 01100101-00001000-00110011-11111111 +// CHECK-INST: f1cvt z31.h, z31.b +// CHECK-ENCODING: [0xff,0x33,0x08,0x65] +// CHECK-ERROR: instruction requires: fp8 sve2 +// CHECK-UNKNOWN: 650833ff + +// +// F2CVT instructions +// +f2cvt z0.h, z0.b // 01100101-00001000-00110100-00000000 +// CHECK-INST: f2cvt z0.h, z0.b +// CHECK-ENCODING: [0x00,0x34,0x08,0x65] +// CHECK-ERROR: instruction requires: fp8 sve2 +// CHECK-UNKNOWN: 65083400 + +f2cvt z0.h, z31.b // 01100101-00001000-00110111-11100000 +// CHECK-INST: f2cvt z0.h, z31.b +// CHECK-ENCODING: [0xe0,0x37,0x08,0x65] +// CHECK-ERROR: instruction requires: fp8 sve2 +// CHECK-UNKNOWN: 650837e0 + +f2cvt z31.h, z0.b // 01100101-00001000-00110100-00011111 +// CHECK-INST: f2cvt z31.h, z0.b +// CHECK-ENCODING: [0x1f,0x34,0x08,0x65] +// CHECK-ERROR: instruction 
requires: fp8 sve2 +// CHECK-UNKNOWN: 6508341f + +f2cvt z31.h, z31.b // 01100101-00001000-00110111-11111111 +// CHECK-INST: f2cvt z31.h, z31.b +// CHECK-ENCODING: [0xff,0x37,0x08,0x65] +// CHECK-ERROR: instruction requires: fp8 sve2 +// CHECK-UNKNOWN: 650837ff + + +// +// BF1CVT instructions +// +bf1cvt z0.h, z0.b // 01100101-00001000-00111000-00000000 +// CHECK-INST: bf1cvt z0.h, z0.b +// CHECK-ENCODING: [0x00,0x38,0x08,0x65] +// CHECK-ERROR: instruction requires: fp8 sve2 +// CHECK-UNKNOWN: 65083800 + +bf1cvt z0.h, z31.b // 01100101-00001000-00111011-11100000 +// CHECK-INST: bf1cvt z0.h, z31.b +// CHECK-ENCODING: [0xe0,0x3b,0x08,0x65] +// CHECK-ERROR: instruction requires: fp8 sve2 +// CHECK-UNKNOWN: 65083be0 + +bf1cvt z31.h, z0.b // 01100101-00001000-00111000-00011111 +// CHECK-INST: bf1cvt z31.h, z0.b +// CHECK-ENCODING: [0x1f,0x38,0x08,0x65] +// CHECK-ERROR: instruction requires: fp8 sve2 +// CHECK-UNKNOWN: 6508381f + +bf1cvt z31.h, z31.b // 01100101-00001000-00111011-11111111 +// CHECK-INST: bf1cvt z31.h, z31.b +// CHECK-ENCODING: [0xff,0x3b,0x08,0x65] +// CHECK-ERROR: instruction requires: fp8 sve2 +// CHECK-UNKNOWN: 65083bff + + +// +// BF2CVT instructions +// +bf2cvt z0.h, z0.b // 01100101-00001000-00111100-00000000 +// CHECK-INST: bf2cvt z0.h, z0.b +// CHECK-ENCODING: [0x00,0x3c,0x08,0x65] +// CHECK-ERROR: instruction requires: fp8 sve2 +// CHECK-UNKNOWN: 65083c00 + +bf2cvt z0.h, z31.b // 01100101-00001000-00111111-11100000 +// CHECK-INST: bf2cvt z0.h, z31.b +// CHECK-ENCODING: [0xe0,0x3f,0x08,0x65] +// CHECK-ERROR: instruction requires: fp8 sve2 +// CHECK-UNKNOWN: 65083fe0 + +bf2cvt z31.h, z0.b // 01100101-00001000-00111100-00011111 +// CHECK-INST: bf2cvt z31.h, z0.b +// CHECK-ENCODING: [0x1f,0x3c,0x08,0x65] +// CHECK-ERROR: instruction requires: fp8 sve2 +// CHECK-UNKNOWN: 65083c1f + +bf2cvt z31.h, z31.b // 01100101-00001000-00111111-11111111 +// CHECK-INST: bf2cvt z31.h, z31.b +// CHECK-ENCODING: [0xff,0x3f,0x08,0x65] +// CHECK-ERROR: instruction requires: fp8 sve2 +// CHECK-UNKNOWN: 65083fff + + +// +// F1CVTLT instructions +// +f1cvtlt z0.h, z0.b // 01100101-00001001-00110000-00000000 +// CHECK-INST: f1cvtlt z0.h, z0.b +// CHECK-ENCODING: [0x00,0x30,0x09,0x65] +// CHECK-ERROR: instruction requires: fp8 sve2 +// CHECK-UNKNOWN: 65093000 + +f1cvtlt z0.h, z31.b // 01100101-00001001-00110011-11100000 +// CHECK-INST: f1cvtlt z0.h, z31.b +// CHECK-ENCODING: [0xe0,0x33,0x09,0x65] +// CHECK-ERROR: instruction requires: fp8 sve2 +// CHECK-UNKNOWN: 650933e0 + +f1cvtlt z31.h, z0.b // 01100101-00001001-00110000-00011111 +// CHECK-INST: f1cvtlt z31.h, z0.b +// CHECK-ENCODING: [0x1f,0x30,0x09,0x65] +// CHECK-ERROR: instruction requires: fp8 sve2 +// CHECK-UNKNOWN: 6509301f + +f1cvtlt z31.h, z31.b // 01100101-00001001-00110011-11111111 +// CHECK-INST: f1cvtlt z31.h, z31.b +// CHECK-ENCODING: [0xff,0x33,0x09,0x65] +// CHECK-ERROR: instruction requires: fp8 sve2 +// CHECK-UNKNOWN: 650933ff + + +// +// F2CVTLT instructions +// +f2cvtlt z0.h, z0.b // 01100101-00001001-00110100-00000000 +// CHECK-INST: f2cvtlt z0.h, z0.b +// CHECK-ENCODING: [0x00,0x34,0x09,0x65] +// CHECK-ERROR: instruction requires: fp8 sve2 +// CHECK-UNKNOWN: 65093400 + +f2cvtlt z0.h, z31.b // 01100101-00001001-00110111-11100000 +// CHECK-INST: f2cvtlt z0.h, z31.b +// CHECK-ENCODING: [0xe0,0x37,0x09,0x65] +// CHECK-ERROR: instruction requires: fp8 sve2 +// CHECK-UNKNOWN: 650937e0 + +f2cvtlt z31.h, z0.b // 01100101-00001001-00110100-00011111 +// CHECK-INST: f2cvtlt z31.h, z0.b +// CHECK-ENCODING: [0x1f,0x34,0x09,0x65] +// 
CHECK-ERROR: instruction requires: fp8 sve2 +// CHECK-UNKNOWN: 6509341f + +f2cvtlt z31.h, z31.b // 01100101-00001001-00110111-11111111 +// CHECK-INST: f2cvtlt z31.h, z31.b +// CHECK-ENCODING: [0xff,0x37,0x09,0x65] +// CHECK-ERROR: instruction requires: fp8 sve2 +// CHECK-UNKNOWN: 650937ff + + +// +// BF1CVTLT instructions +// +bf1cvtlt z0.h, z0.b // 01100101-00001001-00111000-00000000 +// CHECK-INST: bf1cvtlt z0.h, z0.b +// CHECK-ENCODING: [0x00,0x38,0x09,0x65] +// CHECK-ERROR: instruction requires: fp8 sve2 +// CHECK-UNKNOWN: 65093800 + +bf1cvtlt z0.h, z31.b // 01100101-00001001-00111011-11100000 +// CHECK-INST: bf1cvtlt z0.h, z31.b +// CHECK-ENCODING: [0xe0,0x3b,0x09,0x65] +// CHECK-ERROR: instruction requires: fp8 sve2 +// CHECK-UNKNOWN: 65093be0 + +bf1cvtlt z31.h, z0.b // 01100101-00001001-00111000-00011111 +// CHECK-INST: bf1cvtlt z31.h, z0.b +// CHECK-ENCODING: [0x1f,0x38,0x09,0x65] +// CHECK-ERROR: instruction requires: fp8 sve2 +// CHECK-UNKNOWN: 6509381f + +bf1cvtlt z31.h, z31.b // 01100101-00001001-00111011-11111111 +// CHECK-INST: bf1cvtlt z31.h, z31.b +// CHECK-ENCODING: [0xff,0x3b,0x09,0x65] +// CHECK-ERROR: instruction requires: fp8 sve2 +// CHECK-UNKNOWN: 65093bff + + +// +// BF2CVTLT instructions +// +bf2cvtlt z0.h, z0.b // 01100101-00001001-00111100-00000000 +// CHECK-INST: bf2cvtlt z0.h, z0.b +// CHECK-ENCODING: [0x00,0x3c,0x09,0x65] +// CHECK-ERROR: instruction requires: fp8 sve2 +// CHECK-UNKNOWN: 65093c00 + +bf2cvtlt z0.h, z31.b // 01100101-00001001-00111111-11100000 +// CHECK-INST: bf2cvtlt z0.h, z31.b +// CHECK-ENCODING: [0xe0,0x3f,0x09,0x65] +// CHECK-ERROR: instruction requires: fp8 sve2 +// CHECK-UNKNOWN: 65093fe0 + +bf2cvtlt z31.h, z0.b // 01100101-00001001-00111100-00011111 +// CHECK-INST: bf2cvtlt z31.h, z0.b +// CHECK-ENCODING: [0x1f,0x3c,0x09,0x65] +// CHECK-ERROR: instruction requires: fp8 sve2 +// CHECK-UNKNOWN: 65093c1f + +bf2cvtlt z31.h, z31.b // 01100101-00001001-00111111-11111111 +// CHECK-INST: bf2cvtlt z31.h, z31.b +// CHECK-ENCODING: [0xff,0x3f,0x09,0x65] +// CHECK-ERROR: instruction requires: fp8 sve2 +// CHECK-UNKNOWN: 65093fff diff --git a/llvm/test/MC/AArch64/FP8_SVE2/fcvtn-diagnostics.s b/llvm/test/MC/AArch64/FP8_SVE2/fcvtn-diagnostics.s new file mode 100644 index 0000000000000..86cca86542aa3 --- /dev/null +++ b/llvm/test/MC/AArch64/FP8_SVE2/fcvtn-diagnostics.s @@ -0,0 +1,70 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+fp8 2>&1 < %s | FileCheck %s +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+fp8 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// + +fcvtn z0.b, {z1.h, z2.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: fcvtn z0.b, {z1.h, z2.h} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fcvtn z0.h, {z0.h, z1.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: fcvtn z0.h, {z0.h, z1.h} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fcvtn z0.b, {z0.b, z1.b} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: fcvtn z0.b, {z0.b, z1.b} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + + + +fcvtnb z0.b, {z1.s, z2.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element type +// CHECK-NEXT: fcvtnb z0.b, {z1.s, z2.s} +// 
CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fcvtnb z0.h, {z0.s, z1.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: fcvtnb z0.h, {z0.s, z1.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fcvtnb z0.b, {z0.h, z1.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: fcvtnb z0.b, {z0.h, z1.h} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + + + +bfcvtn z0.b, {z1.h, z2.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: bfcvtn z0.b, {z1.h, z2.h} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfcvtn z0.h, {z0.h, z1.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: bfcvtn z0.h, {z0.h, z1.h} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfcvtn z0.b, {z0.b, z1.b} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: bfcvtn z0.b, {z0.b, z1.b} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + + + +fcvtnt z0.b, {z1.s, z2.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element type +// CHECK-NEXT: fcvtnt z0.b, {z1.s, z2.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fcvtnt z0.h, {z0.s, z1.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: fcvtnt z0.h, {z0.s, z1.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fcvtnt z0.b, {z0.h, z1.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: fcvtnt z0.b, {z0.h, z1.h} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: \ No newline at end of file diff --git a/llvm/test/MC/AArch64/FP8_SVE2/fcvtn.s b/llvm/test/MC/AArch64/FP8_SVE2/fcvtn.s new file mode 100644 index 0000000000000..e16ff24098ef1 --- /dev/null +++ b/llvm/test/MC/AArch64/FP8_SVE2/fcvtn.s @@ -0,0 +1,125 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+fp8 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+fp8 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2,+fp8 < %s \ +// RUN: | llvm-objdump -d --mattr=+sve2,+fp8 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2,+fp8 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. 
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+fp8 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2,+fp8 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +// +// FCVTN instructions +// +fcvtn z0.b, {z0.h, z1.h} // 01100101-00001010-00110000-00000000 +// CHECK-INST: fcvtn z0.b, { z0.h, z1.h } +// CHECK-ENCODING: [0x00,0x30,0x0a,0x65] +// CHECK-ERROR: instruction requires: fp8 sve2 +// CHECK-UNKNOWN: 650a3000 + +fcvtn z0.b, {z30.h, z31.h} // 01100101-00001010-00110011-11000000 +// CHECK-INST: fcvtn z0.b, { z30.h, z31.h } +// CHECK-ENCODING: [0xc0,0x33,0x0a,0x65] +// CHECK-ERROR: instruction requires: fp8 sve2 +// CHECK-UNKNOWN: 650a33c0 + +fcvtn z31.b, {z0.h, z1.h} // 01100101-00001010-00110000-00011111 +// CHECK-INST: fcvtn z31.b, { z0.h, z1.h } +// CHECK-ENCODING: [0x1f,0x30,0x0a,0x65] +// CHECK-ERROR: instruction requires: fp8 sve2 +// CHECK-UNKNOWN: 650a301f + +fcvtn z31.b, {z30.h, z31.h} // 01100101-00001010-00110011-11011111 +// CHECK-INST: fcvtn z31.b, { z30.h, z31.h } +// CHECK-ENCODING: [0xdf,0x33,0x0a,0x65] +// CHECK-ERROR: instruction requires: fp8 sve2 +// CHECK-UNKNOWN: 650a33df + +// +// FCVTNB instructions +// +fcvtnb z0.b, {z0.s, z1.s} // 01100101-00001010-00110100-00000000 +// CHECK-INST: fcvtnb z0.b, { z0.s, z1.s } +// CHECK-ENCODING: [0x00,0x34,0x0a,0x65] +// CHECK-ERROR: instruction requires: fp8 sve2 +// CHECK-UNKNOWN: 650a3400 + +fcvtnb z0.b, {z30.s, z31.s} // 01100101-00001010-00110111-11000000 +// CHECK-INST: fcvtnb z0.b, { z30.s, z31.s } +// CHECK-ENCODING: [0xc0,0x37,0x0a,0x65] +// CHECK-ERROR: instruction requires: fp8 sve2 +// CHECK-UNKNOWN: 650a37c0 + +fcvtnb z31.b, {z0.s, z1.s} // 01100101-00001010-00110100-00011111 +// CHECK-INST: fcvtnb z31.b, { z0.s, z1.s } +// CHECK-ENCODING: [0x1f,0x34,0x0a,0x65] +// CHECK-ERROR: instruction requires: fp8 sve2 +// CHECK-UNKNOWN: 650a341f + +fcvtnb z31.b, {z30.s, z31.s} // 01100101-00001010-00110111-11011111 +// CHECK-INST: fcvtnb z31.b, { z30.s, z31.s } +// CHECK-ENCODING: [0xdf,0x37,0x0a,0x65] +// CHECK-ERROR: instruction requires: fp8 sve2 +// CHECK-UNKNOWN: 650a37df + + +// +// BFCVTN instructions +// +bfcvtn z0.b, {z0.h, z1.h} // 01100101-00001010-00111000-00000000 +// CHECK-INST: bfcvtn z0.b, { z0.h, z1.h } +// CHECK-ENCODING: [0x00,0x38,0x0a,0x65] +// CHECK-ERROR: instruction requires: fp8 sve2 +// CHECK-UNKNOWN: 650a3800 + +bfcvtn z0.b, {z30.h, z31.h} // 01100101-00001010-00111011-11000000 +// CHECK-INST: bfcvtn z0.b, { z30.h, z31.h } +// CHECK-ENCODING: [0xc0,0x3b,0x0a,0x65] +// CHECK-ERROR: instruction requires: fp8 sve2 +// CHECK-UNKNOWN: 650a3bc0 + +bfcvtn z31.b, {z0.h, z1.h} // 01100101-00001010-00111000-00011111 +// CHECK-INST: bfcvtn z31.b, { z0.h, z1.h } +// CHECK-ENCODING: [0x1f,0x38,0x0a,0x65] +// CHECK-ERROR: instruction requires: fp8 sve2 +// CHECK-UNKNOWN: 650a381f + +bfcvtn z31.b, {z30.h, z31.h} // 01100101-00001010-00111011-11011111 +// CHECK-INST: bfcvtn z31.b, { z30.h, z31.h } +// CHECK-ENCODING: [0xdf,0x3b,0x0a,0x65] +// CHECK-ERROR: instruction requires: fp8 sve2 +// CHECK-UNKNOWN: 650a3bdf + + +// +// FCVTNT instructions +// +fcvtnt z0.b, {z0.s, z1.s} // 01100101-00001010-00111100-00000000 +// CHECK-INST: fcvtnt z0.b, { z0.s, z1.s } +// CHECK-ENCODING: [0x00,0x3c,0x0a,0x65] +// CHECK-ERROR: instruction requires: fp8 sve2 +// CHECK-UNKNOWN: 650a3c00 + +fcvtnt z0.b, {z30.s, z31.s} // 01100101-00001010-00111111-11000000 +// CHECK-INST: fcvtnt z0.b, { z30.s, z31.s } +// 
CHECK-ENCODING: [0xc0,0x3f,0x0a,0x65]
+// CHECK-ERROR: instruction requires: fp8 sve2
+// CHECK-UNKNOWN: 650a3fc0
+
+fcvtnt  z31.b, {z0.s, z1.s}  // 01100101-00001010-00111100-00011111
+// CHECK-INST: fcvtnt  z31.b, { z0.s, z1.s }
+// CHECK-ENCODING: [0x1f,0x3c,0x0a,0x65]
+// CHECK-ERROR: instruction requires: fp8 sve2
+// CHECK-UNKNOWN: 650a3c1f
+
+fcvtnt  z31.b, {z30.s, z31.s}  // 01100101-00001010-00111111-11011111
+// CHECK-INST: fcvtnt  z31.b, { z30.s, z31.s }
+// CHECK-ENCODING: [0xdf,0x3f,0x0a,0x65]
+// CHECK-ERROR: instruction requires: fp8 sve2
+// CHECK-UNKNOWN: 650a3fdf
diff --git a/llvm/test/MC/AArch64/SVE2/fcvtnt-diagnostics.s b/llvm/test/MC/AArch64/SVE2/fcvtnt-diagnostics.s
index abfdea8c4853b..ae287b94ec01b 100644
--- a/llvm/test/MC/AArch64/SVE2/fcvtnt-diagnostics.s
+++ b/llvm/test/MC/AArch64/SVE2/fcvtnt-diagnostics.s
@@ -5,7 +5,7 @@
 // Invalid element width

 fcvtnt z0.b, p0/m, z0.b
-// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 // CHECK-NEXT: fcvtnt z0.b, p0/m, z0.b
 // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

@@ -25,7 +25,7 @@ fcvtnt z0.d, p0/m, z0.d
 // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

 fcvtnt z0.b, p0/m, z0.h
-// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 // CHECK-NEXT: fcvtnt z0.b, p0/m, z0.h
 // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp
index b432e7ac2d868..b662fbe3457cb 100644
--- a/llvm/unittests/TargetParser/TargetParserTest.cpp
+++ b/llvm/unittests/TargetParser/TargetParserTest.cpp
@@ -1732,7 +1732,7 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) {
       AArch64::AEK_RCPC3,      AArch64::AEK_THE,       AArch64::AEK_D128,
       AArch64::AEK_LSE128,     AArch64::AEK_SPECRES2,  AArch64::AEK_RASv2,
       AArch64::AEK_ITE,        AArch64::AEK_GCS,       AArch64::AEK_FPMR,
-  };
+      AArch64::AEK_FP8};

   std::vector<StringRef> Features;

@@ -1805,6 +1805,7 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) {
   EXPECT_TRUE(llvm::is_contained(Features, "+ite"));
   EXPECT_TRUE(llvm::is_contained(Features, "+gcs"));
   EXPECT_TRUE(llvm::is_contained(Features, "+fpmr"));
+  EXPECT_TRUE(llvm::is_contained(Features, "+fp8"));

   // Assuming we listed every extension above, this should produce the same
   // result. (note that AEK_NONE doesn't have a name so it won't be in the
@@ -1929,6 +1930,7 @@ TEST(TargetParserTest, AArch64ArchExtFeature) {
       {"rasv2", "norasv2", "+rasv2", "-rasv2"},
       {"gcs", "nogcs", "+gcs", "-gcs"},
       {"fpmr", "nofpmr", "+fpmr", "-fpmr"},
+      {"fp8", "nofp8", "+fp8", "-fp8"},
   };

   for (unsigned i = 0; i < std::size(ArchExt); i++) {

From f3f0672039f42610101db8bbe97f521a65ede413 Mon Sep 17 00:00:00 2001
From: Egor Zhdan 
Date: Thu, 26 Oct 2023 16:14:23 +0100
Subject: [PATCH 083/877] [Clang] Fix several `-Wdocumentation` warnings (NFC)

```
clang/include/clang/Lex/Preprocessor.h:2893:14: warning: parameter 'isEnter:' not found in the function declaration [-Wdocumentation]
  /// \param isEnter: true if this PP is entering a region; otherwise, this PP
             ^~~~~~~~
clang/include/clang/Lex/Preprocessor.h:2895:14: warning: parameter 'Loc:' not found in the function declaration [-Wdocumentation]
  /// \param Loc: the location of the entry or exit of a
             ^~~~
clang/include/clang/Lex/Preprocessor.h:2907:14: warning: parameter 'StartLoc:' not found in the function declaration [-Wdocumentation]
  /// \param StartLoc: output argument. 
It will be set to the start location of
             ^~~~~~~~~
```
---
 clang/include/clang/Lex/Preprocessor.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index 18d88407ae12c..4a99447e757c6 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -2879,9 +2879,9 @@ class Preprocessor {
   /// Alter the state of whether this PP currently is in a
   /// "-Wunsafe-buffer-usage" opt-out region.
   ///
-  /// \param isEnter: true if this PP is entering a region; otherwise, this PP
+  /// \param isEnter true if this PP is entering a region; otherwise, this PP
   ///                 is exiting a region
-  /// \param Loc: the location of the entry or exit of a
+  /// \param Loc the location of the entry or exit of a
   ///            region
   /// \return true iff it is INVALID to enter or exit a region, i.e.,
   /// attempt to enter a region before exiting a previous region, or exiting a
@@ -2893,7 +2893,7 @@ class Preprocessor {
   /// opt-out region
   bool isPPInSafeBufferOptOutRegion();

-  /// \param StartLoc: output argument. It will be set to the start location of
+  /// \param StartLoc output argument. It will be set to the start location of
   /// the current "-Wunsafe-buffer-usage" opt-out region iff this function
   /// returns true.
   /// \return true iff this PP is currently in a "-Wunsafe-buffer-usage"

From a6d0e8791e1015a12f8277e6032eae9fdab686fe Mon Sep 17 00:00:00 2001
From: Vlad Serebrennikov 
Date: Thu, 26 Oct 2023 19:22:52 +0400
Subject: [PATCH 084/877] [clang][NFC] Refactor enums that hold size of `Type`
 and `DeclContext` bit-fields (#70296)

This patch refactors said enums to hold the total size of a bit-field, and
not just non-inherited bits. This brings `Type` and `DeclContext` in line
with `Comment` and `Stmt`. It also makes it unnecessary to list all
transitive bases of a bit-field as unnamed bit-fields, which makes it more
friendly towards debuggers.
---
 clang/include/clang/AST/DeclBase.h        | 49 ++++++++++-------------
 clang/include/clang/AST/Type.h            |  6 +--
 clang/lib/Serialization/ASTWriterDecl.cpp | 18 ++++-----
 3 files changed, 33 insertions(+), 40 deletions(-)

diff --git a/clang/include/clang/AST/DeclBase.h b/clang/include/clang/AST/DeclBase.h
index d383e46e22e16..978e4255e877e 100644
--- a/clang/include/clang/AST/DeclBase.h
+++ b/clang/include/clang/AST/DeclBase.h
@@ -1515,16 +1515,14 @@ class DeclContext {
     uint64_t IsThisDeclarationADemotedDefinition : 1;
   };

-  /// Number of non-inherited bits in TagDeclBitfields.
-  enum { NumTagDeclBits = 10 };
+  /// Number of inherited and non-inherited bits in TagDeclBitfields.
+  enum { NumTagDeclBits = NumDeclContextBits + 10 };

   /// Stores the bits used by EnumDecl.
   /// If modified NumEnumDeclBit and the accessor
   /// methods in EnumDecl should be updated appropriately.
   class EnumDeclBitfields {
     friend class EnumDecl;
-    /// For the bits in DeclContextBitfields.
-    uint64_t : NumDeclContextBits;
     /// For the bits in TagDeclBitfields.
     uint64_t : NumTagDeclBits;

@@ -1554,16 +1552,14 @@ class DeclContext {
     uint64_t HasODRHash : 1;
   };

-  /// Number of non-inherited bits in EnumDeclBitfields.
-  enum { NumEnumDeclBits = 20 };
+  /// Number of inherited and non-inherited bits in EnumDeclBitfields.
+  enum { NumEnumDeclBits = NumTagDeclBits + 20 };

   /// Stores the bits used by RecordDecl.
   /// If modified NumRecordDeclBits and the accessor
   /// methods in RecordDecl should be updated appropriately. 
class RecordDeclBitfields { friend class RecordDecl; - /// For the bits in DeclContextBitfields. - uint64_t : NumDeclContextBits; /// For the bits in TagDeclBitfields. uint64_t : NumTagDeclBits; @@ -1615,8 +1611,8 @@ class DeclContext { uint64_t ODRHash : 26; }; - /// Number of non-inherited bits in RecordDeclBitfields. - enum { NumRecordDeclBits = 41 }; + /// Number of inherited and non-inherited bits in RecordDeclBitfields. + enum { NumRecordDeclBits = NumTagDeclBits + 41 }; /// Stores the bits used by OMPDeclareReductionDecl. /// If modified NumOMPDeclareReductionDeclBits and the accessor @@ -1631,8 +1627,9 @@ class DeclContext { uint64_t InitializerKind : 2; }; - /// Number of non-inherited bits in OMPDeclareReductionDeclBitfields. - enum { NumOMPDeclareReductionDeclBits = 2 }; + /// Number of inherited and non-inherited bits in + /// OMPDeclareReductionDeclBitfields. + enum { NumOMPDeclareReductionDeclBits = NumDeclContextBits + 2 }; /// Stores the bits used by FunctionDecl. /// If modified NumFunctionDeclBits and the accessor @@ -1711,16 +1708,14 @@ class DeclContext { uint64_t FriendConstraintRefersToEnclosingTemplate : 1; }; - /// Number of non-inherited bits in FunctionDeclBitfields. - enum { NumFunctionDeclBits = 31 }; + /// Number of inherited and non-inherited bits in FunctionDeclBitfields. + enum { NumFunctionDeclBits = NumDeclContextBits + 31 }; /// Stores the bits used by CXXConstructorDecl. If modified /// NumCXXConstructorDeclBits and the accessor /// methods in CXXConstructorDecl should be updated appropriately. class CXXConstructorDeclBitfields { friend class CXXConstructorDecl; - /// For the bits in DeclContextBitfields. - uint64_t : NumDeclContextBits; /// For the bits in FunctionDeclBitfields. uint64_t : NumFunctionDeclBits; @@ -1739,10 +1734,8 @@ class DeclContext { uint64_t IsSimpleExplicit : 1; }; - /// Number of non-inherited bits in CXXConstructorDeclBitfields. - enum { - NumCXXConstructorDeclBits = 64 - NumDeclContextBits - NumFunctionDeclBits - }; + /// Number of inherited and non-inherited bits in CXXConstructorDeclBitfields. + enum { NumCXXConstructorDeclBits = NumFunctionDeclBits + 20 }; /// Stores the bits used by ObjCMethodDecl. /// If modified NumObjCMethodDeclBits and the accessor @@ -1803,8 +1796,8 @@ class DeclContext { uint64_t HasSkippedBody : 1; }; - /// Number of non-inherited bits in ObjCMethodDeclBitfields. - enum { NumObjCMethodDeclBits = 24 }; + /// Number of inherited and non-inherited bits in ObjCMethodDeclBitfields. + enum { NumObjCMethodDeclBits = NumDeclContextBits + 24 }; /// Stores the bits used by ObjCContainerDecl. /// If modified NumObjCContainerDeclBits and the accessor @@ -1819,10 +1812,10 @@ class DeclContext { SourceLocation AtStart; }; - /// Number of non-inherited bits in ObjCContainerDeclBitfields. + /// Number of inherited and non-inherited bits in ObjCContainerDeclBitfields. /// Note that here we rely on the fact that SourceLocation is 32 bits /// wide. We check this with the static_assert in the ctor of DeclContext. - enum { NumObjCContainerDeclBits = 64 - NumDeclContextBits }; + enum { NumObjCContainerDeclBits = 64 }; /// Stores the bits used by LinkageSpecDecl. /// If modified NumLinkageSpecDeclBits and the accessor @@ -1843,8 +1836,8 @@ class DeclContext { uint64_t HasBraces : 1; }; - /// Number of non-inherited bits in LinkageSpecDeclBitfields. - enum { NumLinkageSpecDeclBits = 4 }; + /// Number of inherited and non-inherited bits in LinkageSpecDeclBitfields. 
+ enum { NumLinkageSpecDeclBits = NumDeclContextBits + 4 }; /// Stores the bits used by BlockDecl. /// If modified NumBlockDeclBits and the accessor @@ -1869,8 +1862,8 @@ class DeclContext { uint64_t CanAvoidCopyToHeap : 1; }; - /// Number of non-inherited bits in BlockDeclBitfields. - enum { NumBlockDeclBits = 5 }; + /// Number of inherited and non-inherited bits in BlockDeclBitfields. + enum { NumBlockDeclBits = NumDeclContextBits + 5 }; /// Pointer to the data structure used to lookup declarations /// within this context (or a DependentStoredDeclsMap if this is a diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h index e3dbe3b8a45cc..1e8e1303e65f6 100644 --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -1663,11 +1663,12 @@ class alignas(TypeAlignment) Type : public ExtQualsTypeCommonBase { /// Actually an ArrayType::ArraySizeModifier. unsigned SizeModifier : 3; }; + enum { NumArrayTypeBits = NumTypeBits + 6 }; class ConstantArrayTypeBitfields { friend class ConstantArrayType; - unsigned : NumTypeBits + 3 + 3; + unsigned : NumArrayTypeBits; /// Whether we have a stored size expression. unsigned HasStoredSizeExpr : 1; @@ -1780,7 +1781,7 @@ class alignas(TypeAlignment) Type : public ExtQualsTypeCommonBase { unsigned Keyword : 8; }; - enum { NumTypeWithKeywordBits = 8 }; + enum { NumTypeWithKeywordBits = NumTypeBits + 8 }; class ElaboratedTypeBitfields { friend class ElaboratedType; @@ -1913,7 +1914,6 @@ class alignas(TypeAlignment) Type : public ExtQualsTypeCommonBase { class DependentTemplateSpecializationTypeBitfields { friend class DependentTemplateSpecializationType; - unsigned : NumTypeBits; unsigned : NumTypeWithKeywordBits; /// The number of template arguments named in this class template diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp index 8a2ea7c7624ce..b3364113abf15 100644 --- a/clang/lib/Serialization/ASTWriterDecl.cpp +++ b/clang/lib/Serialization/ASTWriterDecl.cpp @@ -431,7 +431,7 @@ void ASTDeclWriter::VisitTypeAliasDecl(TypeAliasDecl *D) { } void ASTDeclWriter::VisitTagDecl(TagDecl *D) { - static_assert(DeclContext::NumTagDeclBits == 10, + static_assert(DeclContext::NumTagDeclBits == 23, "You need to update the serializer after you change the " "TagDeclBits"); @@ -459,7 +459,7 @@ void ASTDeclWriter::VisitTagDecl(TagDecl *D) { } void ASTDeclWriter::VisitEnumDecl(EnumDecl *D) { - static_assert(DeclContext::NumEnumDeclBits == 20, + static_assert(DeclContext::NumEnumDeclBits == 43, "You need to update the serializer after you change the " "EnumDeclBits"); @@ -506,7 +506,7 @@ void ASTDeclWriter::VisitEnumDecl(EnumDecl *D) { } void ASTDeclWriter::VisitRecordDecl(RecordDecl *D) { - static_assert(DeclContext::NumRecordDeclBits == 41, + static_assert(DeclContext::NumRecordDeclBits == 64, "You need to update the serializer after you change the " "RecordDeclBits"); @@ -578,7 +578,7 @@ void ASTDeclWriter::VisitDeclaratorDecl(DeclaratorDecl *D) { } void ASTDeclWriter::VisitFunctionDecl(FunctionDecl *D) { - static_assert(DeclContext::NumFunctionDeclBits == 31, + static_assert(DeclContext::NumFunctionDeclBits == 44, "You need to update the serializer after you change the " "FunctionDeclBits"); @@ -726,7 +726,7 @@ void ASTDeclWriter::VisitCXXDeductionGuideDecl(CXXDeductionGuideDecl *D) { } void ASTDeclWriter::VisitObjCMethodDecl(ObjCMethodDecl *D) { - static_assert(DeclContext::NumObjCMethodDeclBits == 24, + static_assert(DeclContext::NumObjCMethodDeclBits == 37, "You need to update the 
serializer after you change the " "ObjCMethodDeclBits"); @@ -788,7 +788,7 @@ void ASTDeclWriter::VisitObjCTypeParamDecl(ObjCTypeParamDecl *D) { } void ASTDeclWriter::VisitObjCContainerDecl(ObjCContainerDecl *D) { - static_assert(DeclContext::NumObjCContainerDeclBits == 51, + static_assert(DeclContext::NumObjCContainerDeclBits == 64, "You need to update the serializer after you change the " "ObjCContainerDeclBits"); @@ -1268,7 +1268,7 @@ void ASTDeclWriter::VisitCapturedDecl(CapturedDecl *CD) { } void ASTDeclWriter::VisitLinkageSpecDecl(LinkageSpecDecl *D) { - static_assert(DeclContext::NumLinkageSpecDeclBits == 4, + static_assert(DeclContext::NumLinkageSpecDeclBits == 17, "You need to update the serializer after you change the" "LinkageSpecDeclBits"); @@ -1479,7 +1479,7 @@ void ASTDeclWriter::VisitCXXMethodDecl(CXXMethodDecl *D) { } void ASTDeclWriter::VisitCXXConstructorDecl(CXXConstructorDecl *D) { - static_assert(DeclContext::NumCXXConstructorDeclBits == 20, + static_assert(DeclContext::NumCXXConstructorDeclBits == 64, "You need to update the serializer after you change the " "CXXConstructorDeclBits"); @@ -1960,7 +1960,7 @@ void ASTDeclWriter::VisitOMPRequiresDecl(OMPRequiresDecl *D) { } void ASTDeclWriter::VisitOMPDeclareReductionDecl(OMPDeclareReductionDecl *D) { - static_assert(DeclContext::NumOMPDeclareReductionDeclBits == 2, + static_assert(DeclContext::NumOMPDeclareReductionDeclBits == 15, "You need to update the serializer after you change the " "NumOMPDeclareReductionDeclBits"); From a3490920615e963732d70df2ef77eafa35b8ee0f Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Wed, 25 Oct 2023 11:19:27 +0200 Subject: [PATCH 085/877] [libc++] Add __small_buffer This is an implementation detail for `move_only_function` (and potentially other type-erasing classes). Reviewed By: #libc, ldionne Spies: Mordante, ldionne, EricWF, libcxx-commits Differential Revision: https://reviews.llvm.org/D140259 --- libcxx/include/CMakeLists.txt | 1 + libcxx/include/__utility/small_buffer.h | 99 +++++++++++++++++++ libcxx/include/module.modulemap.in | 1 + .../utilities/utility/small_buffer.pass.cpp | 74 ++++++++++++++ 4 files changed, 175 insertions(+) create mode 100644 libcxx/include/__utility/small_buffer.h create mode 100644 libcxx/test/libcxx/utilities/utility/small_buffer.pass.cpp diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index 1f36ff6b3896e..b7b14200498a2 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -854,6 +854,7 @@ set(files __utility/piecewise_construct.h __utility/priority_tag.h __utility/rel_ops.h + __utility/small_buffer.h __utility/swap.h __utility/to_underlying.h __utility/unreachable.h diff --git a/libcxx/include/__utility/small_buffer.h b/libcxx/include/__utility/small_buffer.h new file mode 100644 index 0000000000000..0c6d4986a5851 --- /dev/null +++ b/libcxx/include/__utility/small_buffer.h @@ -0,0 +1,99 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___UTILITY_SMALL_BUFFER_H
+#define _LIBCPP___UTILITY_SMALL_BUFFER_H
+
+#include <__config>
+#include <__memory/construct_at.h>
+#include <__type_traits/decay.h>
+#include <__type_traits/is_trivially_destructible.h>
+#include <__type_traits/is_trivially_move_constructible.h>
+#include <__utility/exception_guard.h>
+#include <__utility/forward.h>
+#include <cstddef>
+#include <new>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+#if _LIBCPP_STD_VER >= 23
+
+// __small_buffer is a helper class to perform the well known SBO (small buffer optimization). It is mainly useful to
+// allow type-erasing classes like move_only_function to store small objects in a local buffer without requiring an
+// allocation.
+//
+// This small buffer class only allows storing trivially relocatable objects inside the local storage to allow
+// __small_buffer to be trivially relocatable itself. Since the buffer doesn't know what's stored inside it, the user
+// has to manage the object's lifetime, in particular the destruction of the object.
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template <size_t _BufferSize, size_t _BufferAlignment>
+  requires(_BufferSize > 0 && _BufferAlignment > 0)
+class __small_buffer {
+public:
+  template <class _Stored, class _Decayed = decay_t<_Stored>>
+  static constexpr bool __fits_in_buffer =
+      is_trivially_move_constructible_v<_Decayed> && is_trivially_destructible_v<_Decayed> &&
+      sizeof(_Decayed) <= _BufferSize && alignof(_Decayed) <= _BufferAlignment;
+
+  _LIBCPP_HIDE_FROM_ABI __small_buffer() = default;
+  __small_buffer(const __small_buffer&) = delete;
+  __small_buffer& operator=(const __small_buffer&) = delete;
+  _LIBCPP_HIDE_FROM_ABI ~__small_buffer() = default;
+
+  // Relocates the buffer - __delete() should never be called on a moved-from __small_buffer
+  _LIBCPP_HIDE_FROM_ABI __small_buffer(__small_buffer&&) = default;
+  _LIBCPP_HIDE_FROM_ABI __small_buffer& operator=(__small_buffer&&) = default;
+
+  template <class _Stored>
+  _LIBCPP_HIDE_FROM_ABI _Stored* __get() {
+    if constexpr (__fits_in_buffer<_Stored>)
+      return std::launder(reinterpret_cast<_Stored*>(__buffer_));
+    else
+      return *std::launder(reinterpret_cast<_Stored**>(__buffer_));
+  }
+
+  template <class _Stored>
+  _LIBCPP_AVAILABILITY_SIZED_NEW_DELETE _LIBCPP_HIDE_FROM_ABI _Stored* __alloc() {
+    if constexpr (__fits_in_buffer<_Stored>) {
+      return std::launder(reinterpret_cast<_Stored*>(__buffer_));
+    } else {
+      byte* __allocation = static_cast<byte*>(::operator new[](sizeof(_Stored), align_val_t{alignof(_Stored)}));
+      std::construct_at(reinterpret_cast<byte**>(__buffer_), __allocation);
+      return std::launder(reinterpret_cast<_Stored*>(__allocation));
+    }
+  }
+
+  template <class _Stored>
+  _LIBCPP_AVAILABILITY_SIZED_NEW_DELETE _LIBCPP_HIDE_FROM_ABI void __dealloc() noexcept {
+    if constexpr (!__fits_in_buffer<_Stored>)
+      ::operator delete[](*reinterpret_cast<byte**>(__buffer_), sizeof(_Stored), align_val_t{alignof(_Stored)});
+  }
+
+  template <class _Stored, class... _Args>
+  _LIBCPP_AVAILABILITY_SIZED_NEW_DELETE _LIBCPP_HIDE_FROM_ABI void __construct(_Args&&... 
__args) {
+    _Stored* __buffer = __alloc<_Stored>();
+    auto __guard = std::__make_exception_guard([&] { __dealloc<_Stored>(); });
+    std::construct_at(__buffer, std::forward<_Args>(__args)...);
+    __guard.__complete();
+  }
+
+private:
+  alignas(_BufferAlignment) byte __buffer_[_BufferSize];
+};
+
+#  undef _LIBCPP_SMALL_BUFFER_TRIVIAL_ABI
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP_STD_VER >= 23
+
+#endif // _LIBCPP___UTILITY_SMALL_BUFFER_H
diff --git a/libcxx/include/module.modulemap.in b/libcxx/include/module.modulemap.in
index 41158212c9a86..f447b2fa0a0ce 100644
--- a/libcxx/include/module.modulemap.in
+++ b/libcxx/include/module.modulemap.in
@@ -2079,6 +2079,7 @@ module std_private_utility_pair_fwd [system] { header "__fwd/pair.
 module std_private_utility_piecewise_construct [system] { header "__utility/piecewise_construct.h" }
 module std_private_utility_priority_tag [system] { header "__utility/priority_tag.h" }
 module std_private_utility_rel_ops [system] { header "__utility/rel_ops.h" }
+module std_private_utility_small_buffer [system] { header "__utility/small_buffer.h" }
 module std_private_utility_swap [system] {
   header "__utility/swap.h"
   export std_private_type_traits_is_swappable
diff --git a/libcxx/test/libcxx/utilities/utility/small_buffer.pass.cpp b/libcxx/test/libcxx/utilities/utility/small_buffer.pass.cpp
new file mode 100644
index 0000000000000..2214efa486870
--- /dev/null
+++ b/libcxx/test/libcxx/utilities/utility/small_buffer.pass.cpp
@@ -0,0 +1,74 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// XFAIL: availability-aligned_allocation-missing
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+#include "test_macros.h"
+
+TEST_DIAGNOSTIC_PUSH
+TEST_CLANG_DIAGNOSTIC_IGNORED("-Wprivate-header")
+#include <__utility/small_buffer.h>
+TEST_DIAGNOSTIC_POP
+
+#include <cassert>
+#include <memory>
+#include <utility>
+
+struct NotTriviallyRelocatable {
+  char c_;
+
+  NotTriviallyRelocatable(char c) : c_(c) {}
+  ~NotTriviallyRelocatable() {}
+};
+
+struct alignas(16) Overaligned {
+  int i;
+};
+
+int main(int, char**) {
+  using BufferT = std::__small_buffer<8, 8>;
+  static_assert(sizeof(BufferT) == 8);
+  static_assert(alignof(BufferT) == 8);
+  static_assert(BufferT::__fits_in_buffer<int>);
+  static_assert(!BufferT::__fits_in_buffer<NotTriviallyRelocatable>);
+  static_assert(!BufferT::__fits_in_buffer<Overaligned>);
+
+  BufferT buf;
+
+  { // construct/destroy in the same place
+    buf.__construct<int>(3);
+    assert(*buf.__get<int>() == 3);
+    std::destroy_at(buf.__get<int>());
+    buf.__dealloc<int>();
+
+    buf.__construct<NotTriviallyRelocatable>(3);
+    assert(buf.__get<NotTriviallyRelocatable>()->c_ == 3);
+    std::destroy_at(buf.__get<NotTriviallyRelocatable>());
+    buf.__dealloc<NotTriviallyRelocatable>();
+  }
+
+  { // Move the buffer around
+    buf.__construct<int>(3);
+    assert(*buf.__get<int>() == 3);
+    auto buf2 = std::move(buf);
+    assert(*buf2.__get<int>() == 3);
+    std::destroy_at(buf2.__get<int>());
+    buf2.__dealloc<int>();
+
+    buf.__construct<NotTriviallyRelocatable>(3);
+    assert(buf.__get<NotTriviallyRelocatable>()->c_ == 3);
+    auto buf3 = std::move(buf);
+    assert(buf3.__get<NotTriviallyRelocatable>()->c_ == 3);
+    std::destroy_at(buf3.__get<NotTriviallyRelocatable>());
+    buf3.__dealloc<NotTriviallyRelocatable>();
+  }
+
+  return 0;
+}
From c555a12377307909bd47e5de798059089eaa3f85 Mon Sep 17 00:00:00 2001
From: Igor Zhukov 
Date: Tue, 26 Sep 2023 09:33:41 -0400
Subject: [PATCH 086/877] [libc++] Make sure std::declval() produces an error
 when ODR-used

Fixes 
https://github.com/llvm/llvm-project/issues/61202

Differential Revision: https://reviews.llvm.org/D145376
---
 libcxx/include/__utility/declval.h            |  6 ++-
 .../deleted_output_functions.verify.cpp      | 53 +++++++++++--------
 .../utility/declval/declval.verify.cpp       | 16 ++++++
 3 files changed, 52 insertions(+), 23 deletions(-)
 create mode 100644 libcxx/test/std/utilities/utility/declval/declval.verify.cpp

diff --git a/libcxx/include/__utility/declval.h b/libcxx/include/__utility/declval.h
index c2f4bec132827..d0856b8afa4db 100644
--- a/libcxx/include/__utility/declval.h
+++ b/libcxx/include/__utility/declval.h
@@ -27,7 +27,11 @@ _Tp __declval(long);
 _LIBCPP_SUPPRESS_DEPRECATED_POP

 template <class _Tp>
-decltype(std::__declval<_Tp>(0)) declval() _NOEXCEPT;
+_LIBCPP_HIDE_FROM_ABI decltype(std::__declval<_Tp>(0)) declval() _NOEXCEPT {
+  static_assert(!__is_same(_Tp, _Tp),
+                "std::declval can only be used in an unevaluated context. "
+                "It's likely that your current usage is trying to extract a value from the function.");
+}

 _LIBCPP_END_NAMESPACE_STD

diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream/deleted_output_functions.verify.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream/deleted_output_functions.verify.cpp
index d8d2a94791b3a..2916a94cdcd6f 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream/deleted_output_functions.verify.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream/deleted_output_functions.verify.cpp
@@ -18,38 +18,47 @@
 void f() {
   std::ostringstream s;

+#ifndef TEST_HAS_NO_CHAR8_T
+  char8_t c8_s[] = u8"test";
+  const char8_t* c8_cs = u8"test";
+#endif
+  char16_t c16_s[] = u"test";
+  const char16_t* c16_cs = u"test";
+  char32_t c32_s[] = U"test";
+  const char32_t* c32_cs = U"test";
 #ifndef TEST_HAS_NO_WIDE_CHARACTERS
-
-  s << wchar_t(); // expected-error {{overload resolution selected deleted operator '<<'}}
-  s << std::declval<wchar_t*>(); // expected-error {{overload resolution selected deleted operator '<<'}}
-  s << std::declval<const wchar_t*>(); // expected-error {{overload resolution selected deleted operator '<<'}}
+  wchar_t w_s[] = L"test";
+  const wchar_t* w_cs = L"test";
+  s << wchar_t(); // expected-error {{overload resolution selected deleted operator '<<'}}
+  s << w_s; // expected-error {{overload resolution selected deleted operator '<<'}}
+  s << w_cs; // expected-error {{overload resolution selected deleted operator '<<'}}

   std::wostringstream sw;
 #  ifndef TEST_HAS_NO_CHAR8_T
-  sw << char8_t(); // expected-error {{overload resolution selected deleted operator '<<'}}
-  sw << std::declval<char8_t*>(); // expected-error {{overload resolution selected deleted operator '<<'}}
-  sw << std::declval<const char8_t*>(); // expected-error {{overload resolution selected deleted operator '<<'}}
+  sw << char8_t(); // expected-error {{overload resolution selected deleted operator '<<'}}
+  sw << c8_s; // expected-error {{overload resolution selected deleted operator '<<'}}
+  sw << c8_cs; // expected-error {{overload resolution selected deleted operator '<<'}}
 #  endif
-  sw << char16_t(); // expected-error {{overload resolution selected deleted operator '<<'}}
-  sw << std::declval<char16_t*>(); // expected-error {{overload resolution selected deleted operator '<<'}}
-  sw << std::declval<const char16_t*>(); // expected-error {{overload resolution selected deleted operator '<<'}}
-  sw << char32_t(); // expected-error {{overload resolution selected deleted operator '<<'}}
-  sw << std::declval<char32_t*>(); // expected-error {{overload resolution selected deleted operator '<<'}}
-  sw << 
std::declval<const char32_t*>(); // expected-error {{overload resolution selected deleted operator '<<'}}
+  sw << char16_t(); // expected-error {{overload resolution selected deleted operator '<<'}}
+  sw << c16_s; // expected-error {{overload resolution selected deleted operator '<<'}}
+  sw << c16_cs; // expected-error {{overload resolution selected deleted operator '<<'}}
+  sw << char32_t(); // expected-error {{overload resolution selected deleted operator '<<'}}
+  sw << c32_s; // expected-error {{overload resolution selected deleted operator '<<'}}
+  sw << c32_cs; // expected-error {{overload resolution selected deleted operator '<<'}}
 #endif // TEST_HAS_NO_WIDE_CHARACTERS

 #ifndef TEST_HAS_NO_CHAR8_T
-  s << char8_t(); // expected-error {{overload resolution selected deleted operator '<<'}}
-  s << std::declval<char8_t*>(); // expected-error {{overload resolution selected deleted operator '<<'}}
-  s << std::declval<const char8_t*>(); // expected-error {{overload resolution selected deleted operator '<<'}}
+  s << char8_t(); // expected-error {{overload resolution selected deleted operator '<<'}}
+  s << c8_s; // expected-error {{overload resolution selected deleted operator '<<'}}
+  s << c8_cs; // expected-error {{overload resolution selected deleted operator '<<'}}
 #endif
-  s << char16_t(); // expected-error {{overload resolution selected deleted operator '<<'}}
-  s << std::declval<char16_t*>(); // expected-error {{overload resolution selected deleted operator '<<'}}
-  s << std::declval<const char16_t*>(); // expected-error {{overload resolution selected deleted operator '<<'}}
-  s << char32_t(); // expected-error {{overload resolution selected deleted operator '<<'}}
-  s << std::declval<char32_t*>(); // expected-error {{overload resolution selected deleted operator '<<'}}
-  s << std::declval<const char32_t*>(); // expected-error {{overload resolution selected deleted operator '<<'}}
+  s << char16_t(); // expected-error {{overload resolution selected deleted operator '<<'}}
+  s << c16_s; // expected-error {{overload resolution selected deleted operator '<<'}}
+  s << c16_cs; // expected-error {{overload resolution selected deleted operator '<<'}}
+  s << char32_t(); // expected-error {{overload resolution selected deleted operator '<<'}}
+  s << c32_s; // expected-error {{overload resolution selected deleted operator '<<'}}
+  s << c32_cs; // expected-error {{overload resolution selected deleted operator '<<'}}
 }
diff --git a/libcxx/test/std/utilities/utility/declval/declval.verify.cpp b/libcxx/test/std/utilities/utility/declval/declval.verify.cpp
new file mode 100644
index 0000000000000..a2bd1992aaa83
--- /dev/null
+++ b/libcxx/test/std/utilities/utility/declval/declval.verify.cpp
@@ -0,0 +1,16 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// template typename add_rvalue_reference::type declval() noexcept; + +#include + +int x = std::declval< + int>(); // expected-error-re@*:* {{static assertion failed{{.*}}std::declval can only be used in an unevaluated context.}} From 166b3a86173666ccf823d09e30f40f4a0d386e18 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 26 Oct 2023 11:31:36 -0400 Subject: [PATCH 087/877] [libc++][tests] Fix a few remaining instances of outdated static assertion regexes in our test suite --- .../cpp17_iterator_concepts.verify.cpp | 86 +++++++++---------- .../mdspan/layout_stride/extents.verify.cpp | 4 +- 2 files changed, 45 insertions(+), 45 deletions(-) diff --git a/libcxx/test/libcxx/algorithms/cpp17_iterator_concepts.verify.cpp b/libcxx/test/libcxx/algorithms/cpp17_iterator_concepts.verify.cpp index 3bc49cd3b1b18..1661738dbdfa1 100644 --- a/libcxx/test/libcxx/algorithms/cpp17_iterator_concepts.verify.cpp +++ b/libcxx/test/libcxx/algorithms/cpp17_iterator_concepts.verify.cpp @@ -83,26 +83,26 @@ struct diff_t_not_signed : valid_iterator { }; void check_iterator_requirements() { - static_assert(std::__cpp17_iterator); // expected-error-re {{{{static assertion|static_assert}} failed}} + static_assert(std::__cpp17_iterator); // expected-error-re {{static assertion failed}} // expected-note@*:* {{indirection requires pointer operand}} - static_assert(std::__cpp17_iterator); // expected-error-re {{{{static assertion|static_assert}} failed}} + static_assert(std::__cpp17_iterator); // expected-error-re {{static assertion failed}} // expected-note@*:* {{cannot increment value of type 'missing_preincrement'}} - static_assert(std::__cpp17_iterator); // expected-error-re {{{{static assertion|static_assert}} failed}} + static_assert(std::__cpp17_iterator); // expected-error-re {{static assertion failed}} // expected-note@*:* {{because 'not_move_constructible' does not satisfy '__cpp17_move_constructible'}} - static_assert(std::__cpp17_iterator); // expected-error-re {{{{static assertion|static_assert}} failed}} + static_assert(std::__cpp17_iterator); // expected-error-re {{static assertion failed}} // expected-note@*:* {{because 'not_copy_constructible' does not satisfy '__cpp17_copy_constructible'}} - static_assert(std::__cpp17_iterator); // expected-error-re {{{{static assertion|static_assert}} failed}} + static_assert(std::__cpp17_iterator); // expected-error-re {{static assertion failed}} // expected-note@*:* {{because 'not_move_assignable' does not satisfy '__cpp17_copy_assignable'}} - static_assert(std::__cpp17_iterator); // expected-error-re {{{{static assertion|static_assert}} failed}} + static_assert(std::__cpp17_iterator); // expected-error-re {{static assertion failed}} // expectted-note@*:* {{because 'not_copy_assignable' does not satisfy '__cpp17_copy_assignable'}} - static_assert(std::__cpp17_iterator); // expected-error-re {{{{static assertion|static_assert}} failed}} + static_assert(std::__cpp17_iterator); // expected-error-re {{static assertion failed}} // expectted-note@*:* {{'is_signed_v<__iter_diff_t >' evaluated to false}} } @@ -115,10 +115,10 @@ bool operator==(not_unequality_comparable, not_unequality_comparable); bool operator!=(not_unequality_comparable, not_unequality_comparable) = delete; void check_input_iterator_requirements() { - _LIBCPP_REQUIRE_CPP17_INPUT_ITERATOR(not_equality_comparable); // expected-error-re {{{{static 
assertion|static_assert}} failed}} + _LIBCPP_REQUIRE_CPP17_INPUT_ITERATOR(not_equality_comparable); // expected-error-re {{static assertion failed}} // expected-note@*:* {{'__lhs == __rhs' would be invalid: overload resolution selected deleted operator '=='}} - _LIBCPP_REQUIRE_CPP17_INPUT_ITERATOR(not_unequality_comparable); // expected-error-re {{{{static assertion|static_assert}} failed}} + _LIBCPP_REQUIRE_CPP17_INPUT_ITERATOR(not_unequality_comparable); // expected-error-re {{static assertion failed}} // expected-note@*:* {{'__lhs != __rhs' would be invalid: overload resolution selected deleted operator '!='}} } @@ -138,9 +138,9 @@ struct postincrement_not_ref : valid_iterator {}; bool operator==(postincrement_not_ref, postincrement_not_ref); void check_forward_iterator_requirements() { - _LIBCPP_REQUIRE_CPP17_FORWARD_ITERATOR(not_default_constructible); // expected-error-re {{{{static assertion|static_assert}} failed}} + _LIBCPP_REQUIRE_CPP17_FORWARD_ITERATOR(not_default_constructible); // expected-error-re {{static assertion failed}} // expected-note@*:* {{because 'not_default_constructible' does not satisfy '__cpp17_default_constructible'}} - _LIBCPP_REQUIRE_CPP17_FORWARD_ITERATOR(postincrement_not_ref); // expected-error-re {{{{static assertion|static_assert}} failed}} + _LIBCPP_REQUIRE_CPP17_FORWARD_ITERATOR(postincrement_not_ref); // expected-error-re {{static assertion failed}} #ifndef _AIX // expected-note@*:* {{because type constraint 'convertible_to::Proxy, const postincrement_not_ref &>' was not satisfied}} #endif @@ -167,11 +167,11 @@ struct not_returning_iter_reference : valid_forward_iterator >' was not satisfied}} } @@ -359,62 +359,62 @@ struct missing_const_const_greater_eq : valid_random_access_iterator __iter' would be invalid: overload resolution selected deleted operator '>'}} - _LIBCPP_REQUIRE_CPP17_RANDOM_ACCESS_ITERATOR(missing_const_mut_greater); // expected-error-re {{{{static assertion|static_assert}} failed}} + _LIBCPP_REQUIRE_CPP17_RANDOM_ACCESS_ITERATOR(missing_const_mut_greater); // expected-error-re {{static assertion failed}} // expected-note@*:* {{because 'std::as_const(__iter) > __iter' would be invalid: overload resolution selected deleted operator '>'}} - _LIBCPP_REQUIRE_CPP17_RANDOM_ACCESS_ITERATOR(missing_mut_const_greater); // expected-error-re {{{{static assertion|static_assert}} failed}} + _LIBCPP_REQUIRE_CPP17_RANDOM_ACCESS_ITERATOR(missing_mut_const_greater); // expected-error-re {{static assertion failed}} // expected-note@*:* {{because '__iter > std::as_const(__iter)' would be invalid: overload resolution selected deleted operator '>'}} - _LIBCPP_REQUIRE_CPP17_RANDOM_ACCESS_ITERATOR(missing_const_const_greater); // expected-error-re {{{{static assertion|static_assert}} failed}} + _LIBCPP_REQUIRE_CPP17_RANDOM_ACCESS_ITERATOR(missing_const_const_greater); // expected-error-re {{static assertion failed}} // expected-note@*:* {{because 'std::as_const(__iter) > std::as_const(__iter)' would be invalid: overload resolution selected deleted operator '>'}} - _LIBCPP_REQUIRE_CPP17_RANDOM_ACCESS_ITERATOR(missing_less_eq); // expected-error-re {{{{static assertion|static_assert}} failed}} + _LIBCPP_REQUIRE_CPP17_RANDOM_ACCESS_ITERATOR(missing_less_eq); // expected-error-re {{static assertion failed}} // expected-note@*:* {{because '__iter <= __iter' would be invalid: overload resolution selected deleted operator '<='}} - _LIBCPP_REQUIRE_CPP17_RANDOM_ACCESS_ITERATOR(missing_const_mut_less_eq); // expected-error-re {{{{static assertion|static_assert}} 
failed}} + _LIBCPP_REQUIRE_CPP17_RANDOM_ACCESS_ITERATOR(missing_const_mut_less_eq); // expected-error-re {{static assertion failed}} // expected-note@*:* {{because 'std::as_const(__iter) <= __iter' would be invalid: overload resolution selected deleted operator '<='}} - _LIBCPP_REQUIRE_CPP17_RANDOM_ACCESS_ITERATOR(missing_mut_const_less_eq); // expected-error-re {{{{static assertion|static_assert}} failed}} + _LIBCPP_REQUIRE_CPP17_RANDOM_ACCESS_ITERATOR(missing_mut_const_less_eq); // expected-error-re {{static assertion failed}} // expected-note@*:* {{because '__iter <= std::as_const(__iter)' would be invalid: overload resolution selected deleted operator '<='}} - _LIBCPP_REQUIRE_CPP17_RANDOM_ACCESS_ITERATOR(missing_const_const_less_eq); // expected-error-re {{{{static assertion|static_assert}} failed}} + _LIBCPP_REQUIRE_CPP17_RANDOM_ACCESS_ITERATOR(missing_const_const_less_eq); // expected-error-re {{static assertion failed}} // expected-note@*:* {{because 'std::as_const(__iter) <= std::as_const(__iter)' would be invalid: overload resolution selected deleted operator '<='}} - _LIBCPP_REQUIRE_CPP17_RANDOM_ACCESS_ITERATOR(missing_greater_eq); // expected-error-re {{{{static assertion|static_assert}} failed}} + _LIBCPP_REQUIRE_CPP17_RANDOM_ACCESS_ITERATOR(missing_greater_eq); // expected-error-re {{static assertion failed}} // expected-note@*:* {{because '__iter >= __iter' would be invalid: overload resolution selected deleted operator '>='}} - _LIBCPP_REQUIRE_CPP17_RANDOM_ACCESS_ITERATOR(missing_const_mut_greater_eq); // expected-error-re {{{{static assertion|static_assert}} failed}} + _LIBCPP_REQUIRE_CPP17_RANDOM_ACCESS_ITERATOR(missing_const_mut_greater_eq); // expected-error-re {{static assertion failed}} // expected-note@*:* {{because 'std::as_const(__iter) >= __iter' would be invalid: overload resolution selected deleted operator '>='}} - _LIBCPP_REQUIRE_CPP17_RANDOM_ACCESS_ITERATOR(missing_mut_const_greater_eq); // expected-error-re {{{{static assertion|static_assert}} failed}} + _LIBCPP_REQUIRE_CPP17_RANDOM_ACCESS_ITERATOR(missing_mut_const_greater_eq); // expected-error-re {{static assertion failed}} // expected-note@*:* {{because '__iter >= std::as_const(__iter)' would be invalid: overload resolution selected deleted operator '>='}} - _LIBCPP_REQUIRE_CPP17_RANDOM_ACCESS_ITERATOR(missing_const_const_greater_eq); // expected-error-re {{{{static assertion|static_assert}} failed}} + _LIBCPP_REQUIRE_CPP17_RANDOM_ACCESS_ITERATOR(missing_const_const_greater_eq); // expected-error-re {{static assertion failed}} // expected-note@*:* {{because 'std::as_const(__iter) >= std::as_const(__iter)' would be invalid: overload resolution selected deleted operator '>='}} } diff --git a/libcxx/test/std/containers/views/mdspan/layout_stride/extents.verify.cpp b/libcxx/test/std/containers/views/mdspan/layout_stride/extents.verify.cpp index 4742527f7af11..46f2b774bcbd9 100644 --- a/libcxx/test/std/containers/views/mdspan/layout_stride/extents.verify.cpp +++ b/libcxx/test/std/containers/views/mdspan/layout_stride/extents.verify.cpp @@ -23,11 +23,11 @@ #include void not_extents() { - // expected-error-re@*:* {{{{(static_assert|static assertion)}} failed {{.*}}layout_stride::mapping template argument must be a specialization of extents}} + // expected-error-re@*:* {{static assertion failed {{.*}}layout_stride::mapping template argument must be a specialization of extents}} [[maybe_unused]] std::layout_stride::mapping mapping; } void representable() { - // expected-error-re@*:* {{{{(static_assert|static 
assertion)}} failed {{.*}}layout_stride::mapping product of static extents must be representable as index_type.}} + // expected-error-re@*:* {{static assertion failed {{.*}}layout_stride::mapping product of static extents must be representable as index_type.}} [[maybe_unused]] std::layout_stride::mapping> mapping; }
From cee08ff342f39f8cd2b3b66f48ecb33d8b5efe65 Mon Sep 17 00:00:00 2001 From: Joseph Huber <35342157+jhuber6@users.noreply.github.com> Date: Thu, 26 Oct 2023 10:36:34 -0500 Subject: [PATCH 088/877] [Libomptarget] Do not pass 'nogpulib' to the non-LTO Nvidia tests (#70327) Summary: For the other tests we pass `-nogpulib` to ensure that we set up the needed libraries correctly. However, this caused problems for the non-LTO build and test of Nvidia systems. In general this is because we would do a separate compile of the libomptarget device runtime and then link in that cubin. This exercised the runtime in a lot of ways it's not used to, since doing things this way was hardly expected or tested. This patch disables it only for the Nvidia non-LTO build so that we still get the effect of `--libomptarget-nvptx-bc-path` rather than ignoring it.
--- openmp/libomptarget/test/lit.cfg | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/openmp/libomptarget/test/lit.cfg b/openmp/libomptarget/test/lit.cfg index 80a3c10d3a949..6dab31bd35a9f 100644 --- a/openmp/libomptarget/test/lit.cfg +++ b/openmp/libomptarget/test/lit.cfg @@ -128,7 +128,8 @@ elif config.operating_system == 'Darwin': config.test_flags += " -Wl,-rpath," + config.library_dir config.test_flags += " -Wl,-rpath," + config.omp_host_rtl_directory else: # Unices - config.test_flags += " -nogpulib" + if config.libomptarget_current_target != "nvptx64-nvidia-cuda": + config.test_flags += " -nogpulib" config.test_flags += " -Wl,-rpath," + config.library_dir config.test_flags += " -Wl,-rpath," + config.omp_host_rtl_directory config.test_flags += " -Wl,-rpath," + config.llvm_lib_directory
From c65ec9d9195ad4afee2bbf69fd77607697d43480 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 26 Oct 2023 08:36:03 -0700 Subject: [PATCH 089/877] Revert "[SLP]Improve isGatherShuffledEntry by trying per-register shuffle." This reverts commit 560bad013ebcb8d2c2c1722e35270b9a70ab40ce to fix a bug reported in https://lab.llvm.org/buildbot/#/builders/5/builds/37763.
--- .../Transforms/Vectorize/SLPVectorizer.cpp | 462 +++++------------- .../X86/multi-nodes-to-shuffle.ll | 45 +- 2 files changed, 120 insertions(+), 387 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 9b5da445daaab..4f82d2d1d6d91 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -2507,31 +2507,17 @@ class BoUpSLP { /// instruction in the list). Instruction &getLastInstructionInBundle(const TreeEntry *E); - /// Checks if the gathered \p VL can be represented as a single register - /// shuffle(s) of previous tree entries. + /// Checks if the gathered \p VL can be represented as shuffle(s) of previous + /// tree entries. /// \param TE Tree entry checked for permutation. /// \param VL List of scalars (a subset of the TE scalar), checked for - /// permutations. Must form single-register vector. + /// permutations. /// \returns ShuffleKind, if gathered values can be represented as shuffles of - /// previous tree entries. \p Part of \p Mask is filled with the shuffle mask. + /// previous tree entries. 
\p Mask is filled with the shuffle mask. std::optional - isGatherShuffledSingleRegisterEntry( - const TreeEntry *TE, ArrayRef VL, MutableArrayRef Mask, - SmallVectorImpl &Entries, unsigned Part); - - /// Checks if the gathered \p VL can be represented as multi-register - /// shuffle(s) of previous tree entries. - /// \param TE Tree entry checked for permutation. - /// \param VL List of scalars (a subset of the TE scalar), checked for - /// permutations. - /// \returns per-register series of ShuffleKind, if gathered values can be - /// represented as shuffles of previous tree entries. \p Mask is filled with - /// the shuffle mask (also on per-register base). - SmallVector> - isGatherShuffledEntry( - const TreeEntry *TE, ArrayRef VL, SmallVectorImpl &Mask, - SmallVectorImpl> &Entries, - unsigned NumParts); + isGatherShuffledEntry(const TreeEntry *TE, ArrayRef VL, + SmallVectorImpl &Mask, + SmallVectorImpl &Entries); /// \returns the scalarization cost for this list of values. Assuming that /// this subtree gets vectorized, we may need to extract the values from the @@ -7004,11 +6990,6 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis { BoUpSLP &R; SmallPtrSetImpl &CheckedExtracts; constexpr static TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; - /// While set, still trying to estimate the cost for the same nodes and we - /// can delay actual cost estimation (virtual shuffle instruction emission). - /// May help better estimate the cost if same nodes must be permuted + allows - /// to move most of the long shuffles cost estimation to TTI. - bool SameNodesEstimated = true; static Constant *getAllOnesValue(const DataLayout &DL, Type *Ty) { if (Ty->getScalarType()->isPointerTy()) { @@ -7249,49 +7230,6 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis { } return Cost; } - /// Transforms mask \p CommonMask per given \p Mask to make proper set after - /// shuffle emission. - static void transformMaskAfterShuffle(MutableArrayRef CommonMask, - ArrayRef Mask) { - for (unsigned Idx = 0, Sz = CommonMask.size(); Idx < Sz; ++Idx) - if (Mask[Idx] != PoisonMaskElem) - CommonMask[Idx] = Idx; - } - /// Adds the cost of reshuffling \p E1 and \p E2 (if present), using given - /// mask \p Mask, register number \p Part, that includes \p SliceSize - /// elements. - void estimateNodesPermuteCost(const TreeEntry &E1, const TreeEntry *E2, - ArrayRef Mask, unsigned Part, - unsigned SliceSize) { - if (SameNodesEstimated) { - // Delay the cost estimation if the same nodes are reshuffling. - // If we already requested the cost of reshuffling of E1 and E2 before, no - // need to estimate another cost with the sub-Mask, instead include this - // sub-Mask into the CommonMask to estimate it later and avoid double cost - // estimation. - if ((InVectors.size() == 2 && - InVectors.front().get() == &E1 && - InVectors.back().get() == E2) || - (!E2 && InVectors.front().get() == &E1)) { - assert(all_of(ArrayRef(CommonMask).slice(Part * SliceSize, SliceSize), - [](int Idx) { return Idx == PoisonMaskElem; }) && - "Expected all poisoned elements."); - ArrayRef SubMask = - ArrayRef(Mask).slice(Part * SliceSize, SliceSize); - copy(SubMask, std::next(CommonMask.begin(), SliceSize * Part)); - return; - } - // Found non-matching nodes - need to estimate the cost for the matched - // and transform mask. - Cost += createShuffle(InVectors.front(), - InVectors.size() == 1 ? 
nullptr : InVectors.back(), - CommonMask); - transformMaskAfterShuffle(CommonMask, CommonMask); - } - SameNodesEstimated = false; - Cost += createShuffle(&E1, E2, Mask); - transformMaskAfterShuffle(CommonMask, Mask); - } class ShuffleCostBuilder { const TargetTransformInfo &TTI; @@ -7555,74 +7493,31 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis { // into a vector and can be represented as a permutation elements in a // single input vector or of 2 input vectors. Cost += computeExtractCost(VL, Mask, ShuffleKind); - InVectors.assign(1, E); - CommonMask.assign(Mask.begin(), Mask.end()); - transformMaskAfterShuffle(CommonMask, CommonMask); - SameNodesEstimated = false; return VecBase; } - void add(const TreeEntry &E1, const TreeEntry &E2, ArrayRef Mask) { - if (&E1 == &E2) { + void add(const TreeEntry *E1, const TreeEntry *E2, ArrayRef Mask) { + if (E1 == E2) { assert(all_of(Mask, - [&](int Idx) { - return Idx < static_cast(E1.getVectorFactor()); + [=](int Idx) { + return Idx < static_cast(E1->getVectorFactor()); }) && "Expected single vector shuffle mask."); add(E1, Mask); return; } - if (InVectors.empty()) { - CommonMask.assign(Mask.begin(), Mask.end()); - InVectors.assign({&E1, &E2}); - return; - } - assert(!CommonMask.empty() && "Expected non-empty common mask."); - auto *MaskVecTy = - FixedVectorType::get(E1.Scalars.front()->getType(), Mask.size()); - unsigned NumParts = TTI.getNumberOfParts(MaskVecTy); - assert(NumParts > 0 && NumParts < Mask.size() && - "Expected positive number of registers."); - unsigned SliceSize = Mask.size() / NumParts; - const auto *It = - find_if(Mask, [](int Idx) { return Idx != PoisonMaskElem; }); - unsigned Part = std::distance(Mask.begin(), It) / SliceSize; - estimateNodesPermuteCost(E1, &E2, Mask, Part, SliceSize); + CommonMask.assign(Mask.begin(), Mask.end()); + InVectors.assign({E1, E2}); } - void add(const TreeEntry &E1, ArrayRef Mask) { - if (InVectors.empty()) { - CommonMask.assign(Mask.begin(), Mask.end()); - InVectors.assign(1, &E1); - return; - } - assert(!CommonMask.empty() && "Expected non-empty common mask."); - auto *MaskVecTy = - FixedVectorType::get(E1.Scalars.front()->getType(), Mask.size()); - unsigned NumParts = TTI.getNumberOfParts(MaskVecTy); - assert(NumParts > 0 && NumParts < Mask.size() && - "Expected positive number of registers."); - unsigned SliceSize = Mask.size() / NumParts; - const auto *It = - find_if(Mask, [](int Idx) { return Idx != PoisonMaskElem; }); - unsigned Part = std::distance(Mask.begin(), It) / SliceSize; - estimateNodesPermuteCost(E1, nullptr, Mask, Part, SliceSize); - if (!SameNodesEstimated && InVectors.size() == 1) - InVectors.emplace_back(&E1); + void add(const TreeEntry *E1, ArrayRef Mask) { + CommonMask.assign(Mask.begin(), Mask.end()); + InVectors.assign(1, E1); } /// Adds another one input vector and the mask for the shuffling. 
void add(Value *V1, ArrayRef Mask) { - if (InVectors.empty()) { - assert(CommonMask.empty() && "Expected empty input mask/vectors."); - CommonMask.assign(Mask.begin(), Mask.end()); - InVectors.assign(1, V1); - return; - } - assert(InVectors.size() == 1 && InVectors.front().is() && - !CommonMask.empty() && "Expected only single entry from extracts."); - InVectors.push_back(V1); - unsigned VF = CommonMask.size(); - for (unsigned Idx = 0; Idx < VF; ++Idx) - if (Mask[Idx] != PoisonMaskElem && CommonMask[Idx] == PoisonMaskElem) - CommonMask[Idx] = Mask[Idx] + VF; + assert(CommonMask.empty() && InVectors.empty() && + "Expected empty input mask/vectors."); + CommonMask.assign(Mask.begin(), Mask.end()); + InVectors.assign(1, V1); } Value *gather(ArrayRef VL, Value *Root = nullptr) { Cost += getBuildVectorCost(VL, Root); @@ -7684,16 +7579,12 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, ArrayRef VL = E->Scalars; Type *ScalarTy = VL[0]->getType(); - if (E->State != TreeEntry::NeedToGather) { - if (auto *SI = dyn_cast(VL[0])) - ScalarTy = SI->getValueOperand()->getType(); - else if (auto *CI = dyn_cast(VL[0])) - ScalarTy = CI->getOperand(0)->getType(); - else if (auto *IE = dyn_cast(VL[0])) - ScalarTy = IE->getOperand(1)->getType(); - } - if (!FixedVectorType::isValidElementType(ScalarTy)) - return InstructionCost::getInvalid(); + if (auto *SI = dyn_cast(VL[0])) + ScalarTy = SI->getValueOperand()->getType(); + else if (auto *CI = dyn_cast(VL[0])) + ScalarTy = CI->getOperand(0)->getType(); + else if (auto *IE = dyn_cast(VL[0])) + ScalarTy = IE->getOperand(1)->getType(); auto *VecTy = FixedVectorType::get(ScalarTy, VL.size()); TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; @@ -7705,7 +7596,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, VecTy = FixedVectorType::get(ScalarTy, VL.size()); } unsigned EntryVF = E->getVectorFactor(); - auto *FinalVecTy = FixedVectorType::get(ScalarTy, EntryVF); + auto *FinalVecTy = FixedVectorType::get(VecTy->getElementType(), EntryVF); bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty(); if (E->State == TreeEntry::NeedToGather) { @@ -7738,28 +7629,20 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, SmallVector Mask; SmallVector ExtractMask; std::optional ExtractShuffle; - SmallVector> GatherShuffles; - SmallVector> Entries; + std::optional GatherShuffle; + SmallVector Entries; // Check for gathered extracts. 
- ExtractShuffle = - tryToGatherSingleRegisterExtractElements(GatheredScalars, ExtractMask); + ExtractShuffle = tryToGatherSingleRegisterExtractElements(GatheredScalars, ExtractMask); bool Resized = false; - unsigned NumParts = TTI->getNumberOfParts(VecTy); - if (NumParts == 0 || NumParts >= GatheredScalars.size()) - NumParts = 1; if (Value *VecBase = Estimator.adjustExtracts( - E, ExtractMask, ExtractShuffle.value_or(TTI::SK_PermuteTwoSrc))) { + E, ExtractMask, ExtractShuffle.value_or(TTI::SK_PermuteTwoSrc))) if (auto *VecBaseTy = dyn_cast(VecBase->getType())) if (VF == VecBaseTy->getNumElements() && GatheredScalars.size() != VF) { Resized = true; GatheredScalars.append(VF - GatheredScalars.size(), PoisonValue::get(ScalarTy)); } - } else if (ExtractShuffle && - TTI->getNumberOfParts(VecTy) == VecTy->getNumElements()) { - copy(VL, GatheredScalars.begin()); - } // Do not try to look for reshuffled loads for gathered loads (they will be // handled later), for vectorized scalars, and cases, which are definitely @@ -7769,12 +7652,12 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, all_of(E->Scalars, [this](Value *V) { return getTreeEntry(V); }) || isSplat(E->Scalars) || (E->Scalars != GatheredScalars && GatheredScalars.size() <= 2)) - GatherShuffles = - isGatherShuffledEntry(E, GatheredScalars, Mask, Entries, NumParts); - if (!GatherShuffles.empty()) { - if (GatherShuffles.size() == 1 && - *GatherShuffles.front() == TTI::SK_PermuteSingleSrc && - Entries.front().front()->isSame(E->Scalars)) { + GatherShuffle = isGatherShuffledEntry(E, GatheredScalars, Mask, Entries); + if (GatherShuffle) { + assert((Entries.size() == 1 || Entries.size() == 2) && + "Expected shuffle of 1 or 2 entries."); + if (*GatherShuffle == TTI::SK_PermuteSingleSrc && + Entries.front()->isSame(E->Scalars)) { // Perfect match in the graph, will reuse the previously vectorized // node. Cost is 0. 
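// (Entries.front() holds exactly the same scalars here, so its vector value can be reused as-is.)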
LLVM_DEBUG( @@ -7788,18 +7671,15 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, continue; } if (Mask[I] == PoisonMaskElem) - Mask[I] = Entries.front().front()->findLaneForValue(V); + Mask[I] = Entries.front()->findLaneForValue(V); } - Estimator.add(*Entries.front().front(), Mask); + Estimator.add(Entries.front(), Mask); return Estimator.finalize(E->ReuseShuffleIndices); } if (!Resized) { - if (GatheredScalars.size() != VF && - any_of(Entries, [&](ArrayRef TEs) { - return any_of(TEs, [&](const TreeEntry *TE) { - return TE->getVectorFactor() == VF; - }); - })) + unsigned VF1 = Entries.front()->getVectorFactor(); + unsigned VF2 = Entries.back()->getVectorFactor(); + if ((VF == VF1 || VF == VF2) && GatheredScalars.size() != VF) GatheredScalars.append(VF - GatheredScalars.size(), PoisonValue::get(ScalarTy)); } @@ -7811,21 +7691,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, LLVM_DEBUG(dbgs() << "SLP: shuffled " << Entries.size() << " entries for bundle " << shortBundleName(VL) << ".\n"); - unsigned SliceSize = E->Scalars.size() / NumParts; - SmallVector VecMask(Mask.size(), PoisonMaskElem); - for (const auto [I, TEs] : enumerate(Entries)) { - if (TEs.empty()) { - assert(!GatherShuffles[I] && - "No shuffles with empty entries list expected."); - continue; - } - assert((TEs.size() == 1 || TEs.size() == 2) && - "Expected shuffle of 1 or 2 entries."); - auto SubMask = ArrayRef(Mask).slice(I * SliceSize, SliceSize); - VecMask.assign(VecMask.size(), PoisonMaskElem); - copy(SubMask, std::next(VecMask.begin(), I * SliceSize)); - Estimator.add(*TEs.front(), *TEs.back(), VecMask); - } + Estimator.add(Entries.front(), Entries.back(), Mask); if (all_of(GatheredScalars, PoisonValue ::classof)) return Estimator.finalize(E->ReuseShuffleIndices); return Estimator.finalize( @@ -7839,19 +7705,16 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, if (!all_of(GatheredScalars, PoisonValue::classof)) { auto Gathers = ArrayRef(GatheredScalars).take_front(VL.size()); bool SameGathers = VL.equals(Gathers); - if (!SameGathers) - return Estimator.finalize( - E->ReuseShuffleIndices, E->Scalars.size(), - [&](Value *&Vec, SmallVectorImpl &Mask) { - Vec = Estimator.gather( - GatheredScalars, Constant::getNullValue(FixedVectorType::get( - ScalarTy, GatheredScalars.size()))); - }); - Value *BV = Estimator.gather(Gathers); + Value *BV = Estimator.gather( + Gathers, SameGathers ? nullptr + : Constant::getNullValue(FixedVectorType::get( + ScalarTy, GatheredScalars.size()))); SmallVector ReuseMask(Gathers.size(), PoisonMaskElem); std::iota(ReuseMask.begin(), ReuseMask.end(), 0); Estimator.add(BV, ReuseMask); } + if (ExtractShuffle) + Estimator.add(E, std::nullopt); return Estimator.finalize(E->ReuseShuffleIndices); } InstructionCost CommonCost = 0; @@ -9174,10 +9037,16 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef VectorizedVals) { } std::optional -BoUpSLP::isGatherShuffledSingleRegisterEntry( - const TreeEntry *TE, ArrayRef VL, MutableArrayRef Mask, - SmallVectorImpl &Entries, unsigned Part) { +BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef VL, + SmallVectorImpl &Mask, + SmallVectorImpl &Entries) { Entries.clear(); + // No need to check for the topmost gather node. 
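+ // (It also has no user entry, while the analysis below expects exactly one.)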
+ if (TE == VectorizableTree.front().get()) + return std::nullopt; + Mask.assign(VL.size(), PoisonMaskElem); + assert(TE->UserTreeIndices.size() == 1 && + "Expected only single user of the gather node."); // TODO: currently checking only for Scalars in the tree entry, need to count // reused elements too for better cost estimation. const EdgeInfo &TEUseEI = TE->UserTreeIndices.front(); @@ -9252,7 +9121,7 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry( UserPHI ? UserPHI->getIncomingBlock(UseEI.EdgeIdx)->getTerminator() : &getLastInstructionInBundle(UseEI.UserTE); if (TEInsertPt == InsertPt) { - // If 2 gathers are operands of the same entry (regardless of whether + // If 2 gathers are operands of the same entry (regardless of wether // user is PHI or else), compare operands indices, use the earlier one // as the base. if (TEUseEI.UserTE == UseEI.UserTE && TEUseEI.EdgeIdx < UseEI.EdgeIdx) @@ -9317,10 +9186,8 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry( } } - if (UsedTEs.empty()) { - Entries.clear(); + if (UsedTEs.empty()) return std::nullopt; - } unsigned VF = 0; if (UsedTEs.size() == 1) { @@ -9336,8 +9203,7 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry( }); if (It != FirstEntries.end() && (*It)->getVectorFactor() == VL.size()) { Entries.push_back(*It); - std::iota(std::next(Mask.begin(), Part * VL.size()), - std::next(Mask.begin(), (Part + 1) * VL.size()), 0); + std::iota(Mask.begin(), Mask.end(), 0); // Clear undef scalars. for (int I = 0, Sz = VL.size(); I < Sz; ++I) if (isa(VL[I])) @@ -9474,10 +9340,7 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry( TempEntries.push_back(Entries[I]); } Entries.swap(TempEntries); - if (EntryLanes.size() == Entries.size() && - !VL.equals(ArrayRef(TE->Scalars) - .slice(Part * VL.size(), - std::min(VL.size(), TE->Scalars.size())))) { + if (EntryLanes.size() == Entries.size() && !VL.equals(TE->Scalars)) { // We may have here 1 or 2 entries only. If the number of scalars is equal // to the number of entries, no need to do the analysis, it is not very // profitable. Since VL is not the same as TE->Scalars, it means we already @@ -9490,10 +9353,9 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry( // Pair.first is the offset to the vector, while Pair.second is the index of // scalar in the list. for (const std::pair &Pair : EntryLanes) { - unsigned Idx = Part * VL.size() + Pair.second; - Mask[Idx] = Pair.first * VF + - Entries[Pair.first]->findLaneForValue(VL[Pair.second]); - IsIdentity &= Mask[Idx] == Pair.second; + Mask[Pair.second] = Pair.first * VF + + Entries[Pair.first]->findLaneForValue(VL[Pair.second]); + IsIdentity &= Mask[Pair.second] == Pair.second; } switch (Entries.size()) { case 1: @@ -9508,63 +9370,9 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry( break; } Entries.clear(); - // Clear the corresponding mask elements. - std::fill(std::next(Mask.begin(), Part * VL.size()), - std::next(Mask.begin(), (Part + 1) * VL.size()), PoisonMaskElem); return std::nullopt; } -SmallVector> -BoUpSLP::isGatherShuffledEntry( - const TreeEntry *TE, ArrayRef VL, SmallVectorImpl &Mask, - SmallVectorImpl> &Entries, - unsigned NumParts) { - assert(NumParts > 0 && NumParts < VL.size() && - "Expected positive number of registers."); - Entries.clear(); - // No need to check for the topmost gather node. 
- if (TE == VectorizableTree.front().get()) - return {}; - Mask.assign(VL.size(), PoisonMaskElem); - assert(TE->UserTreeIndices.size() == 1 && - "Expected only single user of the gather node."); - assert(VL.size() % NumParts == 0 && - "Number of scalars must be divisible by NumParts."); - unsigned SliceSize = VL.size() / NumParts; - SmallVector> Res; - for (unsigned Part = 0; Part < NumParts; ++Part) { - ArrayRef SubVL = VL.slice(Part * SliceSize, SliceSize); - SmallVectorImpl &SubEntries = Entries.emplace_back(); - std::optional SubRes = - isGatherShuffledSingleRegisterEntry(TE, SubVL, Mask, SubEntries, Part); - if (!SubRes) - SubEntries.clear(); - Res.push_back(SubRes); - if (SubEntries.size() == 1 && - SubRes.value_or(TTI::SK_PermuteTwoSrc) == TTI::SK_PermuteSingleSrc && - SubEntries.front()->getVectorFactor() == VL.size() && - (SubEntries.front()->isSame(TE->Scalars) || - SubEntries.front()->isSame(VL))) { - Entries.clear(); - Res.clear(); - std::iota(Mask.begin(), Mask.end(), 0); - // Clear undef scalars. - for (int I = 0, Sz = VL.size(); I < Sz; ++I) - if (isa(VL[I])) - Mask[I] = PoisonMaskElem; - Entries.emplace_back(1, SubEntries.front()); - Res.push_back(TargetTransformInfo::SK_PermuteSingleSrc); - return Res; - } - } - if (all_of(Res, - [](const std::optional &SK) { return !SK; })) { - Entries.clear(); - return {}; - } - return Res; -} - InstructionCost BoUpSLP::getGatherCost(ArrayRef VL, bool ForPoisonSrc) const { // Find the type of the operands in VL. @@ -10031,13 +9839,9 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis { } /// Checks if the specified entry \p E needs to be delayed because of its /// dependency nodes. - Value *needToDelay(const TreeEntry *E, - ArrayRef> Deps) { + Value *needToDelay(const TreeEntry *E, ArrayRef Deps) { // No need to delay emission if all deps are ready. - if (all_of(Deps, [](ArrayRef TEs) { - return all_of( - TEs, [](const TreeEntry *TE) { return TE->VectorizedValue; }); - })) + if (all_of(Deps, [](const TreeEntry *TE) { return TE->VectorizedValue; })) return nullptr; // Postpone gather emission, will be emitted after the end of the // process to keep correct order. @@ -10372,13 +10176,9 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) { SmallVector Mask; SmallVector ExtractMask; std::optional ExtractShuffle; - SmallVector> GatherShuffles; - SmallVector> Entries; + std::optional GatherShuffle; + SmallVector Entries; Type *ScalarTy = GatheredScalars.front()->getType(); - unsigned NumParts = TTI->getNumberOfParts( - FixedVectorType::get(ScalarTy, GatheredScalars.size())); - if (NumParts == 0 || NumParts >= GatheredScalars.size()) - NumParts = 1; if (!all_of(GatheredScalars, UndefValue::classof)) { // Check for gathered extracts. ExtractShuffle = @@ -10397,10 +10197,9 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) { all_of(E->Scalars, [this](Value *V) { return getTreeEntry(V); }) || isSplat(E->Scalars) || (E->Scalars != GatheredScalars && GatheredScalars.size() <= 2)) { - GatherShuffles = - isGatherShuffledEntry(E, GatheredScalars, Mask, Entries, NumParts); + GatherShuffle = isGatherShuffledEntry(E, GatheredScalars, Mask, Entries); } - if (!GatherShuffles.empty()) { + if (GatherShuffle) { if (Value *Delayed = ShuffleBuilder.needToDelay(E, Entries)) { // Delay emission of gathers which are not ready yet. PostponedGathers.insert(E); @@ -10408,9 +10207,10 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) { // process to keep correct order. 
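// (Entries recorded in PostponedGathers are emitted after the rest of the tree.)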
return Delayed; } - if (GatherShuffles.size() == 1 && - *GatherShuffles.front() == TTI::SK_PermuteSingleSrc && - Entries.front().front()->isSame(E->Scalars)) { + assert((Entries.size() == 1 || Entries.size() == 2) && + "Expected shuffle of 1 or 2 entries."); + if (*GatherShuffle == TTI::SK_PermuteSingleSrc && + Entries.front()->isSame(E->Scalars)) { // Perfect match in the graph, will reuse the previously vectorized // node. Cost is 0. LLVM_DEBUG( @@ -10418,11 +10218,11 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) { << "SLP: perfect diamond match for gather bundle " << shortBundleName(E->Scalars) << ".\n"); // Restore the mask for previous partially matched values. - const TreeEntry *FrontTE = Entries.front().front(); - if (FrontTE->ReorderIndices.empty() && - ((FrontTE->ReuseShuffleIndices.empty() && - E->Scalars.size() == FrontTE->Scalars.size()) || - (E->Scalars.size() == FrontTE->ReuseShuffleIndices.size()))) { + if (Entries.front()->ReorderIndices.empty() && + ((Entries.front()->ReuseShuffleIndices.empty() && + E->Scalars.size() == Entries.front()->Scalars.size()) || + (E->Scalars.size() == + Entries.front()->ReuseShuffleIndices.size()))) { std::iota(Mask.begin(), Mask.end(), 0); } else { for (auto [I, V] : enumerate(E->Scalars)) { @@ -10430,20 +10230,17 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) { Mask[I] = PoisonMaskElem; continue; } - Mask[I] = FrontTE->findLaneForValue(V); + Mask[I] = Entries.front()->findLaneForValue(V); } } - ShuffleBuilder.add(FrontTE->VectorizedValue, Mask); + ShuffleBuilder.add(Entries.front()->VectorizedValue, Mask); Res = ShuffleBuilder.finalize(E->getCommonMask()); return Res; } if (!Resized) { - if (GatheredScalars.size() != VF && - any_of(Entries, [&](ArrayRef TEs) { - return any_of(TEs, [&](const TreeEntry *TE) { - return TE->getVectorFactor() == VF; - }); - })) + unsigned VF1 = Entries.front()->getVectorFactor(); + unsigned VF2 = Entries.back()->getVectorFactor(); + if ((VF == VF1 || VF == VF2) && GatheredScalars.size() != VF) GatheredScalars.append(VF - GatheredScalars.size(), PoisonValue::get(ScalarTy)); } @@ -10543,9 +10340,9 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) { } } }; - if (ExtractShuffle || !GatherShuffles.empty()) { + if (ExtractShuffle || GatherShuffle) { bool IsNonPoisoned = true; - bool IsUsedInExpr = true; + bool IsUsedInExpr = false; Value *Vec1 = nullptr; if (ExtractShuffle) { // Gather of extractelements can be represented as just a shuffle of @@ -10570,53 +10367,36 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) { } } if (Vec2) { - IsUsedInExpr = false; IsNonPoisoned &= isGuaranteedNotToBePoison(Vec1) && isGuaranteedNotToBePoison(Vec2); ShuffleBuilder.add(Vec1, Vec2, ExtractMask); } else if (Vec1) { - IsUsedInExpr &= FindReusedSplat( + IsUsedInExpr = FindReusedSplat( ExtractMask, cast(Vec1->getType())->getNumElements()); ShuffleBuilder.add(Vec1, ExtractMask); IsNonPoisoned &= isGuaranteedNotToBePoison(Vec1); } else { - IsUsedInExpr = false; ShuffleBuilder.add(PoisonValue::get(FixedVectorType::get( ScalarTy, GatheredScalars.size())), ExtractMask); } } - if (!GatherShuffles.empty()) { - unsigned SliceSize = E->Scalars.size() / NumParts; - SmallVector VecMask(Mask.size(), PoisonMaskElem); - for (const auto [I, TEs] : enumerate(Entries)) { - if (TEs.empty()) { - assert(!GatherShuffles[I] && - "No shuffles with empty entries list expected."); - continue; - } - assert((TEs.size() == 1 || TEs.size() == 2) && - "Expected 
shuffle of 1 or 2 entries."); - auto SubMask = ArrayRef(Mask).slice(I * SliceSize, SliceSize); - VecMask.assign(VecMask.size(), PoisonMaskElem); - copy(SubMask, std::next(VecMask.begin(), I * SliceSize)); - if (TEs.size() == 1) { - IsUsedInExpr &= FindReusedSplat( - VecMask, - cast(TEs.front()->VectorizedValue->getType()) - ->getNumElements()); - ShuffleBuilder.add(TEs.front()->VectorizedValue, VecMask); - IsNonPoisoned &= - isGuaranteedNotToBePoison(TEs.front()->VectorizedValue); - } else { - IsUsedInExpr = false; - ShuffleBuilder.add(TEs.front()->VectorizedValue, - TEs.back()->VectorizedValue, VecMask); - IsNonPoisoned &= - isGuaranteedNotToBePoison(TEs.front()->VectorizedValue) && - isGuaranteedNotToBePoison(TEs.back()->VectorizedValue); - } + if (GatherShuffle) { + if (Entries.size() == 1) { + IsUsedInExpr = FindReusedSplat( + Mask, + cast(Entries.front()->VectorizedValue->getType()) + ->getNumElements()); + ShuffleBuilder.add(Entries.front()->VectorizedValue, Mask); + IsNonPoisoned &= + isGuaranteedNotToBePoison(Entries.front()->VectorizedValue); + } else { + ShuffleBuilder.add(Entries.front()->VectorizedValue, + Entries.back()->VectorizedValue, Mask); + IsNonPoisoned &= + isGuaranteedNotToBePoison(Entries.front()->VectorizedValue) && + isGuaranteedNotToBePoison(Entries.back()->VectorizedValue); } } // Try to figure out best way to combine values: build a shuffle and insert @@ -10627,18 +10407,14 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) { int MSz = Mask.size(); // Try to build constant vector and shuffle with it only if currently we // have a single permutation and more than 1 scalar constants. - bool IsSingleShuffle = !ExtractShuffle || GatherShuffles.empty(); + bool IsSingleShuffle = !ExtractShuffle || !GatherShuffle; bool IsIdentityShuffle = (ExtractShuffle.value_or(TTI::SK_PermuteTwoSrc) == TTI::SK_PermuteSingleSrc && none_of(ExtractMask, [&](int I) { return I >= EMSz; }) && ShuffleVectorInst::isIdentityMask(ExtractMask, EMSz)) || - (!GatherShuffles.empty() && - all_of(GatherShuffles, - [](const std::optional &SK) { - return SK.value_or(TTI::SK_PermuteTwoSrc) == - TTI::SK_PermuteSingleSrc; - }) && + (GatherShuffle.value_or(TTI::SK_PermuteTwoSrc) == + TTI::SK_PermuteSingleSrc && none_of(Mask, [&](int I) { return I >= MSz; }) && ShuffleVectorInst::isIdentityMask(Mask, MSz)); bool EnoughConstsForShuffle = @@ -10814,13 +10590,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { continue; } - // if (any_of(E->getOperand(i), [&](Value *V) { - // auto *I = dyn_cast(V); - // return I && I->getParent() == IBB; - // })) - Builder.SetInsertPoint(IBB->getTerminator()); - // else - // Builder.SetInsertPoint(IBB->getFirstNonPHIOrDbgOrLifetime()); + Builder.SetInsertPoint(IBB->getTerminator()); Builder.SetCurrentDebugLocation(PH->getDebugLoc()); Value *Vec = vectorizeOperand(E, i, /*PostponedPHIs=*/true); NewPhi->addIncoming(Vec, IBB); @@ -11484,22 +11254,10 @@ Value *BoUpSLP::vectorizeTree( // The is because source vector that supposed to feed this gather node was // inserted at the end of the block [after stab instruction]. So we need // to adjust insertion point again to the end of block. - if (isa(UserI)) { - // Insert before all users. 
- Instruction *InsertPt = PrevVec->getParent()->getTerminator(); - for (User *U : PrevVec->users()) { - if (U == UserI) - continue; - auto *UI = dyn_cast(U); - if (!UI || isa(UI) || UI->getParent() != InsertPt->getParent()) - continue; - if (UI->comesBefore(InsertPt)) - InsertPt = UI; - } - Builder.SetInsertPoint(InsertPt); - } else { + if (isa(UserI)) + Builder.SetInsertPoint(PrevVec->getParent()->getTerminator()); + else Builder.SetInsertPoint(PrevVec); - } Builder.SetCurrentDebugLocation(UserI->getDebugLoc()); Value *Vec = vectorizeTree(TE, /*PostponedPHIs=*/false); PrevVec->replaceAllUsesWith(Vec); diff --git a/llvm/test/Transforms/SLPVectorizer/X86/multi-nodes-to-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/multi-nodes-to-shuffle.ll index e5b5a5c6c4a00..21aac98aa3ece 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/multi-nodes-to-shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/multi-nodes-to-shuffle.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux -slp-threshold=-115 | FileCheck %s -; RUN: opt -passes=slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux -slp-threshold=-115 -mattr=+avx2 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux -slp-threshold=-107 | FileCheck %s +; RUN: opt -passes=slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux -slp-threshold=-107 -mattr=+avx2 | FileCheck %s define void @test(i64 %p0, i64 %p1, i64 %p2, i64 %p3) { ; CHECK-LABEL: @test( @@ -14,43 +14,18 @@ define void @test(i64 %p0, i64 %p1, i64 %p2, i64 %p3) { ; CHECK-NEXT: [[TMP6:%.*]] = sdiv <4 x i64> [[TMP3]], [[TMP3]] ; CHECK-NEXT: [[TMP7:%.*]] = sub <4 x i64> [[TMP5]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = shl <4 x i64> [[TMP4]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP5]], <4 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i64> [[TMP6]], <4 x i64> [[TMP5]], <4 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> [[TMP10]], <4 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP5]], <4 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i64> [[TMP6]], <4 x i64> [[TMP5]], <4 x i32> -; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i64> [[TMP12]], <4 x i64> [[TMP13]], <4 x i32> -; CHECK-NEXT: [[TMP15:%.*]] = or <4 x i64> [[TMP11]], [[TMP14]] -; CHECK-NEXT: [[TMP16:%.*]] = trunc <4 x i64> [[TMP15]] to <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP5]], <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> [[TMP6]], <4 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP5]], <4 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP11]], <4 x i64> [[TMP6]], <4 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = or <4 x i64> [[TMP10]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = trunc <4 x i64> [[TMP13]] to <4 x i32> ; CHECK-NEXT: br label [[BB:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[TMP17:%.*]] = phi <4 x i32> [ [[TMP18:%.*]], [[BB]] ], [ [[TMP16]], [[ENTRY:%.*]] ] -; CHECK-NEXT: [[TMP18]] = trunc <4 x i64> [[TMP8]] to <4 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = phi <4 x i32> [ [[TMP16:%.*]], [[BB]] ], [ [[TMP14]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP16]] = trunc <4 x i64> [[TMP8]] to <4 x i32> ; CHECK-NEXT: br label [[BB]] ; -; AVX2-LABEL: @test( -; AVX2-NEXT: entry: -; AVX2-NEXT: [[TMP0:%.*]] = 
insertelement <4 x i64> poison, i64 [[P0:%.*]], i32 0 -; AVX2-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> [[TMP0]], i64 [[P1:%.*]], i32 1 -; AVX2-NEXT: [[TMP2:%.*]] = insertelement <4 x i64> [[TMP1]], i64 [[P2:%.*]], i32 2 -; AVX2-NEXT: [[TMP3:%.*]] = insertelement <4 x i64> [[TMP2]], i64 [[P3:%.*]], i32 3 -; AVX2-NEXT: [[TMP4:%.*]] = add <4 x i64> [[TMP3]], [[TMP3]] -; AVX2-NEXT: [[TMP5:%.*]] = mul <4 x i64> [[TMP3]], [[TMP3]] -; AVX2-NEXT: [[TMP6:%.*]] = sdiv <4 x i64> [[TMP3]], [[TMP3]] -; AVX2-NEXT: [[TMP7:%.*]] = sub <4 x i64> [[TMP5]], [[TMP6]] -; AVX2-NEXT: [[TMP8:%.*]] = shl <4 x i64> [[TMP4]], [[TMP7]] -; AVX2-NEXT: [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP5]], <4 x i32> -; AVX2-NEXT: [[TMP10:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> [[TMP6]], <4 x i32> -; AVX2-NEXT: [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP5]], <4 x i32> -; AVX2-NEXT: [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP11]], <4 x i64> [[TMP6]], <4 x i32> -; AVX2-NEXT: [[TMP13:%.*]] = or <4 x i64> [[TMP10]], [[TMP12]] -; AVX2-NEXT: [[TMP14:%.*]] = trunc <4 x i64> [[TMP13]] to <4 x i32> -; AVX2-NEXT: br label [[BB:%.*]] -; AVX2: bb: -; AVX2-NEXT: [[TMP15:%.*]] = phi <4 x i32> [ [[TMP16:%.*]], [[BB]] ], [ [[TMP14]], [[ENTRY:%.*]] ] -; AVX2-NEXT: [[TMP16]] = trunc <4 x i64> [[TMP8]] to <4 x i32> -; AVX2-NEXT: br label [[BB]] -; entry: %a0 = add i64 %p0, %p0 %a1 = add i64 %p1, %p1 From 86d07154e6b74fdd93d7678c32f001a26491a481 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Thu, 26 Oct 2023 15:37:13 +0000 Subject: [PATCH 090/877] [gn build] Port a3490920615e --- llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index 8766740a9ce83..50969e0f3f2ae 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -927,6 +927,7 @@ if (current_toolchain == default_toolchain) { "__utility/piecewise_construct.h", "__utility/priority_tag.h", "__utility/rel_ops.h", + "__utility/small_buffer.h", "__utility/swap.h", "__utility/to_underlying.h", "__utility/unreachable.h", From 196d154ab7a76e8ccb11addf61ff53387e397130 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Wed, 26 Jul 2023 07:47:08 -0700 Subject: [PATCH 091/877] [SLP]Improve isGatherShuffledEntry by trying per-register shuffle. Currently when building gather/buildvector node, we try to build nodes shuffles without taking into account separate vector registers. We can improve final codegen and the whole vectorization process by including this info into the analysis and the vector code emission, allows to emit better vectorized code. Differential Revision: https://reviews.llvm.org/D149742 --- .../Transforms/Vectorize/SLPVectorizer.cpp | 463 +++++++++++++----- .../X86/multi-nodes-to-shuffle.ll | 45 +- 2 files changed, 388 insertions(+), 120 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 4f82d2d1d6d91..bb4e743c1544a 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -2507,17 +2507,31 @@ class BoUpSLP { /// instruction in the list). Instruction &getLastInstructionInBundle(const TreeEntry *E); - /// Checks if the gathered \p VL can be represented as shuffle(s) of previous - /// tree entries. 
+ /// Checks if the gathered \p VL can be represented as a single register + /// shuffle(s) of previous tree entries. /// \param TE Tree entry checked for permutation. /// \param VL List of scalars (a subset of the TE scalar), checked for - /// permutations. + /// permutations. Must form single-register vector. /// \returns ShuffleKind, if gathered values can be represented as shuffles of - /// previous tree entries. \p Mask is filled with the shuffle mask. + /// previous tree entries. \p Part of \p Mask is filled with the shuffle mask. std::optional - isGatherShuffledEntry(const TreeEntry *TE, ArrayRef VL, - SmallVectorImpl &Mask, - SmallVectorImpl &Entries); + isGatherShuffledSingleRegisterEntry( + const TreeEntry *TE, ArrayRef VL, MutableArrayRef Mask, + SmallVectorImpl &Entries, unsigned Part); + + /// Checks if the gathered \p VL can be represented as multi-register + /// shuffle(s) of previous tree entries. + /// \param TE Tree entry checked for permutation. + /// \param VL List of scalars (a subset of the TE scalar), checked for + /// permutations. + /// \returns per-register series of ShuffleKind, if gathered values can be + /// represented as shuffles of previous tree entries. \p Mask is filled with + /// the shuffle mask (also on per-register base). + SmallVector> + isGatherShuffledEntry( + const TreeEntry *TE, ArrayRef VL, SmallVectorImpl &Mask, + SmallVectorImpl> &Entries, + unsigned NumParts); /// \returns the scalarization cost for this list of values. Assuming that /// this subtree gets vectorized, we may need to extract the values from the @@ -6990,6 +7004,11 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis { BoUpSLP &R; SmallPtrSetImpl &CheckedExtracts; constexpr static TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; + /// While set, still trying to estimate the cost for the same nodes and we + /// can delay actual cost estimation (virtual shuffle instruction emission). + /// May help better estimate the cost if same nodes must be permuted + allows + /// to move most of the long shuffles cost estimation to TTI. + bool SameNodesEstimated = true; static Constant *getAllOnesValue(const DataLayout &DL, Type *Ty) { if (Ty->getScalarType()->isPointerTy()) { @@ -7230,6 +7249,49 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis { } return Cost; } + /// Transforms mask \p CommonMask per given \p Mask to make proper set after + /// shuffle emission. + static void transformMaskAfterShuffle(MutableArrayRef CommonMask, + ArrayRef Mask) { + for (unsigned Idx = 0, Sz = CommonMask.size(); Idx < Sz; ++Idx) + if (Mask[Idx] != PoisonMaskElem) + CommonMask[Idx] = Idx; + } + /// Adds the cost of reshuffling \p E1 and \p E2 (if present), using given + /// mask \p Mask, register number \p Part, that includes \p SliceSize + /// elements. + void estimateNodesPermuteCost(const TreeEntry &E1, const TreeEntry *E2, + ArrayRef Mask, unsigned Part, + unsigned SliceSize) { + if (SameNodesEstimated) { + // Delay the cost estimation if the same nodes are reshuffling. + // If we already requested the cost of reshuffling of E1 and E2 before, no + // need to estimate another cost with the sub-Mask, instead include this + // sub-Mask into the CommonMask to estimate it later and avoid double cost + // estimation. 
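+ // (Merging sub-masks this way costs one combined shuffle instead of one shuffle per register.)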
+ if ((InVectors.size() == 2 && + InVectors.front().get() == &E1 && + InVectors.back().get() == E2) || + (!E2 && InVectors.front().get() == &E1)) { + assert(all_of(ArrayRef(CommonMask).slice(Part * SliceSize, SliceSize), + [](int Idx) { return Idx == PoisonMaskElem; }) && + "Expected all poisoned elements."); + ArrayRef SubMask = + ArrayRef(Mask).slice(Part * SliceSize, SliceSize); + copy(SubMask, std::next(CommonMask.begin(), SliceSize * Part)); + return; + } + // Found non-matching nodes - need to estimate the cost for the matched + // and transform mask. + Cost += createShuffle(InVectors.front(), + InVectors.size() == 1 ? nullptr : InVectors.back(), + CommonMask); + transformMaskAfterShuffle(CommonMask, CommonMask); + } + SameNodesEstimated = false; + Cost += createShuffle(&E1, E2, Mask); + transformMaskAfterShuffle(CommonMask, Mask); + } class ShuffleCostBuilder { const TargetTransformInfo &TTI; @@ -7493,31 +7555,74 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis { // into a vector and can be represented as a permutation elements in a // single input vector or of 2 input vectors. Cost += computeExtractCost(VL, Mask, ShuffleKind); + InVectors.assign(1, E); + CommonMask.assign(Mask.begin(), Mask.end()); + transformMaskAfterShuffle(CommonMask, CommonMask); + SameNodesEstimated = false; return VecBase; } - void add(const TreeEntry *E1, const TreeEntry *E2, ArrayRef Mask) { - if (E1 == E2) { + void add(const TreeEntry &E1, const TreeEntry &E2, ArrayRef Mask) { + if (&E1 == &E2) { assert(all_of(Mask, - [=](int Idx) { - return Idx < static_cast(E1->getVectorFactor()); + [&](int Idx) { + return Idx < static_cast(E1.getVectorFactor()); }) && "Expected single vector shuffle mask."); add(E1, Mask); return; } - CommonMask.assign(Mask.begin(), Mask.end()); - InVectors.assign({E1, E2}); + if (InVectors.empty()) { + CommonMask.assign(Mask.begin(), Mask.end()); + InVectors.assign({&E1, &E2}); + return; + } + assert(!CommonMask.empty() && "Expected non-empty common mask."); + auto *MaskVecTy = + FixedVectorType::get(E1.Scalars.front()->getType(), Mask.size()); + unsigned NumParts = TTI.getNumberOfParts(MaskVecTy); + assert(NumParts > 0 && NumParts < Mask.size() && + "Expected positive number of registers."); + unsigned SliceSize = Mask.size() / NumParts; + const auto *It = + find_if(Mask, [](int Idx) { return Idx != PoisonMaskElem; }); + unsigned Part = std::distance(Mask.begin(), It) / SliceSize; + estimateNodesPermuteCost(E1, &E2, Mask, Part, SliceSize); } - void add(const TreeEntry *E1, ArrayRef Mask) { - CommonMask.assign(Mask.begin(), Mask.end()); - InVectors.assign(1, E1); + void add(const TreeEntry &E1, ArrayRef Mask) { + if (InVectors.empty()) { + CommonMask.assign(Mask.begin(), Mask.end()); + InVectors.assign(1, &E1); + return; + } + assert(!CommonMask.empty() && "Expected non-empty common mask."); + auto *MaskVecTy = + FixedVectorType::get(E1.Scalars.front()->getType(), Mask.size()); + unsigned NumParts = TTI.getNumberOfParts(MaskVecTy); + assert(NumParts > 0 && NumParts < Mask.size() && + "Expected positive number of registers."); + unsigned SliceSize = Mask.size() / NumParts; + const auto *It = + find_if(Mask, [](int Idx) { return Idx != PoisonMaskElem; }); + unsigned Part = std::distance(Mask.begin(), It) / SliceSize; + estimateNodesPermuteCost(E1, nullptr, Mask, Part, SliceSize); + if (!SameNodesEstimated && InVectors.size() == 1) + InVectors.emplace_back(&E1); } /// Adds another one input vector and the mask for the shuffling. 
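/// (This overload takes an already materialized vector value instead of a tree entry.)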
void add(Value *V1, ArrayRef Mask) { - assert(CommonMask.empty() && InVectors.empty() && - "Expected empty input mask/vectors."); - CommonMask.assign(Mask.begin(), Mask.end()); - InVectors.assign(1, V1); + if (InVectors.empty()) { + assert(CommonMask.empty() && "Expected empty input mask/vectors."); + CommonMask.assign(Mask.begin(), Mask.end()); + InVectors.assign(1, V1); + return; + } + assert(InVectors.size() == 1 && InVectors.front().is() && + !CommonMask.empty() && "Expected only single entry from extracts."); + InVectors.push_back(V1); + unsigned VF = CommonMask.size(); + for (unsigned Idx = 0; Idx < VF; ++Idx) + if (Mask[Idx] != PoisonMaskElem && CommonMask[Idx] == PoisonMaskElem) + CommonMask[Idx] = Mask[Idx] + VF; } Value *gather(ArrayRef VL, Value *Root = nullptr) { Cost += getBuildVectorCost(VL, Root); @@ -7579,12 +7684,16 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, ArrayRef VL = E->Scalars; Type *ScalarTy = VL[0]->getType(); - if (auto *SI = dyn_cast(VL[0])) - ScalarTy = SI->getValueOperand()->getType(); - else if (auto *CI = dyn_cast(VL[0])) - ScalarTy = CI->getOperand(0)->getType(); - else if (auto *IE = dyn_cast(VL[0])) - ScalarTy = IE->getOperand(1)->getType(); + if (E->State != TreeEntry::NeedToGather) { + if (auto *SI = dyn_cast(VL[0])) + ScalarTy = SI->getValueOperand()->getType(); + else if (auto *CI = dyn_cast(VL[0])) + ScalarTy = CI->getOperand(0)->getType(); + else if (auto *IE = dyn_cast(VL[0])) + ScalarTy = IE->getOperand(1)->getType(); + } + if (!FixedVectorType::isValidElementType(ScalarTy)) + return InstructionCost::getInvalid(); auto *VecTy = FixedVectorType::get(ScalarTy, VL.size()); TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; @@ -7596,7 +7705,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, VecTy = FixedVectorType::get(ScalarTy, VL.size()); } unsigned EntryVF = E->getVectorFactor(); - auto *FinalVecTy = FixedVectorType::get(VecTy->getElementType(), EntryVF); + auto *FinalVecTy = FixedVectorType::get(ScalarTy, EntryVF); bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty(); if (E->State == TreeEntry::NeedToGather) { @@ -7629,20 +7738,28 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, SmallVector Mask; SmallVector ExtractMask; std::optional ExtractShuffle; - std::optional GatherShuffle; - SmallVector Entries; + SmallVector> GatherShuffles; + SmallVector> Entries; // Check for gathered extracts. 
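// (Scalars that are extractelement instructions may be shuffled directly from their source vectors.)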
- ExtractShuffle = tryToGatherSingleRegisterExtractElements(GatheredScalars, ExtractMask); + ExtractShuffle = + tryToGatherSingleRegisterExtractElements(GatheredScalars, ExtractMask); bool Resized = false; + unsigned NumParts = TTI->getNumberOfParts(VecTy); + if (NumParts == 0 || NumParts >= GatheredScalars.size()) + NumParts = 1; if (Value *VecBase = Estimator.adjustExtracts( - E, ExtractMask, ExtractShuffle.value_or(TTI::SK_PermuteTwoSrc))) + E, ExtractMask, ExtractShuffle.value_or(TTI::SK_PermuteTwoSrc))) { if (auto *VecBaseTy = dyn_cast(VecBase->getType())) if (VF == VecBaseTy->getNumElements() && GatheredScalars.size() != VF) { Resized = true; GatheredScalars.append(VF - GatheredScalars.size(), PoisonValue::get(ScalarTy)); } + } else if (ExtractShuffle && + TTI->getNumberOfParts(VecTy) == VecTy->getNumElements()) { + copy(VL, GatheredScalars.begin()); + } // Do not try to look for reshuffled loads for gathered loads (they will be // handled later), for vectorized scalars, and cases, which are definitely @@ -7652,12 +7769,12 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, all_of(E->Scalars, [this](Value *V) { return getTreeEntry(V); }) || isSplat(E->Scalars) || (E->Scalars != GatheredScalars && GatheredScalars.size() <= 2)) - GatherShuffle = isGatherShuffledEntry(E, GatheredScalars, Mask, Entries); - if (GatherShuffle) { - assert((Entries.size() == 1 || Entries.size() == 2) && - "Expected shuffle of 1 or 2 entries."); - if (*GatherShuffle == TTI::SK_PermuteSingleSrc && - Entries.front()->isSame(E->Scalars)) { + GatherShuffles = + isGatherShuffledEntry(E, GatheredScalars, Mask, Entries, NumParts); + if (!GatherShuffles.empty()) { + if (GatherShuffles.size() == 1 && + *GatherShuffles.front() == TTI::SK_PermuteSingleSrc && + Entries.front().front()->isSame(E->Scalars)) { // Perfect match in the graph, will reuse the previously vectorized // node. Cost is 0. 
LLVM_DEBUG( @@ -7671,15 +7788,18 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, continue; } if (Mask[I] == PoisonMaskElem) - Mask[I] = Entries.front()->findLaneForValue(V); + Mask[I] = Entries.front().front()->findLaneForValue(V); } - Estimator.add(Entries.front(), Mask); + Estimator.add(*Entries.front().front(), Mask); return Estimator.finalize(E->ReuseShuffleIndices); } if (!Resized) { - unsigned VF1 = Entries.front()->getVectorFactor(); - unsigned VF2 = Entries.back()->getVectorFactor(); - if ((VF == VF1 || VF == VF2) && GatheredScalars.size() != VF) + if (GatheredScalars.size() != VF && + any_of(Entries, [&](ArrayRef TEs) { + return any_of(TEs, [&](const TreeEntry *TE) { + return TE->getVectorFactor() == VF; + }); + })) GatheredScalars.append(VF - GatheredScalars.size(), PoisonValue::get(ScalarTy)); } @@ -7691,7 +7811,21 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, LLVM_DEBUG(dbgs() << "SLP: shuffled " << Entries.size() << " entries for bundle " << shortBundleName(VL) << ".\n"); - Estimator.add(Entries.front(), Entries.back(), Mask); + unsigned SliceSize = E->Scalars.size() / NumParts; + SmallVector VecMask(Mask.size(), PoisonMaskElem); + for (const auto [I, TEs] : enumerate(Entries)) { + if (TEs.empty()) { + assert(!GatherShuffles[I] && + "No shuffles with empty entries list expected."); + continue; + } + assert((TEs.size() == 1 || TEs.size() == 2) && + "Expected shuffle of 1 or 2 entries."); + auto SubMask = ArrayRef(Mask).slice(I * SliceSize, SliceSize); + VecMask.assign(VecMask.size(), PoisonMaskElem); + copy(SubMask, std::next(VecMask.begin(), I * SliceSize)); + Estimator.add(*TEs.front(), *TEs.back(), VecMask); + } if (all_of(GatheredScalars, PoisonValue ::classof)) return Estimator.finalize(E->ReuseShuffleIndices); return Estimator.finalize( @@ -7705,16 +7839,19 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, if (!all_of(GatheredScalars, PoisonValue::classof)) { auto Gathers = ArrayRef(GatheredScalars).take_front(VL.size()); bool SameGathers = VL.equals(Gathers); - Value *BV = Estimator.gather( - Gathers, SameGathers ? nullptr - : Constant::getNullValue(FixedVectorType::get( - ScalarTy, GatheredScalars.size()))); + if (!SameGathers) + return Estimator.finalize( + E->ReuseShuffleIndices, E->Scalars.size(), + [&](Value *&Vec, SmallVectorImpl &Mask) { + Vec = Estimator.gather( + GatheredScalars, Constant::getNullValue(FixedVectorType::get( + ScalarTy, GatheredScalars.size()))); + }); + Value *BV = Estimator.gather(Gathers); SmallVector ReuseMask(Gathers.size(), PoisonMaskElem); std::iota(ReuseMask.begin(), ReuseMask.end(), 0); Estimator.add(BV, ReuseMask); } - if (ExtractShuffle) - Estimator.add(E, std::nullopt); return Estimator.finalize(E->ReuseShuffleIndices); } InstructionCost CommonCost = 0; @@ -9037,16 +9174,10 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef VectorizedVals) { } std::optional -BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef VL, - SmallVectorImpl &Mask, - SmallVectorImpl &Entries) { +BoUpSLP::isGatherShuffledSingleRegisterEntry( + const TreeEntry *TE, ArrayRef VL, MutableArrayRef Mask, + SmallVectorImpl &Entries, unsigned Part) { Entries.clear(); - // No need to check for the topmost gather node. 
- if (TE == VectorizableTree.front().get()) - return std::nullopt; - Mask.assign(VL.size(), PoisonMaskElem); - assert(TE->UserTreeIndices.size() == 1 && - "Expected only single user of the gather node."); // TODO: currently checking only for Scalars in the tree entry, need to count // reused elements too for better cost estimation. const EdgeInfo &TEUseEI = TE->UserTreeIndices.front(); @@ -9121,7 +9252,7 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef VL, UserPHI ? UserPHI->getIncomingBlock(UseEI.EdgeIdx)->getTerminator() : &getLastInstructionInBundle(UseEI.UserTE); if (TEInsertPt == InsertPt) { - // If 2 gathers are operands of the same entry (regardless of wether + // If 2 gathers are operands of the same entry (regardless of whether // user is PHI or else), compare operands indices, use the earlier one // as the base. if (TEUseEI.UserTE == UseEI.UserTE && TEUseEI.EdgeIdx < UseEI.EdgeIdx) @@ -9186,8 +9317,10 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef VL, } } - if (UsedTEs.empty()) + if (UsedTEs.empty()) { + Entries.clear(); return std::nullopt; + } unsigned VF = 0; if (UsedTEs.size() == 1) { @@ -9203,7 +9336,8 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef VL, }); if (It != FirstEntries.end() && (*It)->getVectorFactor() == VL.size()) { Entries.push_back(*It); - std::iota(Mask.begin(), Mask.end(), 0); + std::iota(std::next(Mask.begin(), Part * VL.size()), + std::next(Mask.begin(), (Part + 1) * VL.size()), 0); // Clear undef scalars. for (int I = 0, Sz = VL.size(); I < Sz; ++I) if (isa(VL[I])) @@ -9340,7 +9474,10 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef VL, TempEntries.push_back(Entries[I]); } Entries.swap(TempEntries); - if (EntryLanes.size() == Entries.size() && !VL.equals(TE->Scalars)) { + if (EntryLanes.size() == Entries.size() && + !VL.equals(ArrayRef(TE->Scalars) + .slice(Part * VL.size(), + std::min(VL.size(), TE->Scalars.size())))) { // We may have here 1 or 2 entries only. If the number of scalars is equal // to the number of entries, no need to do the analysis, it is not very // profitable. Since VL is not the same as TE->Scalars, it means we already @@ -9353,9 +9490,10 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef VL, // Pair.first is the offset to the vector, while Pair.second is the index of // scalar in the list. for (const std::pair &Pair : EntryLanes) { - Mask[Pair.second] = Pair.first * VF + - Entries[Pair.first]->findLaneForValue(VL[Pair.second]); - IsIdentity &= Mask[Pair.second] == Pair.second; + unsigned Idx = Part * VL.size() + Pair.second; + Mask[Idx] = Pair.first * VF + + Entries[Pair.first]->findLaneForValue(VL[Pair.second]); + IsIdentity &= Mask[Idx] == Pair.second; } switch (Entries.size()) { case 1: @@ -9370,9 +9508,64 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef VL, break; } Entries.clear(); + // Clear the corresponding mask elements. + std::fill(std::next(Mask.begin(), Part * VL.size()), + std::next(Mask.begin(), (Part + 1) * VL.size()), PoisonMaskElem); return std::nullopt; } +SmallVector> +BoUpSLP::isGatherShuffledEntry( + const TreeEntry *TE, ArrayRef VL, SmallVectorImpl &Mask, + SmallVectorImpl> &Entries, + unsigned NumParts) { + assert(NumParts > 0 && NumParts < VL.size() && + "Expected positive number of registers."); + Entries.clear(); + // No need to check for the topmost gather node. 
+ if (TE == VectorizableTree.front().get()) + return {}; + Mask.assign(VL.size(), PoisonMaskElem); + assert(TE->UserTreeIndices.size() == 1 && + "Expected only single user of the gather node."); + assert(VL.size() % NumParts == 0 && + "Number of scalars must be divisible by NumParts."); + unsigned SliceSize = VL.size() / NumParts; + SmallVector> Res; + for (unsigned Part = 0; Part < NumParts; ++Part) { + ArrayRef SubVL = VL.slice(Part * SliceSize, SliceSize); + SmallVectorImpl &SubEntries = Entries.emplace_back(); + std::optional SubRes = + isGatherShuffledSingleRegisterEntry(TE, SubVL, Mask, SubEntries, Part); + if (!SubRes) + SubEntries.clear(); + Res.push_back(SubRes); + if (SubEntries.size() == 1 && *SubRes == TTI::SK_PermuteSingleSrc && + SubEntries.front()->getVectorFactor() == VL.size() && + (SubEntries.front()->isSame(TE->Scalars) || + SubEntries.front()->isSame(VL))) { + SmallVector LocalSubEntries; + LocalSubEntries.swap(SubEntries); + Entries.clear(); + Res.clear(); + std::iota(Mask.begin(), Mask.end(), 0); + // Clear undef scalars. + for (int I = 0, Sz = VL.size(); I < Sz; ++I) + if (isa(VL[I])) + Mask[I] = PoisonMaskElem; + Entries.emplace_back(1, LocalSubEntries.front()); + Res.push_back(TargetTransformInfo::SK_PermuteSingleSrc); + return Res; + } + } + if (all_of(Res, + [](const std::optional &SK) { return !SK; })) { + Entries.clear(); + return {}; + } + return Res; +} + InstructionCost BoUpSLP::getGatherCost(ArrayRef VL, bool ForPoisonSrc) const { // Find the type of the operands in VL. @@ -9839,9 +10032,13 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis { } /// Checks if the specified entry \p E needs to be delayed because of its /// dependency nodes. - Value *needToDelay(const TreeEntry *E, ArrayRef Deps) { + Value *needToDelay(const TreeEntry *E, + ArrayRef> Deps) { // No need to delay emission if all deps are ready. - if (all_of(Deps, [](const TreeEntry *TE) { return TE->VectorizedValue; })) + if (all_of(Deps, [](ArrayRef TEs) { + return all_of( + TEs, [](const TreeEntry *TE) { return TE->VectorizedValue; }); + })) return nullptr; // Postpone gather emission, will be emitted after the end of the // process to keep correct order. @@ -10176,9 +10373,13 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) { SmallVector Mask; SmallVector ExtractMask; std::optional ExtractShuffle; - std::optional GatherShuffle; - SmallVector Entries; + SmallVector> GatherShuffles; + SmallVector> Entries; Type *ScalarTy = GatheredScalars.front()->getType(); + unsigned NumParts = TTI->getNumberOfParts( + FixedVectorType::get(ScalarTy, GatheredScalars.size())); + if (NumParts == 0 || NumParts >= GatheredScalars.size()) + NumParts = 1; if (!all_of(GatheredScalars, UndefValue::classof)) { // Check for gathered extracts. ExtractShuffle = @@ -10197,9 +10398,10 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) { all_of(E->Scalars, [this](Value *V) { return getTreeEntry(V); }) || isSplat(E->Scalars) || (E->Scalars != GatheredScalars && GatheredScalars.size() <= 2)) { - GatherShuffle = isGatherShuffledEntry(E, GatheredScalars, Mask, Entries); + GatherShuffles = + isGatherShuffledEntry(E, GatheredScalars, Mask, Entries, NumParts); } - if (GatherShuffle) { + if (!GatherShuffles.empty()) { if (Value *Delayed = ShuffleBuilder.needToDelay(E, Entries)) { // Delay emission of gathers which are not ready yet. 
PostponedGathers.insert(E); @@ -10207,10 +10409,9 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) { // process to keep correct order. return Delayed; } - assert((Entries.size() == 1 || Entries.size() == 2) && - "Expected shuffle of 1 or 2 entries."); - if (*GatherShuffle == TTI::SK_PermuteSingleSrc && - Entries.front()->isSame(E->Scalars)) { + if (GatherShuffles.size() == 1 && + *GatherShuffles.front() == TTI::SK_PermuteSingleSrc && + Entries.front().front()->isSame(E->Scalars)) { // Perfect match in the graph, will reuse the previously vectorized // node. Cost is 0. LLVM_DEBUG( @@ -10218,11 +10419,11 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) { << "SLP: perfect diamond match for gather bundle " << shortBundleName(E->Scalars) << ".\n"); // Restore the mask for previous partially matched values. - if (Entries.front()->ReorderIndices.empty() && - ((Entries.front()->ReuseShuffleIndices.empty() && - E->Scalars.size() == Entries.front()->Scalars.size()) || - (E->Scalars.size() == - Entries.front()->ReuseShuffleIndices.size()))) { + const TreeEntry *FrontTE = Entries.front().front(); + if (FrontTE->ReorderIndices.empty() && + ((FrontTE->ReuseShuffleIndices.empty() && + E->Scalars.size() == FrontTE->Scalars.size()) || + (E->Scalars.size() == FrontTE->ReuseShuffleIndices.size()))) { std::iota(Mask.begin(), Mask.end(), 0); } else { for (auto [I, V] : enumerate(E->Scalars)) { @@ -10230,17 +10431,20 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) { Mask[I] = PoisonMaskElem; continue; } - Mask[I] = Entries.front()->findLaneForValue(V); + Mask[I] = FrontTE->findLaneForValue(V); } } - ShuffleBuilder.add(Entries.front()->VectorizedValue, Mask); + ShuffleBuilder.add(FrontTE->VectorizedValue, Mask); Res = ShuffleBuilder.finalize(E->getCommonMask()); return Res; } if (!Resized) { - unsigned VF1 = Entries.front()->getVectorFactor(); - unsigned VF2 = Entries.back()->getVectorFactor(); - if ((VF == VF1 || VF == VF2) && GatheredScalars.size() != VF) + if (GatheredScalars.size() != VF && + any_of(Entries, [&](ArrayRef TEs) { + return any_of(TEs, [&](const TreeEntry *TE) { + return TE->getVectorFactor() == VF; + }); + })) GatheredScalars.append(VF - GatheredScalars.size(), PoisonValue::get(ScalarTy)); } @@ -10340,9 +10544,9 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) { } } }; - if (ExtractShuffle || GatherShuffle) { + if (ExtractShuffle || !GatherShuffles.empty()) { bool IsNonPoisoned = true; - bool IsUsedInExpr = false; + bool IsUsedInExpr = true; Value *Vec1 = nullptr; if (ExtractShuffle) { // Gather of extractelements can be represented as just a shuffle of @@ -10367,36 +10571,53 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) { } } if (Vec2) { + IsUsedInExpr = false; IsNonPoisoned &= isGuaranteedNotToBePoison(Vec1) && isGuaranteedNotToBePoison(Vec2); ShuffleBuilder.add(Vec1, Vec2, ExtractMask); } else if (Vec1) { - IsUsedInExpr = FindReusedSplat( + IsUsedInExpr &= FindReusedSplat( ExtractMask, cast(Vec1->getType())->getNumElements()); ShuffleBuilder.add(Vec1, ExtractMask); IsNonPoisoned &= isGuaranteedNotToBePoison(Vec1); } else { + IsUsedInExpr = false; ShuffleBuilder.add(PoisonValue::get(FixedVectorType::get( ScalarTy, GatheredScalars.size())), ExtractMask); } } - if (GatherShuffle) { - if (Entries.size() == 1) { - IsUsedInExpr = FindReusedSplat( - Mask, - cast(Entries.front()->VectorizedValue->getType()) - ->getNumElements()); - 
ShuffleBuilder.add(Entries.front()->VectorizedValue, Mask);
-      IsNonPoisoned &=
-          isGuaranteedNotToBePoison(Entries.front()->VectorizedValue);
-    } else {
-      ShuffleBuilder.add(Entries.front()->VectorizedValue,
-                         Entries.back()->VectorizedValue, Mask);
-      IsNonPoisoned &=
-          isGuaranteedNotToBePoison(Entries.front()->VectorizedValue) &&
-          isGuaranteedNotToBePoison(Entries.back()->VectorizedValue);
+    if (!GatherShuffles.empty()) {
+      unsigned SliceSize = E->Scalars.size() / NumParts;
+      SmallVector<int> VecMask(Mask.size(), PoisonMaskElem);
+      for (const auto [I, TEs] : enumerate(Entries)) {
+        if (TEs.empty()) {
+          assert(!GatherShuffles[I] &&
+                 "No shuffles with empty entries list expected.");
+          continue;
+        }
+        assert((TEs.size() == 1 || TEs.size() == 2) &&
+               "Expected shuffle of 1 or 2 entries.");
+        auto SubMask = ArrayRef(Mask).slice(I * SliceSize, SliceSize);
+        VecMask.assign(VecMask.size(), PoisonMaskElem);
+        copy(SubMask, std::next(VecMask.begin(), I * SliceSize));
+        if (TEs.size() == 1) {
+          IsUsedInExpr &= FindReusedSplat(
+              VecMask,
+              cast<FixedVectorType>(TEs.front()->VectorizedValue->getType())
+                  ->getNumElements());
+          ShuffleBuilder.add(TEs.front()->VectorizedValue, VecMask);
+          IsNonPoisoned &=
+              isGuaranteedNotToBePoison(TEs.front()->VectorizedValue);
+        } else {
+          IsUsedInExpr = false;
+          ShuffleBuilder.add(TEs.front()->VectorizedValue,
+                             TEs.back()->VectorizedValue, VecMask);
+          IsNonPoisoned &=
+              isGuaranteedNotToBePoison(TEs.front()->VectorizedValue) &&
+              isGuaranteedNotToBePoison(TEs.back()->VectorizedValue);
+        }
       }
     }
     // Try to figure out best way to combine values: build a shuffle and insert
@@ -10407,14 +10628,18 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
     int MSz = Mask.size();
     // Try to build constant vector and shuffle with it only if currently we
     // have a single permutation and more than 1 scalar constants.
-    bool IsSingleShuffle = !ExtractShuffle || !GatherShuffle;
+    bool IsSingleShuffle = !ExtractShuffle || GatherShuffles.empty();
     bool IsIdentityShuffle =
         (ExtractShuffle.value_or(TTI::SK_PermuteTwoSrc) ==
             TTI::SK_PermuteSingleSrc &&
         none_of(ExtractMask, [&](int I) { return I >= EMSz; }) &&
         ShuffleVectorInst::isIdentityMask(ExtractMask, EMSz)) ||
-        (GatherShuffle.value_or(TTI::SK_PermuteTwoSrc) ==
-            TTI::SK_PermuteSingleSrc &&
+        (!GatherShuffles.empty() &&
+         all_of(GatherShuffles,
+                [](const std::optional<TTI::ShuffleKind> &SK) {
+                  return SK.value_or(TTI::SK_PermuteTwoSrc) ==
+                         TTI::SK_PermuteSingleSrc;
+                }) &&
         none_of(Mask, [&](int I) { return I >= MSz; }) &&
         ShuffleVectorInst::isIdentityMask(Mask, MSz));
     bool EnoughConstsForShuffle =
@@ -10590,7 +10815,13 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
           continue;
         }
-        Builder.SetInsertPoint(IBB->getTerminator());
+        // if (any_of(E->getOperand(i), [&](Value *V) {
+        //       auto *I = dyn_cast<Instruction>(V);
+        //       return I && I->getParent() == IBB;
+        //     }))
+        Builder.SetInsertPoint(IBB->getTerminator());
+        // else
+        //   Builder.SetInsertPoint(IBB->getFirstNonPHIOrDbgOrLifetime());
         Builder.SetCurrentDebugLocation(PH->getDebugLoc());
         Value *Vec = vectorizeOperand(E, i, /*PostponedPHIs=*/true);
         NewPhi->addIncoming(Vec, IBB);
@@ -11254,10 +11485,22 @@ Value *BoUpSLP::vectorizeTree(
     // This is because the source vector that is supposed to feed this gather
     // node was inserted at the end of the block [after the stab instruction].
     // So we need to adjust the insertion point again to the end of the block.
-    if (isa(UserI))
-      Builder.SetInsertPoint(PrevVec->getParent()->getTerminator());
-    else
+    if (isa(UserI)) {
+      // Insert before all users.
+ Instruction *InsertPt = PrevVec->getParent()->getTerminator(); + for (User *U : PrevVec->users()) { + if (U == UserI) + continue; + auto *UI = dyn_cast(U); + if (!UI || isa(UI) || UI->getParent() != InsertPt->getParent()) + continue; + if (UI->comesBefore(InsertPt)) + InsertPt = UI; + } + Builder.SetInsertPoint(InsertPt); + } else { Builder.SetInsertPoint(PrevVec); + } Builder.SetCurrentDebugLocation(UserI->getDebugLoc()); Value *Vec = vectorizeTree(TE, /*PostponedPHIs=*/false); PrevVec->replaceAllUsesWith(Vec); diff --git a/llvm/test/Transforms/SLPVectorizer/X86/multi-nodes-to-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/multi-nodes-to-shuffle.ll index 21aac98aa3ece..e5b5a5c6c4a00 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/multi-nodes-to-shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/multi-nodes-to-shuffle.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux -slp-threshold=-107 | FileCheck %s -; RUN: opt -passes=slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux -slp-threshold=-107 -mattr=+avx2 | FileCheck %s +; RUN: opt -passes=slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux -slp-threshold=-115 | FileCheck %s +; RUN: opt -passes=slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux -slp-threshold=-115 -mattr=+avx2 | FileCheck %s --check-prefix=AVX2 define void @test(i64 %p0, i64 %p1, i64 %p2, i64 %p3) { ; CHECK-LABEL: @test( @@ -14,18 +14,43 @@ define void @test(i64 %p0, i64 %p1, i64 %p2, i64 %p3) { ; CHECK-NEXT: [[TMP6:%.*]] = sdiv <4 x i64> [[TMP3]], [[TMP3]] ; CHECK-NEXT: [[TMP7:%.*]] = sub <4 x i64> [[TMP5]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = shl <4 x i64> [[TMP4]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP5]], <4 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> [[TMP6]], <4 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP5]], <4 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP11]], <4 x i64> [[TMP6]], <4 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = or <4 x i64> [[TMP10]], [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = trunc <4 x i64> [[TMP13]] to <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP5]], <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i64> [[TMP6]], <4 x i64> [[TMP5]], <4 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> [[TMP10]], <4 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP5]], <4 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i64> [[TMP6]], <4 x i64> [[TMP5]], <4 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i64> [[TMP12]], <4 x i64> [[TMP13]], <4 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = or <4 x i64> [[TMP11]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = trunc <4 x i64> [[TMP15]] to <4 x i32> ; CHECK-NEXT: br label [[BB:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[TMP15:%.*]] = phi <4 x i32> [ [[TMP16:%.*]], [[BB]] ], [ [[TMP14]], [[ENTRY:%.*]] ] -; CHECK-NEXT: [[TMP16]] = trunc <4 x i64> [[TMP8]] to <4 x i32> +; CHECK-NEXT: [[TMP17:%.*]] = phi <4 x i32> [ [[TMP18:%.*]], [[BB]] ], [ [[TMP16]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP18]] = trunc <4 x i64> [[TMP8]] to <4 x i32> ; CHECK-NEXT: br label [[BB]] ; +; AVX2-LABEL: @test( +; AVX2-NEXT: entry: +; AVX2-NEXT: [[TMP0:%.*]] = insertelement <4 x i64> poison, i64 [[P0:%.*]], i32 0 +; AVX2-NEXT: [[TMP1:%.*]] = 
insertelement <4 x i64> [[TMP0]], i64 [[P1:%.*]], i32 1
+; AVX2-NEXT:    [[TMP2:%.*]] = insertelement <4 x i64> [[TMP1]], i64 [[P2:%.*]], i32 2
+; AVX2-NEXT:    [[TMP3:%.*]] = insertelement <4 x i64> [[TMP2]], i64 [[P3:%.*]], i32 3
+; AVX2-NEXT:    [[TMP4:%.*]] = add <4 x i64> [[TMP3]], [[TMP3]]
+; AVX2-NEXT:    [[TMP5:%.*]] = mul <4 x i64> [[TMP3]], [[TMP3]]
+; AVX2-NEXT:    [[TMP6:%.*]] = sdiv <4 x i64> [[TMP3]], [[TMP3]]
+; AVX2-NEXT:    [[TMP7:%.*]] = sub <4 x i64> [[TMP5]], [[TMP6]]
+; AVX2-NEXT:    [[TMP8:%.*]] = shl <4 x i64> [[TMP4]], [[TMP7]]
+; AVX2-NEXT:    [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP5]], <4 x i32> 
+; AVX2-NEXT:    [[TMP10:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> [[TMP6]], <4 x i32> 
+; AVX2-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP5]], <4 x i32> 
+; AVX2-NEXT:    [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP11]], <4 x i64> [[TMP6]], <4 x i32> 
+; AVX2-NEXT:    [[TMP13:%.*]] = or <4 x i64> [[TMP10]], [[TMP12]]
+; AVX2-NEXT:    [[TMP14:%.*]] = trunc <4 x i64> [[TMP13]] to <4 x i32>
+; AVX2-NEXT:    br label [[BB:%.*]]
+; AVX2:       bb:
+; AVX2-NEXT:    [[TMP15:%.*]] = phi <4 x i32> [ [[TMP16:%.*]], [[BB]] ], [ [[TMP14]], [[ENTRY:%.*]] ]
+; AVX2-NEXT:    [[TMP16]] = trunc <4 x i64> [[TMP8]] to <4 x i32>
+; AVX2-NEXT:    br label [[BB]]
+;
 entry:
   %a0 = add i64 %p0, %p0
   %a1 = add i64 %p1, %p1

From a7d6039f3efb02992d64164e6778b8bebf0a526b Mon Sep 17 00:00:00 2001
From: Aviad Cohen
Date: Thu, 26 Oct 2023 18:54:23 +0300
Subject: [PATCH 092/877] [mlir][linalg] Replace CopyOp from memref to linalg
 in linalg PromoteOp (#69154)

linalg::CopyOp is much more generic and useful for promoting buffers. In
addition, promotion is a linalg transform, so it makes more sense to use
linalg operations where possible.
---
 .../Dialect/Linalg/Transforms/Promotion.cpp   |  2 +-
 mlir/test/Dialect/Linalg/promote.mlir         | 28 ++++++++-----------
 .../Dialect/Linalg/promotion_options.mlir     |  6 ++--
 .../Dialect/Linalg/transform-promotion.mlir   | 14 +++++-----
 4 files changed, 23 insertions(+), 27 deletions(-)

diff --git a/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp b/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp
index a131f30976661..5c140a7d692a9 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp
@@ -209,7 +209,7 @@ LinalgOpInstancePromotionOptions::LinalgOpInstancePromotionOptions(
   Location loc = linalgOp.getLoc();
   auto defaultCopyCallBack = [loc](OpBuilder &b, Value src,
                                    Value dst) -> LogicalResult {
-    b.create<memref::CopyOp>(loc, src, dst);
+    b.create<linalg::CopyOp>(loc, src, dst);
     return success();
   };
   copyInFn = (options.copyInFn ?
*(options.copyInFn) : defaultCopyCallBack); diff --git a/mlir/test/Dialect/Linalg/promote.mlir b/mlir/test/Dialect/Linalg/promote.mlir index 4fa56b474f82e..fb5f357f3faa8 100644 --- a/mlir/test/Dialect/Linalg/promote.mlir +++ b/mlir/test/Dialect/Linalg/promote.mlir @@ -52,15 +52,13 @@ func.func @matmul_f32(%A: memref, %M: index, %N: index, %K: index) { // CHECK: %[[fullC:.*]] = memref.view %[[tmpC]][{{.*}}][{{.*}}] : memref<24xi8> to memref // CHECK: %[[partialC:.*]] = memref.subview %[[fullC]]{{.*}} : memref to memref> -// CHECK: memref.copy %[[vA]], %[[partialA]] : memref> to memref> -// CHECK: memref.copy %[[vB]], %[[partialB]] : memref> to memref> -// CHECK: memref.copy %[[vC]], %[[partialC]] : memref> to memref> +// CHECK: linalg.copy ins(%[[vA]] : memref>) outs(%[[partialA]] : memref>) +// CHECK: linalg.copy ins(%[[vB]] : memref>) outs(%[[partialB]] : memref>) +// CHECK: linalg.copy ins(%[[vC]] : memref>) outs(%[[partialC]] : memref>) // // CHECK: linalg.matmul ins(%[[partialA]], %[[partialB]]{{.*}} outs(%[[partialC]] // -// CHECK: memref.copy %[[partialC]], %[[vC]] : -// CHECK: memref> to -// CHECK: memref> +// CHECK: linalg.copy ins(%[[partialC]] : memref>) outs(%[[vC]] : memref>) // // CHECK-NOT: memref.dealloc %[[tmpA]] : memref<32xi8> // CHECK-NOT: memref.dealloc %[[tmpB]] : memref<48xi8> @@ -124,15 +122,13 @@ func.func @matmul_f64(%A: memref, %M: index, %N: index, %K: index) { // CHECK: %[[fullC_f64:.*]] = memref.view %[[tmpC_f64]][{{.*}}][{{.*}}] : memref<48xi8> to memref // CHECK: %[[partialC_f64:.*]] = memref.subview %[[fullC_f64]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] : memref to memref> -// CHECK: memref.copy %[[vA_f64]], %[[partialA_f64]] : memref> to memref> -// CHECK: memref.copy %[[vB_f64]], %[[partialB_f64]] : memref> to memref> -// CHECK: memref.copy %[[vC_f64]], %[[partialC_f64]] : memref> to memref> +// CHECK: linalg.copy ins(%[[vA_f64]] : memref>) outs(%[[partialA_f64]] : memref>) +// CHECK: linalg.copy ins(%[[vB_f64]] : memref>) outs(%[[partialB_f64]] : memref>) +// CHECK: linalg.copy ins(%[[vC_f64]] : memref>) outs(%[[partialC_f64]] : memref>) // // CHECK: linalg.matmul ins(%[[partialA_f64]], %[[partialB_f64]]{{.*}} outs(%[[partialC_f64]] // -// CHECK: memref.copy %[[partialC_f64]], %[[vC_f64]] : -// CHECK: memref> to -// CHECK: memref> +// CHECK: linalg.copy ins(%[[partialC_f64]] : memref>) outs(%[[vC_f64]] : memref>) // // CHECK: memref.dealloc %[[tmpA_f64]] : memref<64xi8> // CHECK: memref.dealloc %[[tmpB_f64]] : memref<96xi8> @@ -263,7 +259,7 @@ func.func @promote_rank_reducing_subviews(%arg0: memref> // CHECK: %[[VAL_61:.*]] = memref.view %[[VAL_60]]{{\[}}%[[VAL_56]]]{{\[}}%[[VAL_50]], %[[VAL_53]]] : memref<48xi8, #gpu.address_space> to memref> // CHECK: %[[VAL_62:.*]] = memref.subview %[[VAL_61]][0, 0] {{\[}}%[[VAL_52]], %[[VAL_55]]] [1, 1] : memref> to memref, #gpu.address_space> - // CHECK: memref.copy %[[VAL_3]], %[[VAL_24]] : memref<4x3xf32, strided<[4, 1]>, 1> to memref, #gpu.address_space> - // CHECK: memref.copy %[[VAL_4]], %[[VAL_43]] : memref<4x3xf32, strided<[4, 1]>, 1> to memref, #gpu.address_space> +// CHECK: linalg.copy ins(%[[VAL_3]] : memref<4x3xf32, strided<[4, 1]>, 1>) outs(%[[VAL_24]] : memref, #gpu.address_space>) +// CHECK: linalg.copy ins(%[[VAL_4]] : memref<4x3xf32, strided<[4, 1]>, 1>) outs(%[[VAL_43]] : memref, #gpu.address_space>) // CHECK: linalg.generic {doc = "", indexing_maps = [#map, #map, #map], iterator_types = ["parallel", "parallel"], library_call = ""} ins(%[[VAL_24]], %[[VAL_43]] : memref, #gpu.address_space>, memref, 
#gpu.address_space>) outs(%[[VAL_62]] : memref, #gpu.address_space>) { // CHECK: ^bb0(%[[VAL_63:.*]]: f32, %[[VAL_64:.*]]: f32, %[[VAL_65:.*]]: f32): // CHECK: %[[VAL_66:.*]] = arith.addf %[[VAL_63]], %[[VAL_64]] : f32 @@ -376,7 +372,7 @@ func.func @linalg_generic_update_all_function_inputs_outputs(%arg0: memref<3x4xf linalg.yield %1 : f32 } - // CHECK: memref.copy %[[VAL_62]], %[[VAL_5]] : memref, #gpu.address_space> to memref<4x3xf32, strided<[4, 1]>, 1> + // CHECK: linalg.copy ins(%[[VAL_62]] : memref, #gpu.address_space>) outs(%[[VAL_5]] : memref<4x3xf32, strided<[4, 1]>, 1>) // CHECK: memref.dealloc %[[VAL_22]] : memref<48xi8, #gpu.address_space> // CHECK: memref.dealloc %[[VAL_41]] : memref<48xi8, #gpu.address_space> // CHECK: memref.dealloc %[[VAL_60]] : memref<48xi8, #gpu.address_space> diff --git a/mlir/test/Dialect/Linalg/promotion_options.mlir b/mlir/test/Dialect/Linalg/promotion_options.mlir index 760336ff34f84..3bf74b708cb82 100644 --- a/mlir/test/Dialect/Linalg/promotion_options.mlir +++ b/mlir/test/Dialect/Linalg/promotion_options.mlir @@ -27,10 +27,10 @@ func.func @gemm(%a : memref, %b : memref, %c : memref // CHECK: %[[VC:.*]] = memref.view %[[tmpC]][%[[C0]]][] : memref<1024xi8> to memref<16x16xf32> // CHECK: %[[svCC:.+]] = memref.subview %[[VC]] -// CHECK: memref.copy %[[svA]], %[[svAA]] -// CHECK: memref.copy %[[svC]], %[[svCC]] +// CHECK: linalg.copy ins(%[[svA]] : memref>) outs(%[[svAA]] : memref>) +// CHECK: linalg.copy ins(%[[svC]] : memref>) outs(%[[svCC]] : memref>) // CHECK: linalg.matmul ins(%[[VA]], %[[svB]]{{.*}} outs(%[[VC]] -// CHECK: memref.copy %[[svCC]], %[[svC]] +// CHECK: linalg.copy ins(%[[svCC]] : memref>) outs(%[[svC]] : memref>) // CHECK: memref.dealloc %[[tmpA]] // CHECK: memref.dealloc %[[tmpC]] diff --git a/mlir/test/Dialect/Linalg/transform-promotion.mlir b/mlir/test/Dialect/Linalg/transform-promotion.mlir index 362719b730334..d6112db0f7772 100644 --- a/mlir/test/Dialect/Linalg/transform-promotion.mlir +++ b/mlir/test/Dialect/Linalg/transform-promotion.mlir @@ -51,9 +51,9 @@ func.func @promote_subview_matmul(%arg0: memref to memref // CHECK: %[[l2:.*]] = memref.subview %[[v2]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] // CHECK-SAME: memref to memref> -// CHECK: memref.copy %[[s0]], %[[l0]] : memref to memref -// CHECK: memref.copy %[[s1]], %[[l1]] : memref to memref -// CHECK: memref.copy %[[s2]], %[[l2]] : memref to memref +// CHECK: linalg.copy ins(%[[s0]] : memref) outs(%[[l0]] : memref) +// CHECK: linalg.copy ins(%[[s1]] : memref) outs(%[[l1]] : memref) +// CHECK: linalg.copy ins(%[[s2]] : memref) outs(%[[l2]] : memref) // CHECK: linalg.matmul // CHECK-SAME: ins(%[[v0]], %[[v1]] : memref, memref) // CHECK-SAME: outs(%[[v2]] : memref) @@ -114,8 +114,8 @@ func.func @promote_first_subview_matmul(%arg0: memref to memref -// CHECK-NOT: memref.copy +// CHECK: linalg.copy ins(%[[s0]] : memref) outs(%[[l0]] : memref) +// CHECK-NOT: linalg.copy // CHECK: linalg.matmul // CHECK-SAME: ins(%[[v0]], %[[s1]] : memref, memref>) // CHECK-SAME: outs(%[[s2]] : memref>) @@ -149,7 +149,7 @@ func.func @aligned_promote_fill(%arg0: memref to memref // CHECK: %[[l0:.*]] = memref.subview %[[v0]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] : memref to memref> // CHECK: linalg.fill ins({{.*}} : f32) outs(%[[v0]] : memref) -// CHECK: memref.copy %[[s0]], %[[l0]] : memref to memref +// CHECK: linalg.copy ins(%[[s0]] : memref) outs(%[[l0]] : memref) // CHECK: linalg.fill ins(%[[cf]] : f32) outs(%[[v0]] : memref) module attributes {transform.with_named_sequence} { @@ -182,7 +182,7 @@ func.func 
@aligned_promote_fill_complex(%arg0: memref, strided< // CHECK: %[[v0:.*]] = memref.view %[[a0]]{{.*}} : memref<64000000xi8> to memref> // CHECK: %[[l0:.*]] = memref.subview %[[v0]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] : memref> to memref, strided<[?, 1], offset: ?>> // CHECK: linalg.fill ins({{.*}} : complex) outs(%[[v0]] : memref>) -// CHECK: memref.copy %[[s0]], %[[l0]] : memref, strided{{.*}}> to memref, strided{{.*}}> +// CHECK: linalg.copy ins(%[[s0]] : memref, strided{{.*}}>) outs(%[[l0]] : memref, strided{{.*}}>) // CHECK: linalg.fill ins(%[[cc]] : complex) outs(%[[v0]] : memref>) module attributes {transform.with_named_sequence} { From 09e8ef975d9970560b893f79ec283f69ea8db953 Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Thu, 26 Oct 2023 11:59:17 -0400 Subject: [PATCH 093/877] Diagnose use of VLAs in a coroutine (#70341) Fixes https://github.com/llvm/llvm-project/issues/65858 --- clang/docs/ReleaseNotes.rst | 4 +++ .../clang/Basic/DiagnosticSemaKinds.td | 2 ++ clang/include/clang/Sema/ScopeInfo.h | 8 +++++ clang/lib/Sema/SemaCoroutine.cpp | 5 ++++ clang/lib/Sema/SemaType.cpp | 18 ++++++++---- clang/test/SemaCXX/coroutine-vla.cpp | 29 +++++++++++++++++++ 6 files changed, 60 insertions(+), 6 deletions(-) create mode 100644 clang/test/SemaCXX/coroutine-vla.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 074116d2edf9f..7238386231e1a 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -636,6 +636,10 @@ Bug Fixes to C++ Support (`#46200 `_) (`#57812 `_) +- Diagnose use of a variable-length array in a coroutine. The design of + coroutines is such that it is not possible to support VLA use. Fixes: + (`#65858 `_) + - Fix bug where we were overriding zero-initialization of class members when default initializing a base class in a constant expression context. Fixes: (`#69890 `_) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index a673ce726d6c2..453bd8a9a3404 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -166,6 +166,8 @@ def ext_vla_folded_to_constant : ExtWarn< InGroup; def err_vla_unsupported : Error< "variable length arrays are not supported for %select{the current target|'%1'}0">; +def err_vla_in_coroutine_unsupported : Error< + "variable length arrays in a coroutine are not supported">; def note_vla_unsupported : Note< "variable length arrays are not supported for the current target">; diff --git a/clang/include/clang/Sema/ScopeInfo.h b/clang/include/clang/Sema/ScopeInfo.h index 02b22af89ff03..b2f6e3289f41f 100644 --- a/clang/include/clang/Sema/ScopeInfo.h +++ b/clang/include/clang/Sema/ScopeInfo.h @@ -189,6 +189,9 @@ class FunctionScopeInfo { /// First SEH '__try' statement in the current function. SourceLocation FirstSEHTryLoc; + /// First use of a VLA within the current function. + SourceLocation FirstVLALoc; + private: /// Used to determine if errors occurred in this function or block. 
DiagnosticErrorTrap ErrorTrap; @@ -473,6 +476,11 @@ class FunctionScopeInfo { FirstSEHTryLoc = TryLoc; } + void setHasVLA(SourceLocation VLALoc) { + if (FirstVLALoc.isInvalid()) + FirstVLALoc = VLALoc; + } + bool NeedsScopeChecking() const { return !HasDroppedStmt && (HasIndirectGoto || HasMustTail || (HasBranchProtectedScope && HasBranchIntoScope)); diff --git a/clang/lib/Sema/SemaCoroutine.cpp b/clang/lib/Sema/SemaCoroutine.cpp index d2b0922a4bb9c..cfaa93fbea4dd 100644 --- a/clang/lib/Sema/SemaCoroutine.cpp +++ b/clang/lib/Sema/SemaCoroutine.cpp @@ -1198,6 +1198,11 @@ void Sema::CheckCompletedCoroutineBody(FunctionDecl *FD, Stmt *&Body) { if (FD->hasAttr()) Diag(FD->getLocation(), diag::warn_always_inline_coroutine); + // The design of coroutines means we cannot allow use of VLAs within one, so + // diagnose if we've seen a VLA in the body of this function. + if (Fn->FirstVLALoc.isValid()) + Diag(Fn->FirstVLALoc, diag::err_vla_in_coroutine_unsupported); + // [stmt.return.coroutine]p1: // A coroutine shall not enclose a return statement ([stmt.return]). if (Fn->FirstReturnLoc.isValid()) { diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index 28b81c1768a30..dea77fae4cadb 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -2706,12 +2706,18 @@ QualType Sema::BuildArrayType(QualType T, ArrayType::ArraySizeModifier ASM, } } - if (T->isVariableArrayType() && !Context.getTargetInfo().isVLASupported()) { - // CUDA device code and some other targets don't support VLAs. - bool IsCUDADevice = (getLangOpts().CUDA && getLangOpts().CUDAIsDevice); - targetDiag(Loc, - IsCUDADevice ? diag::err_cuda_vla : diag::err_vla_unsupported) - << (IsCUDADevice ? CurrentCUDATarget() : 0); + if (T->isVariableArrayType()) { + if (!Context.getTargetInfo().isVLASupported()) { + // CUDA device code and some other targets don't support VLAs. + bool IsCUDADevice = (getLangOpts().CUDA && getLangOpts().CUDAIsDevice); + targetDiag(Loc, + IsCUDADevice ? diag::err_cuda_vla : diag::err_vla_unsupported) + << (IsCUDADevice ? CurrentCUDATarget() : 0); + } else if (sema::FunctionScopeInfo *FSI = getCurFunction()) { + // VLAs are supported on this target, but we may need to do delayed + // checking that the VLA is not being used within a coroutine. + FSI->setHasVLA(Loc); + } } // If this is not C99, diagnose array size modifiers on non-VLAs. 
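The restriction is easier to see with the coroutine frame model in mind: locals that may live across a suspension point are stored in the coroutine frame, and the frame's size must be known at compile time, so a runtime-sized array has nowhere to live. The following standalone sketch is illustrative only; it is not part of the patch, the task/demo names are invented for the example, and the promise boilerplate merely mirrors the test's Inputs/std-coroutine.h:

#include <coroutine>

struct task {
  struct promise_type {
    task get_return_object() { return {}; }
    std::suspend_always initial_suspend() noexcept { return {}; }
    std::suspend_always final_suspend() noexcept { return {}; }
    void return_void() {}
    void unhandled_exception() {}
  };
};

task demo(int n) {
  // With this patch, Clang reports: "variable length arrays in a coroutine
  // are not supported". The frame that would have to hold 'buf' has a
  // fixed, compile-time size, so the VLA cannot be given storage.
  int buf[n];
  co_return;
}

Note that the check is deliberately delayed: BuildArrayType only records the location in FunctionScopeInfo::FirstVLALoc, and CheckCompletedCoroutineBody emits the diagnostic once the function is known to be a coroutine, so ordinary functions using VLAs are unaffected.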
diff --git a/clang/test/SemaCXX/coroutine-vla.cpp b/clang/test/SemaCXX/coroutine-vla.cpp
new file mode 100644
index 0000000000000..176e35f346e2b
--- /dev/null
+++ b/clang/test/SemaCXX/coroutine-vla.cpp
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 %s -std=c++20 -fsyntax-only -Wno-vla-cxx-extension -verify
+#include "Inputs/std-coroutine.h"
+
+struct promise;
+
+struct coroutine : std::coroutine_handle<promise> {
+  using promise_type = ::promise;
+};
+
+struct promise
+{
+  coroutine get_return_object();
+  std::suspend_always initial_suspend() noexcept;
+  std::suspend_always final_suspend() noexcept;
+  void return_void();
+  void unhandled_exception();
+};
+
+coroutine foo(int n) {
+  int array[n]; // expected-error {{variable length arrays in a coroutine are not supported}}
+  co_return;
+}
+
+void lambda() {
+  [](int n) -> coroutine {
+    int array[n]; // expected-error {{variable length arrays in a coroutine are not supported}}
+    co_return;
+  }(10);
+}

From fd9b3e471e80d5c6aaf525f9fc3a04569eddb960 Mon Sep 17 00:00:00 2001
From: Aart Bik <39774503+aartbik@users.noreply.github.com>
Date: Thu, 26 Oct 2023 09:06:57 -0700
Subject: [PATCH 094/877] [mlir][sparse] cleanup merger test, add header
 (#70279)

---
 .../Dialect/SparseTensor/MergerTest.cpp       | 227 ++++++++----------
 1 file changed, 101 insertions(+), 126 deletions(-)

diff --git a/mlir/unittests/Dialect/SparseTensor/MergerTest.cpp b/mlir/unittests/Dialect/SparseTensor/MergerTest.cpp
index 561753f631c16..e28d88e046fcd 100644
--- a/mlir/unittests/Dialect/SparseTensor/MergerTest.cpp
+++ b/mlir/unittests/Dialect/SparseTensor/MergerTest.cpp
@@ -1,3 +1,11 @@
+//===- MergerTest.cpp - Tests for the sparsifier's merger -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
 #include "mlir/Dialect/SparseTensor/Utils/Merger.h"
 #include "llvm/Support/Compiler.h"
 #include "gmock/gmock.h"
@@ -73,56 +81,43 @@ namespace {
 /// Helper classes/functions for testing Merger.
 ///

-/// Simple recursive data structure used to match expressions in `Merger`.
-struct Pattern;
-/// Since the patterns we need are rather small and short-lived, we use
-/// `Pattern const&` for "pointers" to patterns, rather than using
-/// something more elaborate like `std::shared_ptr<Pattern> const&`.
-using PatternRef = const Pattern &;
-struct Pattern {
+/// Simple recursive data structure used to match expressions in `Merger`,
+/// which uses const references into the short-lived data structures.
+struct Match {
   struct Children {
-    Children(PatternRef e0, PatternRef e1) : e0(e0), e1(e1) {}
-    PatternRef e0;
-    PatternRef e1;
+    Children(const Match &e0, const Match &e1) : e0(e0), e1(e1) {}
+    const Match &e0;
+    const Match &e1;
   };

-  TensorExp::Kind kind;
+  Match() : kind(TensorExp::Kind::kSynZero) {}
+  Match(TensorId tid) : kind(TensorExp::Kind::kTensor), tid(tid) {}
+  Match(TensorExp::Kind kind, const Match &e0, const Match &e1)
+      : kind(kind), children(e0, e1) {
+    assert(kind >= TensorExp::Kind::kMulF);
+  }

+  TensorExp::Kind kind;
   union {
-    /// Expressions representing tensors simply have a tensor number.
     TensorId tid;
-
-    /// Tensor operations point to their children.
     Children children;
   };
-
-  /// Constructors.
-  /// Rather than using these, please use the readable builder
-  /// functions below to make tests more readable.
- Pattern() : kind(TensorExp::Kind::kSynZero) {} - Pattern(TensorId tid) : kind(TensorExp::Kind::kTensor), tid(tid) {} - Pattern(TensorExp::Kind kind, PatternRef e0, PatternRef e1) - : kind(kind), children(e0, e1) { - assert(kind >= TensorExp::Kind::kMulF); - } }; /// -/// Readable Pattern builder functions. +/// Readable Match builder functions. /// These should be preferred over the actual constructors. /// -static Pattern tensorPattern(TensorId tid) { return Pattern(tid); } -static Pattern synZeroPattern() { return Pattern(); } +static Match tensorMatch(TensorId tid) { return Match(tid); } +static Match synZeroMatch() { return Match(); } #define IMPL_BINOP_PATTERN(OP, KIND) \ - LLVM_ATTRIBUTE_UNUSED static Pattern OP##Pattern(PatternRef e0, \ - PatternRef e1) { \ - return Pattern(KIND, e0, e1); \ + LLVM_ATTRIBUTE_UNUSED static Match OP##Match(const Match &e0, \ + const Match &e1) { \ + return Match(KIND, e0, e1); \ } - FOREVERY_BINOP(IMPL_BINOP_PATTERN) - #undef IMPL_BINOP_PATTERN class MergerTestBase : public ::testing::Test { @@ -150,9 +145,7 @@ class MergerTestBase : public ::testing::Test { LLVM_ATTRIBUTE_UNUSED ExprId OP##Expr(ExprId e0, ExprId e1) { \ return merger.addExp(KIND, e0, e1); \ } - FOREVERY_BINOP(IMPL_BINOP_EXPR) - #undef IMPL_BINOP_EXPR /// @@ -168,7 +161,7 @@ class MergerTestBase : public ::testing::Test { /// ordering within groups. If `simple` is true, then compare the /// `lat.simple` field instead to test the result after optimization. bool latPointWithinRange(LatSetId s, unsigned lo, unsigned n, - PatternRef pattern, const BitVector &bits, + const Match &pattern, const BitVector &bits, bool simple) { for (unsigned k = lo, hi = lo + n; k < hi; ++k) { if (compareExpression(merger.lat(merger.set(s)[k]).exp, pattern) && @@ -180,13 +173,13 @@ class MergerTestBase : public ::testing::Test { /// Wrapper over latPointWithinRange for readability of tests. void expectLatPointWithinRange(LatSetId s, unsigned lo, unsigned n, - PatternRef pattern, const BitVector &bits, + const Match &pattern, const BitVector &bits, bool simple = false) { EXPECT_TRUE(latPointWithinRange(s, lo, n, pattern, bits, simple)); } /// Wrapper over expectLatPointWithinRange for a single lat point. - void expectLatPoint(LatSetId s, unsigned lo, PatternRef pattern, + void expectLatPoint(LatSetId s, unsigned lo, const Match &pattern, const BitVector &bits, bool simple = false) { EXPECT_TRUE(latPointWithinRange(s, lo, 1, pattern, bits, simple)); } @@ -216,7 +209,7 @@ class MergerTestBase : public ::testing::Test { /// Compares expressions for equality. Equality is defined recursively as: /// - Operations are equal if they have the same kind and children. /// - Leaf tensors are equal if they refer to the same tensor. 
- bool compareExpression(ExprId e, PatternRef pattern) { + bool compareExpression(ExprId e, const Match &pattern) { const auto &tensorExp = merger.exp(e); if (tensorExp.kind != pattern.kind) return false; @@ -424,21 +417,19 @@ class MergerTest3T1LSo : public MergerTestBase { const auto t0 = tid(0); \ const auto t1 = tid(1); \ const auto t2 = tid(2); \ - PatternRef p0 = tensorPattern(t0); \ - PatternRef p1 = tensorPattern(t1); \ - PatternRef p2 = tensorPattern(t2); \ + const Match &p0 = tensorMatch(t0); \ + const Match &p1 = tensorMatch(t1); \ + const Match &p2 = tensorMatch(t2); \ auto s = merger.buildLattices(e, l0); \ expectNumLatPoints(s, 1); \ - expectLatPoint(s, 0, CONJ2##Pattern(CONJ1##Pattern(p0, p1), p2), \ + expectLatPoint(s, 0, CONJ2##Match(CONJ1##Match(p0, p1), p2), \ loopsToBits({{l0, t0}, {l0, t1}, {l0, t2}})); \ s = merger.optimizeSet(s); \ expectNumLatPoints(s, 1); \ - expectLatPoint(s, 0, CONJ2##Pattern(CONJ1##Pattern(p0, p1), p2), \ + expectLatPoint(s, 0, CONJ2##Match(CONJ1##Match(p0, p1), p2), \ loopsToBits({{l0, t1}}), true); \ } - FOREVERY_PAIR_OF_COMMON_CONJ_CONJ_BINOP(IMPL_MERGER_TEST_CONJ_CONJ_UNDEF) - #undef IMPL_MERGER_TEST_CONJ_CONJ_UNDEF /// Vector multiplication (conjunction) of 2 vectors, i.e.; @@ -461,21 +452,19 @@ FOREVERY_PAIR_OF_COMMON_CONJ_CONJ_BINOP(IMPL_MERGER_TEST_CONJ_CONJ_UNDEF) const auto t1 = tid(1); \ const auto t2 = tid(2); \ const auto t3 = tid(3); \ - PatternRef p0 = tensorPattern(t0); \ - PatternRef p1 = tensorPattern(t1); \ - PatternRef p2 = tensorPattern(t2); \ + const Match &p0 = tensorMatch(t0); \ + const Match &p1 = tensorMatch(t1); \ + const Match &p2 = tensorMatch(t2); \ auto s = merger.buildLattices(e, l0); \ expectNumLatPoints(s, 1); \ - expectLatPoint(s, 0, CONJ2##Pattern(CONJ1##Pattern(p0, p1), p2), \ + expectLatPoint(s, 0, CONJ2##Match(CONJ1##Match(p0, p1), p2), \ loopsToBits({{l0, t0}, {l0, t1}, {l0, t3}})); \ s = merger.optimizeSet(s); \ expectNumLatPoints(s, 1); \ - expectLatPoint(s, 0, CONJ2##Pattern(CONJ1##Pattern(p0, p1), p2), \ + expectLatPoint(s, 0, CONJ2##Match(CONJ1##Match(p0, p1), p2), \ loopsToBits({{l0, t3}}), true); \ } - FOREVERY_PAIR_OF_COMMON_CONJ_CONJ_BINOP(IMPL_MERGER_TEST_CONJ_CONJ_SPARSE_OUT) - #undef IMPL_MERGER_TEST_CONJ_CONJ_SPARSE_OUT /// Vector addition (disjunction) of 2 vectors. 
i.e.; @@ -499,26 +488,24 @@ FOREVERY_PAIR_OF_COMMON_CONJ_CONJ_BINOP(IMPL_MERGER_TEST_CONJ_CONJ_SPARSE_OUT) const auto l0 = lid(0); \ const auto t0 = tid(0); \ const auto t1 = tid(1); \ - PatternRef p0 = tensorPattern(t0); \ - PatternRef p1 = tensorPattern(t1); \ + const Match &p0 = tensorMatch(t0); \ + const Match &p1 = tensorMatch(t1); \ auto s = merger.buildLattices(e, l0); \ \ expectNumLatPoints(s, 3); \ - expectLatPoint(s, 0, OP##Pattern(p0, p1), \ + expectLatPoint(s, 0, OP##Match(p0, p1), \ loopsToBits({{l0, t0}, {l0, t1}})); \ expectLatPointWithinRange(s, 1, 2, p0, loopsToBits({{l0, t0}})); \ expectLatPointWithinRange(s, 1, 2, p1, loopsToBits({{l0, t1}})); \ \ s = merger.optimizeSet(s); \ expectNumLatPoints(s, 3); \ - expectLatPoint(s, 0, OP##Pattern(p0, p1), \ - loopsToBits({{l0, t0}, {l0, t1}}), true); \ + expectLatPoint(s, 0, OP##Match(p0, p1), loopsToBits({{l0, t0}, {l0, t1}}), \ + true); \ expectLatPointWithinRange(s, 1, 2, p0, loopsToBits({{l0, t0}}), true); \ expectLatPointWithinRange(s, 1, 2, p1, loopsToBits({{l0, t1}}), true); \ } - FOREVERY_COMMON_DISJ_BINOP(IMPL_MERGER_TEST_DISJ) - #undef IMPL_MERGER_TEST_DISJ /// Vector multiplication (conjunction) of 2 vectors, i.e.; @@ -533,22 +520,20 @@ FOREVERY_COMMON_DISJ_BINOP(IMPL_MERGER_TEST_DISJ) const auto l0 = lid(0); \ const auto t0 = tid(0); \ const auto t1 = tid(1); \ - PatternRef p0 = tensorPattern(t0); \ - PatternRef p1 = tensorPattern(t1); \ + const Match &p0 = tensorMatch(t0); \ + const Match &p1 = tensorMatch(t1); \ auto s = merger.buildLattices(e, l0); \ \ expectNumLatPoints(s, 1); \ - expectLatPoint(s, 0, OP##Pattern(p0, p1), \ + expectLatPoint(s, 0, OP##Match(p0, p1), \ loopsToBits({{l0, t0}, {l0, t1}})); \ \ s = merger.optimizeSet(s); \ expectNumLatPoints(s, 1); \ - expectLatPoint(s, 0, OP##Pattern(p0, p1), \ - loopsToBits({{l0, t0}, {l0, t1}}), true); \ + expectLatPoint(s, 0, OP##Match(p0, p1), loopsToBits({{l0, t0}, {l0, t1}}), \ + true); \ } - FOREVERY_COMMON_CONJ_BINOP(IMPL_MERGER_TEST_CONJ) - #undef IMPL_MERGER_TEST_CONJ /// Vector multiplication (conjunction) then addition (disjunction), i.e.; @@ -567,29 +552,27 @@ FOREVERY_COMMON_CONJ_BINOP(IMPL_MERGER_TEST_CONJ) const auto t0 = tid(0); \ const auto t1 = tid(1); \ const auto t2 = tid(2); \ - PatternRef p0 = tensorPattern(t0); \ - PatternRef p1 = tensorPattern(t1); \ - PatternRef p2 = tensorPattern(t2); \ + const Match &p0 = tensorMatch(t0); \ + const Match &p1 = tensorMatch(t1); \ + const Match &p2 = tensorMatch(t2); \ auto s = merger.buildLattices(e, l0); \ \ expectNumLatPoints(s, 3); \ - expectLatPoint(s, 0, DISJ##Pattern(CONJ##Pattern(p0, p1), p2), \ + expectLatPoint(s, 0, DISJ##Match(CONJ##Match(p0, p1), p2), \ loopsToBits({{l0, t0}, {l0, t1}, {l0, t2}})); \ - expectLatPointWithinRange(s, 1, 2, CONJ##Pattern(p0, p1), \ + expectLatPointWithinRange(s, 1, 2, CONJ##Match(p0, p1), \ loopsToBits({{l0, t0}, {l0, t1}})); \ expectLatPointWithinRange(s, 1, 2, p2, loopsToBits({{l0, t2}})); \ \ s = merger.optimizeSet(s); \ expectNumLatPoints(s, 3); \ - expectLatPoint(s, 0, DISJ##Pattern(CONJ##Pattern(p0, p1), p2), \ + expectLatPoint(s, 0, DISJ##Match(CONJ##Match(p0, p1), p2), \ loopsToBits({{l0, t0}, {l0, t1}, {l0, t2}})); \ - expectLatPointWithinRange(s, 1, 2, CONJ##Pattern(p0, p1), \ + expectLatPointWithinRange(s, 1, 2, CONJ##Match(p0, p1), \ loopsToBits({{l0, t0}, {l0, t1}})); \ expectLatPointWithinRange(s, 1, 2, p2, loopsToBits({{l0, t2}})); \ } - FOREVERY_PAIR_OF_COMMON_CONJ_DISJ_BINOP(IMPL_MERGER_TEST_CONJ_DISJ) - #undef IMPL_MERGER_TEST_CONJ_DISJ /// Vector 
addition (disjunction) then addition (disjunction), i.e.; @@ -612,19 +595,19 @@ FOREVERY_PAIR_OF_COMMON_CONJ_DISJ_BINOP(IMPL_MERGER_TEST_CONJ_DISJ) const auto t0 = tid(0); \ const auto t1 = tid(1); \ const auto t2 = tid(2); \ - PatternRef p0 = tensorPattern(t0); \ - PatternRef p1 = tensorPattern(t1); \ - PatternRef p2 = tensorPattern(t2); \ + const Match &p0 = tensorMatch(t0); \ + const Match &p1 = tensorMatch(t1); \ + const Match &p2 = tensorMatch(t2); \ auto s = merger.buildLattices(e, l0); \ \ expectNumLatPoints(s, 7); \ - expectLatPoint(s, 0, DISJ2##Pattern(DISJ1##Pattern(p0, p1), p2), \ + expectLatPoint(s, 0, DISJ2##Match(DISJ1##Match(p0, p1), p2), \ loopsToBits({{l0, t0}, {l0, t1}, {l0, t2}})); \ - expectLatPointWithinRange(s, 1, 6, DISJ2##Pattern(p1, p2), \ + expectLatPointWithinRange(s, 1, 6, DISJ2##Match(p1, p2), \ loopsToBits({{l0, t1}, {l0, t2}})); \ - expectLatPointWithinRange(s, 1, 6, DISJ2##Pattern(p0, p2), \ + expectLatPointWithinRange(s, 1, 6, DISJ2##Match(p0, p2), \ loopsToBits({{l0, t0}, {l0, t2}})); \ - expectLatPointWithinRange(s, 1, 6, DISJ1##Pattern(p0, p1), \ + expectLatPointWithinRange(s, 1, 6, DISJ1##Match(p0, p1), \ loopsToBits({{l0, t0}, {l0, t1}})); \ expectLatPointWithinRange(s, 1, 6, p2, loopsToBits({{l0, t2}})); \ expectLatPointWithinRange(s, 1, 6, p1, loopsToBits({{l0, t1}})); \ @@ -632,21 +615,19 @@ FOREVERY_PAIR_OF_COMMON_CONJ_DISJ_BINOP(IMPL_MERGER_TEST_CONJ_DISJ) \ s = merger.optimizeSet(s); \ expectNumLatPoints(s, 7); \ - expectLatPoint(s, 0, DISJ2##Pattern(DISJ1##Pattern(p0, p1), p2), \ + expectLatPoint(s, 0, DISJ2##Match(DISJ1##Match(p0, p1), p2), \ loopsToBits({{l0, t0}, {l0, t1}, {l0, t2}})); \ - expectLatPointWithinRange(s, 1, 6, DISJ2##Pattern(p1, p2), \ + expectLatPointWithinRange(s, 1, 6, DISJ2##Match(p1, p2), \ loopsToBits({{l0, t1}, {l0, t2}})); \ - expectLatPointWithinRange(s, 1, 6, DISJ2##Pattern(p0, p2), \ + expectLatPointWithinRange(s, 1, 6, DISJ2##Match(p0, p2), \ loopsToBits({{l0, t0}, {l0, t2}})); \ - expectLatPointWithinRange(s, 1, 6, DISJ1##Pattern(p0, p1), \ + expectLatPointWithinRange(s, 1, 6, DISJ1##Match(p0, p1), \ loopsToBits({{l0, t0}, {l0, t1}})); \ expectLatPointWithinRange(s, 1, 6, p2, loopsToBits({{l0, t2}})); \ expectLatPointWithinRange(s, 1, 6, p1, loopsToBits({{l0, t1}})); \ expectLatPointWithinRange(s, 1, 6, p0, loopsToBits({{l0, t0}})); \ } - FOREVERY_PAIR_OF_COMMON_DISJ_DISJ_BINOP(IMPL_MERGER_TEST_DISJ_DISJ) - #undef IMPL_MERGER_TEST_DISJ_DISJ /// Vector multiplication (conjunction) then multiplication (conjunction), i.e.; @@ -663,21 +644,19 @@ FOREVERY_PAIR_OF_COMMON_DISJ_DISJ_BINOP(IMPL_MERGER_TEST_DISJ_DISJ) const auto t0 = tid(0); \ const auto t1 = tid(1); \ const auto t2 = tid(2); \ - PatternRef p0 = tensorPattern(t0); \ - PatternRef p1 = tensorPattern(t1); \ - PatternRef p2 = tensorPattern(t2); \ + const Match &p0 = tensorMatch(t0); \ + const Match &p1 = tensorMatch(t1); \ + const Match &p2 = tensorMatch(t2); \ auto s = merger.buildLattices(e, l0); \ expectNumLatPoints(s, 1); \ - expectLatPoint(s, 0, CONJ2##Pattern(CONJ1##Pattern(p0, p1), p2), \ + expectLatPoint(s, 0, CONJ2##Match(CONJ1##Match(p0, p1), p2), \ loopsToBits({{l0, t0}, {l0, t1}, {l0, t2}})); \ s = merger.optimizeSet(s); \ expectNumLatPoints(s, 1); \ - expectLatPoint(s, 0, CONJ2##Pattern(CONJ1##Pattern(p0, p1), p2), \ + expectLatPoint(s, 0, CONJ2##Match(CONJ1##Match(p0, p1), p2), \ loopsToBits({{l0, t0}, {l0, t1}, {l0, t2}}), true); \ } - FOREVERY_PAIR_OF_COMMON_CONJ_CONJ_BINOP(IMPL_MERGER_TEST_CONJ_CONJ) - #undef IMPL_MERGER_TEST_CONJ_CONJ /// Vector 
addition (disjunction) of 2 vectors, i.e.; @@ -702,25 +681,23 @@ FOREVERY_PAIR_OF_COMMON_CONJ_CONJ_BINOP(IMPL_MERGER_TEST_CONJ_CONJ) const auto l0 = lid(0); \ const auto t0 = tid(0); \ const auto t1 = tid(1); \ - PatternRef p0 = tensorPattern(t0); \ - PatternRef p1 = tensorPattern(t1); \ + const Match &p0 = tensorMatch(t0); \ + const Match &p1 = tensorMatch(t1); \ auto s = merger.buildLattices(e, l0); \ \ expectNumLatPoints(s, 3); \ - expectLatPoint(s, 0, OP##Pattern(p0, p1), \ + expectLatPoint(s, 0, OP##Match(p0, p1), \ loopsToBits({{l0, t0}, {l0, t1}})); \ expectLatPointWithinRange(s, 1, 2, p0, loopsToBits({{l0, t0}})); \ expectLatPointWithinRange(s, 1, 2, p1, loopsToBits({{l0, t1}})); \ \ s = merger.optimizeSet(s); \ expectNumLatPoints(s, 2); \ - expectLatPoint(s, 0, OP##Pattern(p0, p1), \ - loopsToBits({{l0, t0}, {l0, t1}}), true); \ + expectLatPoint(s, 0, OP##Match(p0, p1), loopsToBits({{l0, t0}, {l0, t1}}), \ + true); \ expectLatPoint(s, 1, p1, loopsToBits({{l0, t1}}), true); \ } - FOREVERY_COMMON_DISJ_BINOP(IMPL_MERGER_TEST_OPTIMIZED_DISJ) - #undef IMPL_MERGER_TEST_OPTIMIZED_CONJ /// Vector multiplication (conjunction) of 2 vectors, i.e.: @@ -740,20 +717,20 @@ FOREVERY_COMMON_DISJ_BINOP(IMPL_MERGER_TEST_OPTIMIZED_DISJ) const auto l0 = lid(0); \ const auto t0 = tid(0); \ const auto t1 = tid(1); \ - PatternRef p0 = tensorPattern(t0); \ - PatternRef p1 = tensorPattern(t1); \ + const Match &p0 = tensorMatch(t0); \ + const Match &p1 = tensorMatch(t1); \ auto s = merger.buildLattices(e, l0); \ \ expectNumLatPoints(s, 1); \ - expectLatPoint(s, 0, OP##Pattern(p0, p1), \ + expectLatPoint(s, 0, OP##Match(p0, p1), \ loopsToBits({{l0, t0}, {l0, t1}})); \ \ s = merger.optimizeSet(s); \ expectNumLatPoints(s, 1); \ - expectLatPoint(s, 0, OP##Pattern(p0, p1), loopsToBits({{l0, t0}}), true); \ + expectLatPoint(s, 0, OP##Match(p0, p1), loopsToBits({{l0, t0}}), true); \ } - FOREVERY_COMMON_CONJ_BINOP(IMPL_MERGER_TEST_OPTIMIZED_CONJ) +#undef IMPL_MERGER_TEST_OPTIMIZED_CONJ /// Vector element-wise comparison (disjunction) of 2 vectors. 
i.e.; /// a(i) = b(i) + c(i) @@ -775,20 +752,20 @@ TEST_F(MergerTest3T1L, vector_cmp) { const auto l0 = lid(0); const auto t0 = tid(0); const auto t1 = tid(1); - PatternRef zero = synZeroPattern(); - PatternRef p0 = tensorPattern(t0); - PatternRef p1 = tensorPattern(t1); + const Match &zero = synZeroMatch(); + const Match &p0 = tensorMatch(t0); + const Match &p1 = tensorMatch(t1); auto s = merger.buildLattices(e, l0); - expectLatPoint(s, 0, cmpiPattern(p0, p1), loopsToBits({{l0, t0}, {l0, t1}})); - expectLatPointWithinRange(s, 1, 2, cmpiPattern(p0, zero), + expectLatPoint(s, 0, cmpiMatch(p0, p1), loopsToBits({{l0, t0}, {l0, t1}})); + expectLatPointWithinRange(s, 1, 2, cmpiMatch(p0, zero), loopsToBits({{l0, t0}})); - expectLatPointWithinRange(s, 1, 2, cmpiPattern(zero, p1), + expectLatPointWithinRange(s, 1, 2, cmpiMatch(zero, p1), loopsToBits({{l0, t1}})); s = merger.optimizeSet(s); - expectLatPoint(s, 0, cmpiPattern(p0, p1), loopsToBits({{l0, t0}, {l0, t1}})); - expectLatPointWithinRange(s, 1, 2, cmpiPattern(p0, zero), + expectLatPoint(s, 0, cmpiMatch(p0, p1), loopsToBits({{l0, t0}, {l0, t1}})); + expectLatPointWithinRange(s, 1, 2, cmpiMatch(p0, zero), loopsToBits({{l0, t0}})); - expectLatPointWithinRange(s, 1, 2, cmpiPattern(zero, p1), + expectLatPointWithinRange(s, 1, 2, cmpiMatch(zero, p1), loopsToBits({{l0, t1}})); } @@ -813,19 +790,17 @@ TEST_F(MergerTest3T1LD, vector_cmp) { const auto l0 = lid(0); const auto t0 = tid(0); const auto t1 = tid(1); - PatternRef zero = synZeroPattern(); - PatternRef p0 = tensorPattern(t0); - PatternRef p1 = tensorPattern(t1); + const Match &zero = synZeroMatch(); + const Match &p0 = tensorMatch(t0); + const Match &p1 = tensorMatch(t1); auto s = merger.buildLattices(e, l0); - expectLatPoint(s, 0, cmpiPattern(p0, p1), loopsToBits({{l0, t0}, {l0, t1}})); - expectLatPointWithinRange(s, 1, 2, cmpiPattern(p0, zero), + expectLatPoint(s, 0, cmpiMatch(p0, p1), loopsToBits({{l0, t0}, {l0, t1}})); + expectLatPointWithinRange(s, 1, 2, cmpiMatch(p0, zero), loopsToBits({{l0, t0}})); - expectLatPointWithinRange(s, 1, 2, cmpiPattern(zero, p1), + expectLatPointWithinRange(s, 1, 2, cmpiMatch(zero, p1), loopsToBits({{l0, t1}})); s = merger.optimizeSet(s); - expectLatPoint(s, 0, cmpiPattern(p0, p1), loopsToBits({{l0, t0}, {l0, t1}})); - expectLatPointWithinRange(s, 1, 2, cmpiPattern(zero, p1), + expectLatPoint(s, 0, cmpiMatch(p0, p1), loopsToBits({{l0, t0}, {l0, t1}})); + expectLatPointWithinRange(s, 1, 2, cmpiMatch(zero, p1), loopsToBits({{l0, t1}})); } - -#undef IMPL_MERGER_TEST_OPTIMIZED_CONJ From a13696fd8490f67b6ad119fcb6fda20e1fd3a089 Mon Sep 17 00:00:00 2001 From: Kiran Chandramohan Date: Thu, 26 Oct 2023 15:55:24 +0000 Subject: [PATCH 095/877] [Flang][OpenMP] Port a few parallel tests to HLFIR flow These are copies of tests in flang/test/Lower/OpenMP/FIR. 
---
 .../parallel-firstprivate-clause-scalar.f90   | 202 ++++++++++
 .../parallel-lastprivate-clause-scalar.f90    | 253 ++++++++++++
 .../Lower/OpenMP/parallel-private-clause.f90  | 380 ++++++++++++++++++
 flang/test/Lower/OpenMP/parallel-wsloop.f90   | 298 ++++++++++++++
 flang/test/Lower/OpenMP/parallel.f90          | 206 ++++++++++
 5 files changed, 1339 insertions(+)
 create mode 100644 flang/test/Lower/OpenMP/parallel-firstprivate-clause-scalar.f90
 create mode 100644 flang/test/Lower/OpenMP/parallel-lastprivate-clause-scalar.f90
 create mode 100644 flang/test/Lower/OpenMP/parallel-private-clause.f90
 create mode 100644 flang/test/Lower/OpenMP/parallel-wsloop.f90
 create mode 100644 flang/test/Lower/OpenMP/parallel.f90

diff --git a/flang/test/Lower/OpenMP/parallel-firstprivate-clause-scalar.f90 b/flang/test/Lower/OpenMP/parallel-firstprivate-clause-scalar.f90
new file mode 100644
index 0000000000000..6402f98a2addc
--- /dev/null
+++ b/flang/test/Lower/OpenMP/parallel-firstprivate-clause-scalar.f90
@@ -0,0 +1,202 @@
+! This test checks lowering of `FIRSTPRIVATE` clause for scalar types.
+
+! REQUIRES: shell
+! RUN: bbc -fopenmp -emit-hlfir %s -o - | FileCheck %s --check-prefix=CHECK
+
+!CHECK-DAG: func @_QPfirstprivate_complex(%[[ARG1:.*]]: !fir.ref>{{.*}}, %[[ARG2:.*]]: !fir.ref>{{.*}}) {
+!CHECK: %[[ARG1_DECL:.*]]:2 = hlfir.declare %[[ARG1]] {uniq_name = "_QFfirstprivate_complexEarg1"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>)
+!CHECK: %[[ARG2_DECL:.*]]:2 = hlfir.declare %[[ARG2]] {uniq_name = "_QFfirstprivate_complexEarg2"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>)
+!CHECK: omp.parallel {
+!CHECK: %[[ARG1_PVT:.*]] = fir.alloca !fir.complex<4> {bindc_name = "arg1", pinned, uniq_name = "_QFfirstprivate_complexEarg1"}
+!CHECK: %[[ARG1_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG1_PVT]] {uniq_name = "_QFfirstprivate_complexEarg1"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>)
+!CHECK: %[[ARG1_VAL:.*]] = fir.load %[[ARG1_DECL]]#0 : !fir.ref>
+!CHECK: hlfir.assign %[[ARG1_VAL]] to %[[ARG1_PVT_DECL]]#0 temporary_lhs : !fir.complex<4>, !fir.ref>
+!CHECK: %[[ARG2_PVT:.*]] = fir.alloca !fir.complex<8> {bindc_name = "arg2", pinned, uniq_name = "_QFfirstprivate_complexEarg2"}
+!CHECK: %[[ARG2_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG2_PVT]] {uniq_name = "_QFfirstprivate_complexEarg2"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>)
+!CHECK: %[[ARG2_VAL:.*]] = fir.load %[[ARG2_DECL]]#0 : !fir.ref>
+!CHECK: hlfir.assign %[[ARG2_VAL]] to %[[ARG2_PVT_DECL]]#0 temporary_lhs : !fir.complex<8>, !fir.ref>
+!CHECK: fir.call @_QPfoo(%[[ARG1_PVT_DECL]]#1, %[[ARG2_PVT_DECL]]#1) {{.*}}: (!fir.ref>, !fir.ref>) -> ()
+!CHECK: omp.terminator
+!CHECK: }
+
+subroutine firstprivate_complex(arg1, arg2)
+  complex(4) :: arg1
+  complex(8) :: arg2
+
+!$OMP PARALLEL FIRSTPRIVATE(arg1, arg2)
+  call foo(arg1, arg2)
+!$OMP END PARALLEL
+
+end subroutine
+
+!CHECK-DAG: func @_QPfirstprivate_integer(%[[ARG1:.*]]: !fir.ref{{.*}}, %[[ARG2:.*]]: !fir.ref{{.*}}, %[[ARG3:.*]]: !fir.ref{{.*}}, %[[ARG4:.*]]: !fir.ref{{.*}}, %[[ARG5:.*]]: !fir.ref{{.*}}, %[[ARG6:.*]]: !fir.ref{{.*}}) {
+!CHECK: %[[ARG1_DECL:.*]]:2 = hlfir.declare %[[ARG1]] {uniq_name = "_QFfirstprivate_integerEarg1"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+!CHECK: %[[ARG2_DECL:.*]]:2 = hlfir.declare %[[ARG2]] {uniq_name = "_QFfirstprivate_integerEarg2"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+!CHECK: %[[ARG3_DECL:.*]]:2 = hlfir.declare %[[ARG3]] {uniq_name = "_QFfirstprivate_integerEarg3"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+!CHECK: %[[ARG4_DECL:.*]]:2 = hlfir.declare %[[ARG4]] {uniq_name = "_QFfirstprivate_integerEarg4"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+!CHECK: %[[ARG5_DECL:.*]]:2 = hlfir.declare %[[ARG5]] {uniq_name = "_QFfirstprivate_integerEarg5"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+!CHECK: %[[ARG6_DECL:.*]]:2 = hlfir.declare %[[ARG6]] {uniq_name = "_QFfirstprivate_integerEarg6"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+!CHECK: omp.parallel {
+!CHECK: %[[ARG1_PVT:.*]] = fir.alloca i32 {bindc_name = "arg1", pinned, uniq_name = "_QFfirstprivate_integerEarg1"}
+!CHECK: %[[ARG1_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG1_PVT]] {uniq_name = "_QFfirstprivate_integerEarg1"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+!CHECK: %[[ARG1_VAL:.*]] = fir.load %[[ARG1_DECL]]#0 : !fir.ref
+!CHECK: hlfir.assign %[[ARG1_VAL]] to %[[ARG1_PVT_DECL]]#0 temporary_lhs : i32, !fir.ref
+!CHECK: %[[ARG2_PVT:.*]] = fir.alloca i8 {bindc_name = "arg2", pinned, uniq_name = "_QFfirstprivate_integerEarg2"}
+!CHECK: %[[ARG2_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG2_PVT]] {uniq_name = "_QFfirstprivate_integerEarg2"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+!CHECK: %[[ARG2_VAL:.*]] = fir.load %[[ARG2_DECL]]#0 : !fir.ref
+!CHECK: hlfir.assign %[[ARG2_VAL]] to %[[ARG2_PVT_DECL]]#0 temporary_lhs : i8, !fir.ref
+!CHECK: %[[ARG3_PVT:.*]] = fir.alloca i16 {bindc_name = "arg3", pinned, uniq_name = "_QFfirstprivate_integerEarg3"}
+!CHECK: %[[ARG3_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG3_PVT]] {uniq_name = "_QFfirstprivate_integerEarg3"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+!CHECK: %[[ARG3_VAL:.*]] = fir.load %[[ARG3_DECL]]#0 : !fir.ref
+!CHECK: hlfir.assign %[[ARG3_VAL]] to %[[ARG3_PVT_DECL]]#0 temporary_lhs : i16, !fir.ref
+!CHECK: %[[ARG4_PVT:.*]] = fir.alloca i32 {bindc_name = "arg4", pinned, uniq_name = "_QFfirstprivate_integerEarg4"}
+!CHECK: %[[ARG4_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG4_PVT]] {uniq_name = "_QFfirstprivate_integerEarg4"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+!CHECK: %[[ARG4_VAL:.*]] = fir.load %[[ARG4_DECL]]#0 : !fir.ref
+!CHECK: hlfir.assign %[[ARG4_VAL]] to %[[ARG4_PVT_DECL]]#0 temporary_lhs : i32, !fir.ref
+!CHECK: %[[ARG5_PVT:.*]] = fir.alloca i64 {bindc_name = "arg5", pinned, uniq_name = "_QFfirstprivate_integerEarg5"}
+!CHECK: %[[ARG5_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG5_PVT]] {uniq_name = "_QFfirstprivate_integerEarg5"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+!CHECK: %[[ARG5_VAL:.*]] = fir.load %[[ARG5_DECL]]#0 : !fir.ref
+!CHECK: hlfir.assign %[[ARG5_VAL]] to %[[ARG5_PVT_DECL]]#0 temporary_lhs : i64, !fir.ref
+!CHECK: %[[ARG6_PVT:.*]] = fir.alloca i128 {bindc_name = "arg6", pinned, uniq_name = "_QFfirstprivate_integerEarg6"}
+!CHECK: %[[ARG6_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG6_PVT]] {uniq_name = "_QFfirstprivate_integerEarg6"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+!CHECK: %[[ARG6_VAL:.*]] = fir.load %[[ARG6_DECL]]#0 : !fir.ref
+!CHECK: hlfir.assign %[[ARG6_VAL]] to %[[ARG6_PVT_DECL]]#0 temporary_lhs : i128, !fir.ref
+!CHECK: fir.call @_QPbar(%[[ARG1_PVT_DECL]]#1, %[[ARG2_PVT_DECL]]#1, %[[ARG3_PVT_DECL]]#1, %[[ARG4_PVT_DECL]]#1, %[[ARG5_PVT_DECL]]#1, %[[ARG6_PVT_DECL]]#1) {{.*}}: (!fir.ref, !fir.ref, !fir.ref, !fir.ref, !fir.ref, !fir.ref) -> ()
+!CHECK: omp.terminator
+!CHECK: }
+
+subroutine firstprivate_integer(arg1, arg2, arg3, arg4, arg5, arg6)
+  integer :: arg1
+  integer(kind=1) :: arg2
+  integer(kind=2) :: arg3
+  integer(kind=4) :: arg4
+  integer(kind=8) :: arg5
+  integer(kind=16) :: arg6
+
+!$OMP PARALLEL FIRSTPRIVATE(arg1, arg2, arg3, arg4, arg5, arg6)
+  call bar(arg1, arg2, arg3, arg4, arg5, arg6)
+!$OMP END PARALLEL
+
+end subroutine
+
+!CHECK-DAG: func @_QPfirstprivate_logical(%[[ARG1:.*]]: !fir.ref>{{.*}}, %[[ARG2:.*]]: !fir.ref>{{.*}}, %[[ARG3:.*]]: !fir.ref>{{.*}}, %[[ARG4:.*]]: !fir.ref>{{.*}}, %[[ARG5:.*]]: !fir.ref>{{.*}}) {
+!CHECK: %[[ARG1_DECL:.*]]:2 = hlfir.declare %[[ARG1]] {uniq_name = "_QFfirstprivate_logicalEarg1"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>)
+!CHECK: %[[ARG2_DECL:.*]]:2 = hlfir.declare %[[ARG2]] {uniq_name = "_QFfirstprivate_logicalEarg2"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>)
+!CHECK: %[[ARG3_DECL:.*]]:2 = hlfir.declare %[[ARG3]] {uniq_name = "_QFfirstprivate_logicalEarg3"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>)
+!CHECK: %[[ARG4_DECL:.*]]:2 = hlfir.declare %[[ARG4]] {uniq_name = "_QFfirstprivate_logicalEarg4"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>)
+!CHECK: %[[ARG5_DECL:.*]]:2 = hlfir.declare %[[ARG5]] {uniq_name = "_QFfirstprivate_logicalEarg5"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>)
+!CHECK: omp.parallel {
+!CHECK: %[[ARG1_PVT:.*]] = fir.alloca !fir.logical<4> {bindc_name = "arg1", pinned, uniq_name = "_QFfirstprivate_logicalEarg1"}
+!CHECK: %[[ARG1_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG1_PVT]] {uniq_name = "_QFfirstprivate_logicalEarg1"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>)
+!CHECK: %[[ARG1_VAL:.*]] = fir.load %[[ARG1_DECL]]#0 : !fir.ref>
+!CHECK: hlfir.assign %[[ARG1_VAL]] to %[[ARG1_PVT_DECL]]#0 temporary_lhs : !fir.logical<4>, !fir.ref>
+!CHECK: %[[ARG2_PVT:.*]] = fir.alloca !fir.logical<1> {bindc_name = "arg2", pinned, uniq_name = "_QFfirstprivate_logicalEarg2"}
+!CHECK: %[[ARG2_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG2_PVT]] {uniq_name = "_QFfirstprivate_logicalEarg2"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>)
+!CHECK: %[[ARG2_VAL:.*]] = fir.load %[[ARG2_DECL]]#0 : !fir.ref>
+!CHECK: hlfir.assign %[[ARG2_VAL]] to %[[ARG2_PVT_DECL]]#0 temporary_lhs : !fir.logical<1>, !fir.ref>
+!CHECK: %[[ARG3_PVT:.*]] = fir.alloca !fir.logical<2> {bindc_name = "arg3", pinned, uniq_name = "_QFfirstprivate_logicalEarg3"}
+!CHECK: %[[ARG3_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG3_PVT]] {uniq_name = "_QFfirstprivate_logicalEarg3"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>)
+!CHECK: %[[ARG3_VAL:.*]] = fir.load %[[ARG3_DECL]]#0 : !fir.ref>
+!CHECK: hlfir.assign %[[ARG3_VAL]] to %[[ARG3_PVT_DECL]]#0 temporary_lhs : !fir.logical<2>, !fir.ref>
+!CHECK: %[[ARG4_PVT:.*]] = fir.alloca !fir.logical<4> {bindc_name = "arg4", pinned, uniq_name = "_QFfirstprivate_logicalEarg4"}
+!CHECK: %[[ARG4_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG4_PVT]] {uniq_name = "_QFfirstprivate_logicalEarg4"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>)
+!CHECK: %[[ARG4_VAL:.*]] = fir.load %[[ARG4_DECL]]#0 : !fir.ref>
+!CHECK: hlfir.assign %[[ARG4_VAL]] to %[[ARG4_PVT_DECL]]#0 temporary_lhs : !fir.logical<4>, !fir.ref>
+!CHECK: %[[ARG5_PVT:.*]] = fir.alloca !fir.logical<8> {bindc_name = "arg5", pinned, uniq_name = "_QFfirstprivate_logicalEarg5"}
+!CHECK: %[[ARG5_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG5_PVT]] {uniq_name = "_QFfirstprivate_logicalEarg5"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>)
+!CHECK: %[[ARG5_VAL:.*]] = fir.load %[[ARG5_DECL]]#0 : !fir.ref>
+!CHECK: hlfir.assign %[[ARG5_VAL]] to %[[ARG5_PVT_DECL]]#0 temporary_lhs : !fir.logical<8>, !fir.ref>
+!CHECK: fir.call @_QPbaz(%[[ARG1_PVT_DECL]]#1, %[[ARG2_PVT_DECL]]#1, %[[ARG3_PVT_DECL]]#1, %[[ARG4_PVT_DECL]]#1, %[[ARG5_PVT_DECL]]#1) {{.*}}: (!fir.ref>, !fir.ref>, !fir.ref>, !fir.ref>, !fir.ref>) -> ()
+!CHECK: omp.terminator
+!CHECK: }
+
+subroutine firstprivate_logical(arg1, arg2, arg3, arg4, arg5)
+  logical :: arg1
+  logical(kind=1) :: arg2
+  logical(kind=2) :: arg3
+  logical(kind=4) :: arg4
+  logical(kind=8) :: arg5
+
+!$OMP PARALLEL FIRSTPRIVATE(arg1, arg2, arg3, arg4, arg5)
+  call baz(arg1, arg2, arg3, arg4, arg5)
+!$OMP END PARALLEL
+
+end subroutine
+
+!CHECK-DAG: func @_QPfirstprivate_real(%[[ARG1:.*]]: !fir.ref{{.*}}, %[[ARG2:.*]]: !fir.ref{{.*}}, %[[ARG3:.*]]: !fir.ref{{.*}}, %[[ARG4:.*]]: !fir.ref{{.*}}, %[[ARG5:.*]]: !fir.ref{{.*}}, %[[ARG6:.*]]: !fir.ref{{.*}}) {
+!CHECK: %[[ARG1_DECL:.*]]:2 = hlfir.declare %[[ARG1]] {uniq_name = "_QFfirstprivate_realEarg1"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+!CHECK: %[[ARG2_DECL:.*]]:2 = hlfir.declare %[[ARG2]] {uniq_name = "_QFfirstprivate_realEarg2"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+!CHECK: %[[ARG3_DECL:.*]]:2 = hlfir.declare %[[ARG3]] {uniq_name = "_QFfirstprivate_realEarg3"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+!CHECK: %[[ARG4_DECL:.*]]:2 = hlfir.declare %[[ARG4]] {uniq_name = "_QFfirstprivate_realEarg4"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+!CHECK: %[[ARG5_DECL:.*]]:2 = hlfir.declare %[[ARG5]] {uniq_name = "_QFfirstprivate_realEarg5"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+!CHECK: %[[ARG6_DECL:.*]]:2 = hlfir.declare %[[ARG6]] {uniq_name = "_QFfirstprivate_realEarg6"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+!CHECK: omp.parallel {
+!CHECK: %[[ARG1_PVT:.*]] = fir.alloca f32 {bindc_name = "arg1", pinned, uniq_name = "_QFfirstprivate_realEarg1"}
+!CHECK: %[[ARG1_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG1_PVT]] {uniq_name = "_QFfirstprivate_realEarg1"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+!CHECK: %[[ARG1_VAL:.*]] = fir.load %[[ARG1_DECL]]#0 : !fir.ref
+!CHECK: hlfir.assign %[[ARG1_VAL]] to %[[ARG1_PVT_DECL]]#0 temporary_lhs : f32, !fir.ref
+!CHECK: %[[ARG2_PVT:.*]] = fir.alloca f16 {bindc_name = "arg2", pinned, uniq_name = "_QFfirstprivate_realEarg2"}
+!CHECK: %[[ARG2_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG2_PVT]] {uniq_name = "_QFfirstprivate_realEarg2"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+!CHECK: %[[ARG2_VAL:.*]] = fir.load %[[ARG2_DECL]]#0 : !fir.ref
+!CHECK: hlfir.assign %[[ARG2_VAL]] to %[[ARG2_PVT_DECL]]#0 temporary_lhs : f16, !fir.ref
+!CHECK: %[[ARG3_PVT:.*]] = fir.alloca f32 {bindc_name = "arg3", pinned, uniq_name = "_QFfirstprivate_realEarg3"}
+!CHECK: %[[ARG3_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG3_PVT]] {uniq_name = "_QFfirstprivate_realEarg3"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+!CHECK: %[[ARG3_VAL:.*]] = fir.load %[[ARG3_DECL]]#0 : !fir.ref
+!CHECK: hlfir.assign %[[ARG3_VAL]] to %[[ARG3_PVT_DECL]]#0 temporary_lhs : f32, !fir.ref
+!CHECK: %[[ARG4_PVT:.*]] = fir.alloca f64 {bindc_name = "arg4", pinned, uniq_name = "_QFfirstprivate_realEarg4"}
+!CHECK: %[[ARG4_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG4_PVT]] {uniq_name = "_QFfirstprivate_realEarg4"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+!CHECK: %[[ARG4_VAL:.*]] = fir.load %[[ARG4_DECL]]#0 : !fir.ref
+!CHECK: hlfir.assign %[[ARG4_VAL]] to %[[ARG4_PVT_DECL]]#0 temporary_lhs : f64, !fir.ref
+!CHECK: %[[ARG5_PVT:.*]] = fir.alloca f80 {bindc_name = "arg5", pinned, uniq_name = "_QFfirstprivate_realEarg5"}
+!CHECK: %[[ARG5_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG5_PVT]] {uniq_name = "_QFfirstprivate_realEarg5"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+!CHECK: %[[ARG5_VAL:.*]] = fir.load %[[ARG5_DECL]]#0 : !fir.ref
+!CHECK: hlfir.assign %[[ARG5_VAL]] to %[[ARG5_PVT_DECL]]#0 temporary_lhs : f80, !fir.ref
+!CHECK: %[[ARG6_PVT:.*]] = fir.alloca f128 {bindc_name = "arg6", pinned, uniq_name = "_QFfirstprivate_realEarg6"}
+!CHECK: %[[ARG6_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG6_PVT]] {uniq_name = "_QFfirstprivate_realEarg6"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+!CHECK: %[[ARG6_VAL:.*]] = fir.load %[[ARG6_DECL]]#0 : !fir.ref
+!CHECK: hlfir.assign %[[ARG6_VAL]] to %[[ARG6_PVT_DECL]]#0 temporary_lhs : f128, !fir.ref
+!CHECK: fir.call @_QPqux(%[[ARG1_PVT_DECL]]#1, %[[ARG2_PVT_DECL]]#1, %[[ARG3_PVT_DECL]]#1, %[[ARG4_PVT_DECL]]#1, %[[ARG5_PVT_DECL]]#1, %[[ARG6_PVT_DECL]]#1) {{.*}}: (!fir.ref, !fir.ref, !fir.ref, !fir.ref, !fir.ref, !fir.ref) -> ()
+!CHECK: omp.terminator
+!CHECK: }
+
+subroutine firstprivate_real(arg1, arg2, arg3, arg4, arg5, arg6)
+  real :: arg1
+  real(kind=2) :: arg2
+  real(kind=4) :: arg3
+  real(kind=8) :: arg4
+  real(kind=10) :: arg5
+  real(kind=16) :: arg6
+
+!$OMP PARALLEL FIRSTPRIVATE(arg1, arg2, arg3, arg4, arg5, arg6)
+  call qux(arg1, arg2, arg3, arg4, arg5, arg6)
+!$OMP END PARALLEL
+
+end subroutine
+
+!CHECK-LABEL: func.func @_QPmultiple_firstprivate(
+!CHECK-SAME: %[[A_ADDR:.*]]: !fir.ref {fir.bindc_name = "a"},
+!CHECK-SAME: %[[B_ADDR:.*]]: !fir.ref {fir.bindc_name = "b"}) {
+!CHECK: %[[A_DECL:.*]]:2 = hlfir.declare %[[A_ADDR]] {uniq_name = "_QFmultiple_firstprivateEa"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+!CHECK: %[[B_DECL:.*]]:2 = hlfir.declare %[[B_ADDR]] {uniq_name = "_QFmultiple_firstprivateEb"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+!CHECK: omp.parallel {
+!CHECK: %[[A_PRIV_ADDR:.*]] = fir.alloca i32 {bindc_name = "a", pinned, uniq_name = "_QFmultiple_firstprivateEa"}
+!CHECK: %[[A_PRIV_DECL:.*]]:2 = hlfir.declare %[[A_PRIV_ADDR]] {uniq_name = "_QFmultiple_firstprivateEa"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+!CHECK: %[[A:.*]] = fir.load %[[A_DECL]]#0 : !fir.ref
+!CHECK: hlfir.assign %[[A]] to %[[A_PRIV_DECL]]#0 temporary_lhs : i32, !fir.ref
+!CHECK: %[[B_PRIV_ADDR:.*]] = fir.alloca i32 {bindc_name = "b", pinned, uniq_name = "_QFmultiple_firstprivateEb"}
+!CHECK: %[[B_PRIV_DECL:.*]]:2 = hlfir.declare %[[B_PRIV_ADDR]] {uniq_name = "_QFmultiple_firstprivateEb"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+!CHECK: %[[B:.*]] = fir.load %[[B_DECL]]#0 : !fir.ref
+!CHECK: hlfir.assign %[[B]] to %[[B_PRIV_DECL]]#0 temporary_lhs : i32, !fir.ref
+!CHECK: fir.call @_QPquux(%[[A_PRIV_DECL]]#1, %[[B_PRIV_DECL]]#1) {{.*}}: (!fir.ref, !fir.ref) -> ()
+!CHECK: omp.terminator
+!CHECK: }
+!CHECK: return
+!CHECK: }
+
+subroutine multiple_firstprivate(a, b)
+  integer :: a, b
+!$OMP PARALLEL FIRSTPRIVATE(a) FIRSTPRIVATE(b)
+  call quux(a, b)
+!$OMP END PARALLEL
+end subroutine multiple_firstprivate
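The pattern these checks encode is the firstprivate copy-in: a pinned `fir.alloca`
for the clone, a `fir.load` of the original, and an `hlfir.assign ... temporary_lhs`
into the clone. A small self-contained program (hypothetical, for illustration only)
showing the semantics being lowered:

  program firstprivate_demo
    integer :: x
    x = 41
    !$omp parallel firstprivate(x)
    ! Each thread starts from a private copy initialized to 41;
    ! writes here never reach the original x.
    x = x + 1
    !$omp end parallel
    print *, x   ! still prints 41
  end program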
diff --git a/flang/test/Lower/OpenMP/parallel-lastprivate-clause-scalar.f90 b/flang/test/Lower/OpenMP/parallel-lastprivate-clause-scalar.f90
new file mode 100644
index 0000000000000..abd14f455123b
--- /dev/null
+++ b/flang/test/Lower/OpenMP/parallel-lastprivate-clause-scalar.f90
@@ -0,0 +1,253 @@
+! This test checks lowering of `LASTPRIVATE` clause for scalar types.
+
+! RUN: bbc -fopenmp -emit-hlfir %s -o - | FileCheck %s
+! RUN: flang-new -fc1 -fopenmp -emit-hlfir %s -o - | FileCheck %s
+
+!CHECK: func @_QPlastprivate_character(%[[ARG1:.*]]: !fir.boxchar<1>{{.*}}) {
+!CHECK-DAG: %[[ARG1_UNBOX:.*]]:2 = fir.unboxchar
+!CHECK-DAG: %[[FIVE:.*]] = arith.constant 5 : index
+!CHECK-DAG: %[[ARG1_REF:.*]] = fir.convert %[[ARG1_UNBOX]]#0 : (!fir.ref>) -> !fir.ref>
+!CHECK-DAG: %[[ARG1_DECL:.*]]:2 = hlfir.declare %[[ARG1_REF]] typeparams %[[FIVE]] {uniq_name = "_QFlastprivate_characterEarg1"} : (!fir.ref>, index) -> (!fir.ref>, !fir.ref>)
+
+!CHECK: omp.parallel {
+!CHECK-DAG: %[[ARG1_PVT:.*]] = fir.alloca !fir.char<1,5> {bindc_name = "arg1",
+!CHECK-DAG: %[[ARG1_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG1_PVT]] typeparams %[[FIVE]] {uniq_name = "_QFlastprivate_characterEarg1"} : (!fir.ref>, index) -> (!fir.ref>, !fir.ref>)
+
+! Check that we are accessing the clone inside the loop
+!CHECK-DAG: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} {
+!CHECK-DAG: %[[NEG_ONE:.*]] = arith.constant -1 : i32
+!CHECK-NEXT: %[[ADDR:.*]] = fir.address_of(@_QQcl.
+!CHECK-NEXT: %[[CVT0:.*]] = fir.convert %[[ADDR]]
+!CHECK-NEXT: %[[CNST:.*]] = arith.constant
+!CHECK-NEXT: %[[CALL_BEGIN_IO:.*]] = fir.call @_FortranAioBeginExternalListOutput(%[[NEG_ONE]], %[[CVT0]], %[[CNST]]) {{.*}}: (i32, !fir.ref, i32) -> !fir.ref
+!CHECK-NEXT: %[[CVT_0_1:.*]] = fir.convert %[[ARG1_PVT_DECL]]#1
+!CHECK-NEXT: %[[CVT_0_2:.*]] = fir.convert %[[FIVE]]
+!CHECK-NEXT: %[[CALL_OP_ASCII:.*]] = fir.call @_FortranAioOutputAscii(%[[CALL_BEGIN_IO]], %[[CVT_0_1]], %[[CVT_0_2]])
+!CHECK-NEXT: %[[CALL_END_IO:.*]] = fir.call @_FortranAioEndIoStatement(%[[CALL_BEGIN_IO]])
+
+! Testing last iteration check
+!CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32
+!CHECK: %[[C0:.*]] = arith.constant 0 : i32
+!CHECK: %[[T1:.*]] = arith.cmpi slt, %{{.*}}, %[[C0]] : i32
+!CHECK: %[[T2:.*]] = arith.cmpi slt, %[[V]], %{{.*}} : i32
+!CHECK: %[[T3:.*]] = arith.cmpi sgt, %[[V]], %{{.*}} : i32
+!CHECK: %[[IV_CMP:.*]] = arith.select %[[T1]], %[[T2]], %[[T3]] : i1
+!CHECK: fir.if %[[IV_CMP]] {
+!CHECK: fir.store %[[V]] to %{{.*}} : !fir.ref
+
+! Testing lastprivate val update
+!CHECK-DAG: hlfir.assign %[[ARG1_PVT_DECL]]#0 to %[[ARG1_DECL]]#0 temporary_lhs : !fir.ref>, !fir.ref>
+!CHECK-DAG: }
+!CHECK-DAG: omp.yield
+
+subroutine lastprivate_character(arg1)
+  character(5) :: arg1
+!$OMP PARALLEL
+!$OMP DO LASTPRIVATE(arg1)
+do n = 1, 5
+  arg1(n:n) = 'c'
+  print *, arg1
+end do
+!$OMP END DO
+!$OMP END PARALLEL
+end subroutine
+
+!CHECK: func @_QPlastprivate_int(%[[ARG1:.*]]: !fir.ref {fir.bindc_name = "arg1"}) {
+!CHECK: %[[ARG1_DECL:.*]]:2 = hlfir.declare %[[ARG1]] {uniq_name = "_QFlastprivate_intEarg1"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+!CHECK-DAG: omp.parallel {
+!CHECK-DAG: %[[CLONE:.*]] = fir.alloca i32 {bindc_name = "arg1"
+!CHECK-DAG: %[[CLONE_DECL:.*]]:2 = hlfir.declare %[[CLONE]] {uniq_name = "_QFlastprivate_intEarg1"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} {
+
+! Testing last iteration check
+!CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32
+!CHECK: %[[C0:.*]] = arith.constant 0 : i32
+!CHECK: %[[T1:.*]] = arith.cmpi slt, %{{.*}}, %[[C0]] : i32
+!CHECK: %[[T2:.*]] = arith.cmpi slt, %[[V]], %{{.*}} : i32
+!CHECK: %[[T3:.*]] = arith.cmpi sgt, %[[V]], %{{.*}} : i32
+!CHECK: %[[IV_CMP:.*]] = arith.select %[[T1]], %[[T2]], %[[T3]] : i1
+!CHECK: fir.if %[[IV_CMP]] {
+!CHECK: fir.store %[[V]] to %{{.*}} : !fir.ref
+
+! Testing lastprivate val update
+!CHECK-NEXT: %[[CLONE_LD:.*]] = fir.load %[[CLONE_DECL]]#0 : !fir.ref
+!CHECK: hlfir.assign %[[CLONE_LD]] to %[[ARG1_DECL]]#0 temporary_lhs : i32, !fir.ref
+!CHECK-DAG: }
+!CHECK-DAG: omp.yield
+
+subroutine lastprivate_int(arg1)
+  integer :: arg1
+!$OMP PARALLEL
+!$OMP DO LASTPRIVATE(arg1)
+do n = 1, 5
+  arg1 = 2
+  print *, arg1
+end do
+!$OMP END DO
+!$OMP END PARALLEL
+print *, arg1
+end subroutine
+
+!CHECK: func.func @_QPmult_lastprivate_int(%[[ARG1:.*]]: !fir.ref {fir.bindc_name = "arg1"}, %[[ARG2:.*]]: !fir.ref {fir.bindc_name = "arg2"}) {
+!CHECK: %[[ARG1_DECL:.*]]:2 = hlfir.declare %[[ARG1]] {uniq_name = "_QFmult_lastprivate_intEarg1"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+!CHECK: %[[ARG2_DECL:.*]]:2 = hlfir.declare %[[ARG2]] {uniq_name = "_QFmult_lastprivate_intEarg2"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+!CHECK: omp.parallel {
+!CHECK-DAG: %[[CLONE1:.*]] = fir.alloca i32 {bindc_name = "arg1"
+!CHECK-DAG: %[[CLONE1_DECL:.*]]:2 = hlfir.declare %[[CLONE1]] {uniq_name = "_QFmult_lastprivate_intEarg1"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+!CHECK-DAG: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2"
+!CHECK-DAG: %[[CLONE2_DECL:.*]]:2 = hlfir.declare %[[CLONE2]] {uniq_name = "_QFmult_lastprivate_intEarg2"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} {
+
+! Testing last iteration check
+!CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32
+!CHECK: %[[C0:.*]] = arith.constant 0 : i32
+!CHECK: %[[T1:.*]] = arith.cmpi slt, %{{.*}}, %[[C0]] : i32
+!CHECK: %[[T2:.*]] = arith.cmpi slt, %[[V]], %{{.*}} : i32
+!CHECK: %[[T3:.*]] = arith.cmpi sgt, %[[V]], %{{.*}} : i32
+!CHECK: %[[IV_CMP:.*]] = arith.select %[[T1]], %[[T2]], %[[T3]] : i1
+!CHECK: fir.if %[[IV_CMP]] {
+!CHECK: fir.store %[[V]] to %{{.*}} : !fir.ref
+! Testing lastprivate val update
+!CHECK-DAG: %[[CLONE_LD1:.*]] = fir.load %[[CLONE1_DECL]]#0 : !fir.ref
+!CHECK-DAG: hlfir.assign %[[CLONE_LD1]] to %[[ARG1_DECL]]#0 temporary_lhs : i32, !fir.ref
+!CHECK-DAG: %[[CLONE_LD2:.*]] = fir.load %[[CLONE2_DECL]]#0 : !fir.ref
+!CHECK-DAG: hlfir.assign %[[CLONE_LD2]] to %[[ARG2_DECL]]#0 temporary_lhs : i32, !fir.ref
+!CHECK: }
+!CHECK: omp.yield
+
+subroutine mult_lastprivate_int(arg1, arg2)
+  integer :: arg1, arg2
+!$OMP PARALLEL
+!$OMP DO LASTPRIVATE(arg1) LASTPRIVATE(arg2)
+do n = 1, 5
+  arg1 = 2
+  arg2 = 3
+  print *, arg1, arg2
+end do
+!$OMP END DO
+!$OMP END PARALLEL
+print *, arg1, arg2
+end subroutine
+
+!CHECK: func.func @_QPmult_lastprivate_int2(%[[ARG1:.*]]: !fir.ref {fir.bindc_name = "arg1"}, %[[ARG2:.*]]: !fir.ref {fir.bindc_name = "arg2"}) {
+!CHECK: %[[ARG1_DECL:.*]]:2 = hlfir.declare %arg0 {uniq_name = "_QFmult_lastprivate_int2Earg1"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+!CHECK: %[[ARG2_DECL:.*]]:2 = hlfir.declare %arg1 {uniq_name = "_QFmult_lastprivate_int2Earg2"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+!CHECK: omp.parallel {
+!CHECK-DAG: %[[CLONE1:.*]] = fir.alloca i32 {bindc_name = "arg1"
+!CHECK-DAG: %[[CLONE1_DECL:.*]]:2 = hlfir.declare %[[CLONE1]] {uniq_name = "_QFmult_lastprivate_int2Earg1"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+!CHECK-DAG: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2"
+!CHECK-DAG: %[[CLONE2_DECL:.*]]:2 = hlfir.declare %[[CLONE2]] {uniq_name = "_QFmult_lastprivate_int2Earg2"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} {
+
+!Testing last iteration check
+!CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32
+!CHECK: %[[C0:.*]] = arith.constant 0 : i32
+!CHECK: %[[T1:.*]] = arith.cmpi slt, %{{.*}}, %[[C0]] : i32
+!CHECK: %[[T2:.*]] = arith.cmpi slt, %[[V]], %{{.*}} : i32
+!CHECK: %[[T3:.*]] = arith.cmpi sgt, %[[V]], %{{.*}} : i32
+!CHECK: %[[IV_CMP:.*]] = arith.select %[[T1]], %[[T2]], %[[T3]] : i1
+!CHECK: fir.if %[[IV_CMP]] {
+!CHECK: fir.store %[[V]] to %{{.*}} : !fir.ref
+!Testing lastprivate val update
+!CHECK-DAG: %[[CLONE_LD2:.*]] = fir.load %[[CLONE2_DECL]]#0 : !fir.ref
+!CHECK-DAG: hlfir.assign %[[CLONE_LD2]] to %[[ARG2_DECL]]#0 temporary_lhs : i32, !fir.ref
+!CHECK-DAG: %[[CLONE_LD1:.*]] = fir.load %[[CLONE1_DECL]]#0 : !fir.ref
+!CHECK-DAG: hlfir.assign %[[CLONE_LD1]] to %[[ARG1_DECL]]#0 temporary_lhs : i32, !fir.ref
+!CHECK: }
+!CHECK: omp.yield
+
+subroutine mult_lastprivate_int2(arg1, arg2)
+  integer :: arg1, arg2
+!$OMP PARALLEL
+!$OMP DO LASTPRIVATE(arg1, arg2)
+do n = 1, 5
+  arg1 = 2
+  arg2 = 3
+  print *, arg1, arg2
+end do
+!$OMP END DO
+!$OMP END PARALLEL
+print *, arg1, arg2
+end subroutine
+
+!CHECK: func.func @_QPfirstpriv_lastpriv_int(%[[ARG1:.*]]: !fir.ref {fir.bindc_name = "arg1"}, %[[ARG2:.*]]: !fir.ref {fir.bindc_name = "arg2"}) {
+!CHECK: %[[ARG1_DECL:.*]]:2 = hlfir.declare %[[ARG1]] {uniq_name = "_QFfirstpriv_lastpriv_intEarg1"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+!CHECK: %[[ARG2_DECL:.*]]:2 = hlfir.declare %[[ARG2]] {uniq_name = "_QFfirstpriv_lastpriv_intEarg2"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+!CHECK: omp.parallel {
+! Firstprivate update
+!CHECK: %[[CLONE1:.*]] = fir.alloca i32 {bindc_name = "arg1"
+!CHECK: %[[CLONE1_DECL:.*]]:2 = hlfir.declare %[[CLONE1]] {uniq_name = "_QFfirstpriv_lastpriv_intEarg1"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+!CHECK: %[[FPV_LD:.*]] = fir.load %[[ARG1_DECL]]#0 : !fir.ref
+!CHECK: hlfir.assign %[[FPV_LD]] to %[[CLONE1_DECL]]#0 temporary_lhs : i32, !fir.ref
+! Lastprivate Allocation
+!CHECK: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2"
+!CHECK: %[[CLONE2_DECL:.*]]:2 = hlfir.declare %[[CLONE2]] {uniq_name = "_QFfirstpriv_lastpriv_intEarg2"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+!CHECK-NOT: omp.barrier
+!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} {
+
+! Testing last iteration check
+!CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32
+!CHECK: %[[C0:.*]] = arith.constant 0 : i32
+!CHECK: %[[T1:.*]] = arith.cmpi slt, %{{.*}}, %[[C0]] : i32
+!CHECK: %[[T2:.*]] = arith.cmpi slt, %[[V]], %{{.*}} : i32
+!CHECK: %[[T3:.*]] = arith.cmpi sgt, %[[V]], %{{.*}} : i32
+!CHECK: %[[IV_CMP:.*]] = arith.select %[[T1]], %[[T2]], %[[T3]] : i1
+!CHECK: fir.if %[[IV_CMP]] {
+!CHECK: fir.store %[[V]] to %{{.*}} : !fir.ref
+! Testing lastprivate val update
+!CHECK-NEXT: %[[CLONE_LD:.*]] = fir.load %[[CLONE2_DECL]]#0 : !fir.ref
+!CHECK-NEXT: hlfir.assign %[[CLONE_LD]] to %[[ARG2_DECL]]#0 temporary_lhs : i32, !fir.ref
+!CHECK-NEXT: }
+!CHECK-NEXT: omp.yield
+
+subroutine firstpriv_lastpriv_int(arg1, arg2)
+  integer :: arg1, arg2
+!$OMP PARALLEL
+!$OMP DO FIRSTPRIVATE(arg1) LASTPRIVATE(arg2)
+do n = 1, 5
+  arg1 = 2
+  arg2 = 3
+  print *, arg1, arg2
+end do
+!$OMP END DO
+!$OMP END PARALLEL
+print *, arg1, arg2
+end subroutine
+
+!CHECK: func.func @_QPfirstpriv_lastpriv_int2(%[[ARG1:.*]]: !fir.ref {fir.bindc_name = "arg1"}) {
+!CHECK: %[[ARG1_DECL:.*]]:2 = hlfir.declare %[[ARG1]] {uniq_name = "_QFfirstpriv_lastpriv_int2Earg1"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+!CHECK: omp.parallel {
+! Firstprivate update
+!CHECK: %[[CLONE1:.*]] = fir.alloca i32 {bindc_name = "arg1"
+!CHECK: %[[CLONE1_DECL:.*]]:2 = hlfir.declare %[[CLONE1]] {uniq_name = "_QFfirstpriv_lastpriv_int2Earg1"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+!CHECK-NEXT: %[[FPV_LD:.*]] = fir.load %[[ARG1_DECL]]#0 : !fir.ref
+!CHECK-NEXT: hlfir.assign %[[FPV_LD]] to %[[CLONE1_DECL]]#0 temporary_lhs : i32, !fir.ref
+!CHECK-NEXT: omp.barrier
+!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} {
+! Testing last iteration check
+!CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32
+!CHECK: %[[C0:.*]] = arith.constant 0 : i32
+!CHECK: %[[T1:.*]] = arith.cmpi slt, %{{.*}}, %[[C0]] : i32
+!CHECK: %[[T2:.*]] = arith.cmpi slt, %[[V]], %{{.*}} : i32
+!CHECK: %[[T3:.*]] = arith.cmpi sgt, %[[V]], %{{.*}} : i32
+!CHECK: %[[IV_CMP:.*]] = arith.select %[[T1]], %[[T2]], %[[T3]] : i1
+!CHECK: fir.if %[[IV_CMP]] {
+!CHECK: fir.store %[[V]] to %{{.*}} : !fir.ref
+! Testing lastprivate val update
+!CHECK-NEXT: %[[CLONE_LD:.*]] = fir.load %[[CLONE1_DECL]]#0 : !fir.ref
+!CHECK-NEXT: hlfir.assign %[[CLONE_LD]] to %[[ARG1_DECL]]#0 temporary_lhs : i32, !fir.ref
+!CHECK-NEXT: }
+!CHECK-NEXT: omp.yield
+
+subroutine firstpriv_lastpriv_int2(arg1)
+  integer :: arg1
+!$OMP PARALLEL
+!$OMP DO FIRSTPRIVATE(arg1) LASTPRIVATE(arg1)
+do n = 1, 5
+  arg1 = 2
+  print *, arg1
+end do
+!$OMP END DO
+!$OMP END PARALLEL
+print *, arg1
+end subroutine
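The "last iteration check" blocks above all verify the same guard: an
`arith.cmpi`/`arith.select` test that the worksharing loop is on its sequentially
last iteration before the private clone is copied back out. A runnable sketch of
the semantics (hypothetical, for illustration only):

  program lastprivate_demo
    integer :: i, x
    x = 0
    !$omp parallel do lastprivate(x)
    do i = 1, 5
      x = i * 10
    end do
    !$omp end parallel do
    print *, x   ! prints 50, the value from iteration i = 5
  end program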
diff --git a/flang/test/Lower/OpenMP/parallel-private-clause.f90 b/flang/test/Lower/OpenMP/parallel-private-clause.f90
new file mode 100644
index 0000000000000..8d288f6483493
--- /dev/null
+++ b/flang/test/Lower/OpenMP/parallel-private-clause.f90
@@ -0,0 +1,380 @@
+! This test checks lowering of OpenMP parallel Directive with
+! `PRIVATE` clause present.
+
+! REQUIRES: shell
+! RUN: bbc --use-desc-for-alloc=false -fopenmp -emit-fir %s -o - | \
+! RUN:   FileCheck %s --check-prefix=FIRDialect
+
+!FIRDialect: func @_QPprivate_clause(%[[ARG1:.*]]: !fir.ref{{.*}}, %[[ARG2:.*]]: !fir.ref>{{.*}}, %[[ARG3:.*]]: !fir.boxchar<1>{{.*}}, %[[ARG4:.*]]: !fir.boxchar<1>{{.*}}) {
+!FIRDialect-DAG: %[[ALPHA:.*]] = fir.alloca i32 {{{.*}}, uniq_name = "{{.*}}Ealpha"}
+!FIRDialect-DAG: %[[ALPHA_ARRAY:.*]] = fir.alloca !fir.array<10xi32> {{{.*}}, uniq_name = "{{.*}}Ealpha_array"}
+!FIRDialect-DAG: %[[BETA:.*]] = fir.alloca !fir.char<1,5> {{{.*}}, uniq_name = "{{.*}}Ebeta"}
+!FIRDialect-DAG: %[[BETA_ARRAY:.*]] = fir.alloca !fir.array<10x!fir.char<1,5>> {{{.*}}, uniq_name = "{{.*}}Ebeta_array"}
+
+!FIRDialect-DAG: omp.parallel {
+!FIRDialect-DAG: %[[ALPHA_PRIVATE:.*]] = fir.alloca i32 {{{.*}}, pinned, uniq_name = "{{.*}}Ealpha"}
+!FIRDialect-DAG: %[[ALPHA_ARRAY_PRIVATE:.*]] = fir.alloca !fir.array<10xi32> {{{.*}}, pinned, uniq_name = "{{.*}}Ealpha_array"}
+!FIRDialect-DAG: %[[BETA_PRIVATE:.*]] = fir.alloca !fir.char<1,5> {{{.*}}, pinned, uniq_name = "{{.*}}Ebeta"}
+!FIRDialect-DAG: %[[BETA_ARRAY_PRIVATE:.*]] = fir.alloca !fir.array<10x!fir.char<1,5>> {{{.*}}, pinned, uniq_name = "{{.*}}Ebeta_array"}
+!FIRDialect-DAG: %[[ARG1_PRIVATE:.*]] = fir.alloca i32 {{{.*}}, pinned, uniq_name = "{{.*}}Earg1"}
+!FIRDialect-DAG: %[[ARG2_ARRAY_PRIVATE:.*]] = fir.alloca !fir.array<10xi32> {{{.*}}, pinned, uniq_name = "{{.*}}Earg2"}
+!FIRDialect-DAG: %[[ARG3_PRIVATE:.*]] = fir.alloca !fir.char<1,5> {{{.*}}, pinned, uniq_name = "{{.*}}Earg3"}
+!FIRDialect-DAG: %[[ARG4_ARRAY_PRIVATE:.*]] = fir.alloca !fir.array<10x!fir.char<1,5>> {{{.*}}, pinned, uniq_name = "{{.*}}Earg4"}
+!FIRDialect: omp.terminator
+!FIRDialect: }
+
+subroutine private_clause(arg1, arg2, arg3, arg4)
+
+  integer :: arg1, arg2(10)
+  integer :: alpha, alpha_array(10)
+  character(5) :: arg3, arg4(10)
+  character(5) :: beta, beta_array(10)
+
+!$OMP PARALLEL PRIVATE(alpha, alpha_array, beta, beta_array, arg1, arg2, arg3, arg4)
+  alpha = 1
+  alpha_array = 4
+  beta = "hi"
+  beta_array = "hi"
+  arg1 = 2
+  arg2 = 3
+  arg3 = "world"
+  arg4 = "world"
+!$OMP END PARALLEL
+
+end subroutine
+
+!FIRDialect: func @_QPprivate_clause_scalar() {
+!FIRDialect-DAG: {{.*}} = fir.alloca !fir.complex<4> {bindc_name = "c", uniq_name = "{{.*}}Ec"}
+!FIRDialect-DAG: {{.*}} = fir.alloca i8 {bindc_name = "i1", uniq_name = "{{.*}}Ei1"}
+!FIRDialect-DAG: {{.*}} = fir.alloca i128 {bindc_name = "i16", uniq_name = "{{.*}}Ei16"}
+!FIRDialect-DAG: {{.*}} = fir.alloca i16 {bindc_name = "i2", uniq_name = "{{.*}}Ei2"}
+!FIRDialect-DAG: {{.*}} = fir.alloca i32 {bindc_name = "i4", uniq_name = "{{.*}}Ei4"}
+!FIRDialect-DAG: {{.*}} = fir.alloca i64 {bindc_name = "i8", uniq_name = "{{.*}}Ei8"}
+!FIRDialect-DAG: {{.*}} = fir.alloca !fir.logical<4> {bindc_name = "l", uniq_name = "{{.*}}El"}
+!FIRDialect-DAG: {{.*}} = fir.alloca f32 {bindc_name = "r", uniq_name = "{{.*}}Er"}
+
+!FIRDialect: omp.parallel {
+!FIRDialect-DAG: {{.*}} = fir.alloca i8 {bindc_name = "i1", pinned, uniq_name = "{{.*}}Ei1"}
+!FIRDialect-DAG: {{.*}} = fir.alloca i16 {bindc_name = "i2", pinned, uniq_name = "{{.*}}Ei2"}
+!FIRDialect-DAG: {{.*}} = fir.alloca i32 {bindc_name = "i4", pinned, uniq_name = "{{.*}}Ei4"}
+!FIRDialect-DAG: {{.*}} = fir.alloca i64 {bindc_name = "i8", pinned, uniq_name = "{{.*}}Ei8"}
+!FIRDialect-DAG: {{.*}} = fir.alloca i128 {bindc_name = "i16", pinned, uniq_name = "{{.*}}Ei16"}
+!FIRDialect-DAG: {{.*}} = fir.alloca !fir.complex<4> {bindc_name = "c", pinned, uniq_name = "{{.*}}Ec"}
+!FIRDialect-DAG: {{.*}} = fir.alloca !fir.logical<4> {bindc_name = "l", pinned, uniq_name = "{{.*}}El"}
+!FIRDialect-DAG: {{.*}} = fir.alloca f32 {bindc_name = "r", pinned, uniq_name = "{{.*}}Er"}
+
+subroutine private_clause_scalar()
+
+  integer(kind=1) :: i1
+  integer(kind=2) :: i2
+  integer(kind=4) :: i4
+  integer(kind=8) :: i8
+  integer(kind=16) :: i16
+  complex :: c
+  logical :: l
+  real :: r
+
+!$OMP PARALLEL PRIVATE(i1, i2, i4, i8, i16, c, l, r)
+  print *, i1, i2, i4, i8, i16, c, l, r
+!$OMP END PARALLEL
+
+end subroutine
+
+!FIRDialect: func @_QPprivate_clause_derived_type() {
+!FIRDialect: {{.*}} = fir.alloca !fir.type<{{.*}}{t_i:i32,t_arr:!fir.array<5xi32>}> {bindc_name = "t", uniq_name = "{{.*}}Et"}
+
+!FIRDialect: omp.parallel {
+!FIRDialect: {{.*}} = fir.alloca !fir.type<{{.*}}{t_i:i32,t_arr:!fir.array<5xi32>}> {bindc_name = "t", pinned, uniq_name = "{{.*}}Et"}
+
+subroutine private_clause_derived_type()
+
+  type my_type
+    integer :: t_i
+    integer :: t_arr(5)
+  end type my_type
+  type(my_type) :: t
+
+!$OMP PARALLEL PRIVATE(t)
+  print *, t%t_i
+!$OMP END PARALLEL
+
+end subroutine
+
+!FIRDialect: func @_QPprivate_clause_allocatable() {
+!FIRDialect-DAG: {{.*}} = fir.alloca !fir.box> {bindc_name = "x", uniq_name = "{{.*}}Ex"}
+!FIRDialect-DAG: {{.*}} = fir.alloca !fir.heap {uniq_name = "{{.*}}Ex.addr"}
+!FIRDialect-DAG: {{.*}} = fir.alloca !fir.box>> {bindc_name = "x2", uniq_name = "{{.*}}Ex2"}
+!FIRDialect-DAG: {{.*}} = fir.alloca !fir.heap> {uniq_name = "{{.*}}Ex2.addr"}
+!FIRDialect-DAG: {{.*}} = fir.address_of(@{{.*}}Ex3) : !fir.ref>>
+!FIRDialect-DAG: [[TMP8:%.*]] = fir.address_of(@{{.*}}Ex4) : !fir.ref>>>
+
+!FIRDialect: omp.parallel {
+!FIRDialect-DAG: [[TMP35:%.*]] = fir.alloca !fir.box> {bindc_name = "x", pinned, uniq_name = "{{.*}}Ex"}
+!FIRDialect-DAG: [[TMP39:%.*]] = fir.alloca !fir.box>> {bindc_name = "x2", pinned, uniq_name = "{{.*}}Ex2"}
+!FIRDialect-DAG: [[TMP45:%.*]] = fir.alloca !fir.box> {bindc_name = "x3", pinned, uniq_name = "{{.*}}Ex3"}
+
+!FIRDialect-DAG: [[TMP51:%.*]] = fir.load [[TMP8]] : !fir.ref>>>
+!FIRDialect-DAG: [[TMP97:%.*]] = fir.load [[TMP8]] : !fir.ref>>>
+!FIRDialect-DAG: [[TMP98:%.*]]:3 = fir.box_dims [[TMP97]], {{.*}} : (!fir.box>>, index) -> (index, index, index)
+!FIRDialect-DAG: [[TMP50:%.*]] = fir.alloca !fir.box>> {bindc_name = "x4", pinned, uniq_name = "{{.*}}Ex4"}
+
+! FIRDialect-DAG: [[TMP101:%.*]] = fir.allocmem !fir.array, {{.*}} {fir.must_be_heap = true, uniq_name = "{{.*}}Ex4.alloc"}
+! FIRDialect-DAG: [[TMP102:%.*]] = fir.shape_shift {{.*}}#0, {{.*}} : (index, index) -> !fir.shapeshift<1>
+! FIRDialect-DAG: [[TMP103:%.*]] = fir.embox [[TMP101]]([[TMP102]]) : (!fir.heap>, !fir.shapeshift<1>) -> !fir.box>>
+! FIRDialect-DAG: fir.store [[TMP103]] to [[TMP50]] : !fir.ref>>>
+
+
+subroutine private_clause_allocatable()
+
+  integer, allocatable :: x, x2(:)
+  integer, allocatable, save :: x3, x4(:)
+
+  print *, x, x2, x3, x4
+
+!$OMP PARALLEL PRIVATE(x, x2, x3, x4)
+  print *, x, x2, x3, x4
+!$OMP END PARALLEL
+
+end subroutine
+
+
+!FIRDialect: func @_QPprivate_clause_real_call_allocatable() {
+!FIRDialect-DAG: {{.*}} = fir.alloca !fir.box> {bindc_name = "x5", uniq_name = "{{.*}}Ex5"}
+!FIRDialect-DAG: {{.*}} = fir.zero_bits !fir.heap
+!FIRDialect-DAG: {{.*}} = fir.embox %1 : (!fir.heap) -> !fir.box>
+!FIRDialect-DAG: fir.store %2 to %0 : !fir.ref>>
+!FIRDialect-DAG: omp.parallel {
+!FIRDialect-DAG: [[TMP203:%.*]] = fir.alloca !fir.box> {bindc_name = "x5", pinned, uniq_name = "{{.*}}Ex5"}
+
+!FIRDialect-DAG: fir.if %{{.*}} {
+
+!FIRDialect-DAG: fir.store %{{.*}} to [[TMP203]] : !fir.ref>>
+!FIRDialect-DAG: } else {
+
+!FIRDialect-DAG: fir.store %{{.*}} to [[TMP203]] : !fir.ref>>
+!FIRDialect-DAG: }
+!FIRDialect-DAG: fir.call @_QFprivate_clause_real_call_allocatablePhelper_private_clause_real_call_allocatable([[TMP203]]) fastmath : (!fir.ref>>) -> ()
+!FIRDialect-DAG: %{{.*}} = fir.load [[TMP203]] : !fir.ref>>
+
+!FIRDialect-DAG: fir.if %{{.*}} {
+!FIRDialect-DAG: %{{.*}} = fir.load [[TMP203]] : !fir.ref>>
+
+!FIRDialect-DAG: fir.store %{{.*}} to [[TMP203]] : !fir.ref>>
+!FIRDialect-DAG: }
+!FIRDialect-DAG: omp.terminator
+!FIRDialect-DAG: }
+!FIRDialect-DAG: return
+!FIRDialect-DAG: }
+
+
+subroutine private_clause_real_call_allocatable
+  real, allocatable :: x5
+  !$omp parallel private(x5)
+  call helper_private_clause_real_call_allocatable(x5)
+  !$omp end parallel
+contains
+  subroutine helper_private_clause_real_call_allocatable(x6)
+    real, allocatable :: x6
+    print *, allocated(x6)
+  end subroutine
+end subroutine
+
+!FIRDialect: func.func @_QPincrement_list_items(%arg0: !fir.ref>>}>>>> {fir.bindc_name = "head"}) {
+!FIRDialect: {{%.*}} = fir.alloca !fir.box>>}>>> {bindc_name = "p", uniq_name = "_QFincrement_list_itemsEp"}
+!FIRDialect: omp.parallel {
+!FIRDialect: {{%.*}} = fir.alloca !fir.box>>}>>> {bindc_name = "p", pinned, uniq_name = "_QFincrement_list_itemsEp"}
+!FIRDialect: omp.single {
+
+!FIRDialect: omp.terminator
+!FIRDialect: omp.terminator
+!FIRDialect: return
+
+subroutine increment_list_items (head)
+  type node
+    integer :: payload
+    type (node), pointer :: next
+  end type node
+
+  type (node), pointer :: head
+  type (node), pointer :: p
+!$omp parallel private(p)
+!$omp single
+  p => head
+  do
+    p => p%next
+    if ( associated (p) .eqv. .false. ) exit
+  end do
+!$omp end single
+!$omp end parallel
+end subroutine increment_list_items
+
+!FIRDialect: func.func @_QPparallel_pointer() {
+!FIRDialect-DAG: [[PP0:%.*]] = fir.alloca !fir.box> {bindc_name = "y1", uniq_name = "{{.*}}Ey1"}
+!FIRDialect-DAG: [[PP1:%.*]] = fir.alloca !fir.ptr {uniq_name = "{{.*}}Ey1.addr"}
+!FIRDialect-DAG: [[PP2:%.*]] = fir.zero_bits !fir.ptr
+!FIRDialect: fir.store [[PP2]] to [[PP1]] : !fir.ref>
+!FIRDialect-DAG: [[PP3:%.*]] = fir.alloca !fir.box>> {bindc_name = "y2", uniq_name = "{{.*}}Ey2"}
+
+!FIRDialect: fir.store %6 to %3 : !fir.ref>>>
+!FIRDialect-DAG: [[PP7:%.*]] = fir.alloca i32 {bindc_name = "z1", fir.target, uniq_name = "{{.*}}Ez1"}
+
+!FIRDialect-DAG: [[PP8:%.*]] = fir.alloca !fir.array<10xi32> {bindc_name = "z2", fir.target, uniq_name = "{{.*}}Ez2"}
+!FIRDialect: omp.parallel {
+!FIRDialect-DAG: [[PP9:%.*]] = fir.alloca !fir.box> {bindc_name = "y1", pinned, uniq_name = "{{.*}}Ey1"}
+!FIRDialect-DAG: [[PP10:%.*]] = fir.alloca !fir.box>> {bindc_name = "y2", pinned, uniq_name = "{{.*}}Ey2"}
+!FIRDialect-DAG: [[PP11:%.*]] = fir.embox [[PP7]] : (!fir.ref) -> !fir.box>
+!FIRDialect: fir.store [[PP11]] to [[PP9]] : !fir.ref>>
+!FIRDialect-DAG: [[PP12:%.*]] = fir.shape %c{{.*}} : (index) -> !fir.shape<1>
+!FIRDialect-DAG: [[PP13:%.*]] = fir.embox [[PP8]]([[PP12]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box>>
+!FIRDialect: fir.store %13 to [[PP10]] : !fir.ref>>>
+!FIRDialect: omp.terminator
+!FIRDialect: }
+!FIRDialect: return
+!FIRDialect: }
+
+subroutine parallel_pointer()
+  integer, pointer :: y1, y2(:)
+  integer, target :: z1, z2(10)
+
+!$omp parallel private(y1, y2)
+  y1=>z1
+  y2=>z2
+!$omp end parallel
+end subroutine parallel_pointer
+
+
+!FIRDialect-LABEL: func @_QPsimple_loop_1()
+subroutine simple_loop_1
+  integer :: i
+  real, allocatable :: r;
+  ! FIRDialect: omp.parallel
+  !$OMP PARALLEL PRIVATE(r)
+  ! FIRDialect: %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned}
+
+  ! FIRDialect: [[R:%.*]] = fir.alloca !fir.box> {bindc_name = "r", pinned, uniq_name = "{{.*}}Er"}
+  ! FIRDialect: fir.store {{%.*}} to [[R]] : !fir.ref>>
+  ! FIRDialect: fir.store {{%.*}} to [[R]] : !fir.ref>>
+
+  ! FIRDialect: %[[WS_LB:.*]] = arith.constant 1 : i32
+  ! FIRDialect: %[[WS_UB:.*]] = arith.constant 9 : i32
+  ! FIRDialect: %[[WS_STEP:.*]] = arith.constant 1 : i32
+
+  ! FIRDialect: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
+  !$OMP DO
+  do i=1, 9
+    ! FIRDialect: fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref
+    ! FIRDialect: %[[LOAD_IV:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref
+    ! FIRDialect: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1
+    print*, i
+  end do
+  ! FIRDialect: omp.yield
+  ! FIRDialect: {{%.*}} = fir.load [[R]] : !fir.ref>>
+  ! FIRDialect: fir.if {{%.*}} {
+  ! FIRDialect: [[LD:%.*]] = fir.load [[R]] : !fir.ref>>
+  ! FIRDialect: [[AD:%.*]] = fir.box_addr [[LD]] : (!fir.box>) -> !fir.heap
+  ! FIRDialect: fir.freemem [[AD]] : !fir.heap
+  ! FIRDialect: fir.store {{%.*}} to [[R]] : !fir.ref>>
+  !$OMP END DO
+  ! FIRDialect: omp.terminator
+  !$OMP END PARALLEL
+end subroutine
+
+!FIRDialect-LABEL: func @_QPsimple_loop_2()
+subroutine simple_loop_2
+  integer :: i
+  real, allocatable :: r;
+  ! FIRDialect: omp.parallel
+  !$OMP PARALLEL
+  ! FIRDialect: %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned}
+
+  ! FIRDialect: [[R:%.*]] = fir.alloca !fir.box> {bindc_name = "r", pinned, uniq_name = "{{.*}}Er"}
+  ! FIRDialect: fir.store {{%.*}} to [[R]] : !fir.ref>>
+  ! FIRDialect: fir.store {{%.*}} to [[R]] : !fir.ref>>
+
+  ! FIRDialect: %[[WS_LB:.*]] = arith.constant 1 : i32
+  ! FIRDialect: %[[WS_UB:.*]] = arith.constant 9 : i32
+  ! FIRDialect: %[[WS_STEP:.*]] = arith.constant 1 : i32
+
+  ! FIRDialect: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
+  !$OMP DO PRIVATE(r)
+  do i=1, 9
+    ! FIRDialect: fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref
+    ! FIRDialect: %[[LOAD_IV:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref
+    ! FIRDialect: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1
+    print*, i
+  end do
+  ! FIRDialect: omp.yield
+  ! FIRDialect: {{%.*}} = fir.load [[R]] : !fir.ref>>
+  ! FIRDialect: fir.if {{%.*}} {
+  ! FIRDialect: [[LD:%.*]] = fir.load [[R]] : !fir.ref>>
+  ! FIRDialect: [[AD:%.*]] = fir.box_addr [[LD]] : (!fir.box>) -> !fir.heap
+  ! FIRDialect: fir.freemem [[AD]] : !fir.heap
+  ! FIRDialect: fir.store {{%.*}} to [[R]] : !fir.ref>>
+  !$OMP END DO
+  ! FIRDialect: omp.terminator
+  !$OMP END PARALLEL
+end subroutine
+
+!FIRDialect-LABEL: func @_QPsimple_loop_3()
+subroutine simple_loop_3
+  integer :: i
+  real, allocatable :: r;
+  ! FIRDialect: omp.parallel
+  ! FIRDialect: %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned}
+
+  ! FIRDialect: [[R:%.*]] = fir.alloca !fir.box> {bindc_name = "r", pinned, uniq_name = "{{.*}}Er"}
+  ! FIRDialect: fir.store {{%.*}} to [[R]] : !fir.ref>>
+  ! FIRDialect: fir.store {{%.*}} to [[R]] : !fir.ref>>
+
+  ! FIRDialect: %[[WS_LB:.*]] = arith.constant 1 : i32
+  ! FIRDialect: %[[WS_UB:.*]] = arith.constant 9 : i32
+  ! FIRDialect: %[[WS_STEP:.*]] = arith.constant 1 : i32
+
+  ! FIRDialect: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
+  !$OMP PARALLEL DO PRIVATE(r)
+  do i=1, 9
+    ! FIRDialect: fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref
+    ! FIRDialect: %[[LOAD_IV:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref
+    ! FIRDialect: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1
+    print*, i
+  end do
+  ! FIRDialect: omp.yield
+  ! FIRDialect: {{%.*}} = fir.load [[R]] : !fir.ref>>
+  ! FIRDialect: fir.if {{%.*}} {
+  ! FIRDialect: [[LD:%.*]] = fir.load [[R]] : !fir.ref>>
+  ! FIRDialect: [[AD:%.*]] = fir.box_addr [[LD]] : (!fir.box>) -> !fir.heap
+  ! FIRDialect: fir.freemem [[AD]] : !fir.heap
+  ! FIRDialect: fir.store {{%.*}} to [[R]] : !fir.ref>>
+  !$OMP END PARALLEL DO
+  ! FIRDialect: omp.terminator
+end subroutine
+
+!CHECK-LABEL: func @_QPsimd_loop_1()
+subroutine simd_loop_1
+  integer :: i
+  real, allocatable :: r;
+  ! FIRDialect: [[R:%.*]] = fir.alloca !fir.box> {bindc_name = "r", pinned, uniq_name = "{{.*}}Er"}
+  ! FIRDialect: fir.store {{%.*}} to [[R]] : !fir.ref>>
+  ! FIRDialect: fir.store {{%.*}} to [[R]] : !fir.ref>>
+
+  ! FIRDialect: %[[LB:.*]] = arith.constant 1 : i32
+  ! FIRDialect: %[[UB:.*]] = arith.constant 9 : i32
+  ! FIRDialect: %[[STEP:.*]] = arith.constant 1 : i32
+
+  ! FIRDialect: omp.simdloop for (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
+  !$OMP SIMD PRIVATE(r)
+  do i=1, 9
+    ! FIRDialect: fir.store %[[I]] to %[[LOCAL:.*]] : !fir.ref
+    ! FIRDialect: %[[LOAD_IV:.*]] = fir.load %[[LOCAL]] : !fir.ref
+    ! FIRDialect: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1
+    print*, i
+  end do
+  !$OMP END SIMD
+  ! FIRDialect: omp.yield
+  ! FIRDialect: {{%.*}} = fir.load [[R]] : !fir.ref>>
+  ! FIRDialect: fir.if {{%.*}} {
+  ! FIRDialect: [[LD:%.*]] = fir.load [[R]] : !fir.ref>>
+  ! FIRDialect: [[AD:%.*]] = fir.box_addr [[LD]] : (!fir.box>) -> !fir.heap
+  ! FIRDialect: fir.freemem [[AD]] : !fir.heap
+  ! FIRDialect: fir.store {{%.*}} to [[R]] : !fir.ref>>
+end subroutine
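The allocatable cases above check that each private copy gets its own descriptor
box and a conditional allocate/free at region entry and exit. A runnable sketch of
the source-level behavior (hypothetical, for illustration only):

  program private_alloc_demo
    real, allocatable :: r(:)
    !$omp parallel private(r)
    ! The private r starts unallocated here (the original was never
    ! allocated); each thread manages its own copy independently.
    allocate(r(8))
    r = 0.0
    deallocate(r)
    !$omp end parallel
  end program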
diff --git a/flang/test/Lower/OpenMP/parallel-wsloop.f90 b/flang/test/Lower/OpenMP/parallel-wsloop.f90
new file mode 100644
index 0000000000000..c06f941b74b58
--- /dev/null
+++ b/flang/test/Lower/OpenMP/parallel-wsloop.f90
@@ -0,0 +1,298 @@
+! This test checks lowering of OpenMP DO Directive (Worksharing).
+
+! RUN: bbc -fopenmp -emit-hlfir %s -o - | FileCheck %s
+
+! CHECK-LABEL: func @_QPsimple_parallel_do
+subroutine simple_parallel_do
+  integer :: i
+  ! CHECK: omp.parallel
+  ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32
+  ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32
+  ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32
+  ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
+  !$OMP PARALLEL DO
+  do i=1, 9
+    ! CHECK: fir.store %[[I]] to %[[IV_ADDR:.*]]#1 : !fir.ref
+    ! CHECK: %[[LOAD_IV:.*]] = fir.load %[[IV_ADDR]]#0 : !fir.ref
+    ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1
+    print*, i
+  end do
+  ! CHECK: omp.yield
+  ! CHECK: omp.terminator
+  !$OMP END PARALLEL DO
+end subroutine
+
+! CHECK-LABEL: func @_QPparallel_do_with_parallel_clauses
+! CHECK-SAME: %[[COND_REF:.*]]: !fir.ref> {fir.bindc_name = "cond"}, %[[NT_REF:.*]]: !fir.ref {fir.bindc_name = "nt"}
+subroutine parallel_do_with_parallel_clauses(cond, nt)
+  ! CHECK: %[[COND_DECL:.*]]:2 = hlfir.declare %[[COND_REF]] {uniq_name = "_QFparallel_do_with_parallel_clausesEcond"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>)
+  ! CHECK: %[[NT_DECL:.*]]:2 = hlfir.declare %[[NT_REF]] {uniq_name = "_QFparallel_do_with_parallel_clausesEnt"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+  logical :: cond
+  integer :: nt
+  integer :: i
+  ! CHECK: %[[COND:.*]] = fir.load %[[COND_DECL]]#0 : !fir.ref>
+  ! CHECK: %[[COND_CVT:.*]] = fir.convert %[[COND]] : (!fir.logical<4>) -> i1
+  ! CHECK: %[[NT:.*]] = fir.load %[[NT_DECL]]#0 : !fir.ref
+  ! CHECK: omp.parallel if(%[[COND_CVT]] : i1) num_threads(%[[NT]] : i32) proc_bind(close)
+  ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32
+  ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32
+  ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32
+  ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
+  !$OMP PARALLEL DO IF(cond) NUM_THREADS(nt) PROC_BIND(close)
+  do i=1, 9
+    ! CHECK: fir.store %[[I]] to %[[IV_ADDR:.*]]#1 : !fir.ref
+    ! CHECK: %[[LOAD_IV:.*]] = fir.load %[[IV_ADDR]]#0 : !fir.ref
+    ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1
+    print*, i
+  end do
+  ! CHECK: omp.yield
+  ! CHECK: omp.terminator
+  !$OMP END PARALLEL DO
+end subroutine
+
+! CHECK-LABEL: func @_QPparallel_do_with_clauses
+! CHECK-SAME: %[[NT_REF:.*]]: !fir.ref {fir.bindc_name = "nt"}
+subroutine parallel_do_with_clauses(nt)
+  ! CHECK: %[[NT_DECL:.*]]:2 = hlfir.declare %[[NT_REF]] {uniq_name = "_QFparallel_do_with_clausesEnt"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+  integer :: nt
+  integer :: i
+  ! CHECK: %[[NT:.*]] = fir.load %[[NT_DECL]]#0 : !fir.ref
+  ! CHECK: omp.parallel num_threads(%[[NT]] : i32)
+  ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32
+  ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32
+  ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32
+  ! CHECK: omp.wsloop schedule(dynamic) for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
+  !$OMP PARALLEL DO NUM_THREADS(nt) SCHEDULE(dynamic)
+  do i=1, 9
+    ! CHECK: fir.store %[[I]] to %[[IV_ADDR:.*]]#1 : !fir.ref
+    ! CHECK: %[[LOAD_IV:.*]] = fir.load %[[IV_ADDR]]#0 : !fir.ref
+    ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1
+    print*, i
+  end do
+  ! CHECK: omp.yield
+  ! CHECK: omp.terminator
+  !$OMP END PARALLEL DO
+end subroutine
+
+!===============================================================================
+! Checking for the following construct:
+!   !$omp parallel do private(...) firstprivate(...)
+!===============================================================================
+
+! CHECK-LABEL: func @_QPparallel_do_with_privatisation_clauses
+! CHECK-SAME: %[[COND_REF:.*]]: !fir.ref> {fir.bindc_name = "cond"}, %[[NT_REF:.*]]: !fir.ref {fir.bindc_name = "nt"}
+subroutine parallel_do_with_privatisation_clauses(cond,nt)
+  ! CHECK: %[[COND_DECL:.*]]:2 = hlfir.declare %[[COND_REF]] {uniq_name = "_QFparallel_do_with_privatisation_clausesEcond"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>)
+  ! CHECK: %[[NT_DECL:.*]]:2 = hlfir.declare %[[NT_REF]] {uniq_name = "_QFparallel_do_with_privatisation_clausesEnt"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+  logical :: cond
+  integer :: nt
+  integer :: i
+  ! CHECK: omp.parallel
+  ! CHECK: %[[PRIVATE_COND_REF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "cond", pinned, uniq_name = "_QFparallel_do_with_privatisation_clausesEcond"}
+  ! CHECK: %[[PRIVATE_COND_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_COND_REF]] {uniq_name = "_QFparallel_do_with_privatisation_clausesEcond"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>)
+  ! CHECK: %[[PRIVATE_NT_REF:.*]] = fir.alloca i32 {bindc_name = "nt", pinned, uniq_name = "_QFparallel_do_with_privatisation_clausesEnt"}
+  ! CHECK: %[[PRIVATE_NT_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_NT_REF]] {uniq_name = "_QFparallel_do_with_privatisation_clausesEnt"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+  ! CHECK: %[[NT_VAL:.*]] = fir.load %[[NT_DECL]]#0 : !fir.ref
+  ! CHECK: hlfir.assign %[[NT_VAL]] to %[[PRIVATE_NT_DECL]]#0 temporary_lhs : i32, !fir.ref
+  ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32
+  ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32
+  ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32
+  ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
+  !$OMP PARALLEL DO PRIVATE(cond) FIRSTPRIVATE(nt)
+  do i=1, 9
+    ! CHECK: fir.store %[[I]] to %[[IV_ADDR:.*]]#1 : !fir.ref
+    ! CHECK: %[[LOAD_IV:.*]] = fir.load %[[IV_ADDR]]#0 : !fir.ref
+    ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1
+    ! CHECK: %[[PRIVATE_COND_VAL:.*]] = fir.load %[[PRIVATE_COND_DECL]]#0 : !fir.ref>
+    ! CHECK: %[[PRIVATE_COND_VAL_CVT:.*]] = fir.convert %[[PRIVATE_COND_VAL]] : (!fir.logical<4>) -> i1
+    ! CHECK: fir.call @_FortranAioOutputLogical({{.*}}, %[[PRIVATE_COND_VAL_CVT]]) {{.*}}: (!fir.ref, i1) -> i1
+    ! CHECK: %[[PRIVATE_NT_VAL:.*]] = fir.load %[[PRIVATE_NT_DECL]]#0 : !fir.ref
+    ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[PRIVATE_NT_VAL]]) {{.*}}: (!fir.ref, i32) -> i1
+    print*, i, cond, nt
+  end do
+  ! CHECK: omp.yield
+  ! CHECK: omp.terminator
+  !$OMP END PARALLEL DO
+end subroutine
+
+!===============================================================================
+! Checking for the following construct
+!   !$omp parallel private(...) firstprivate(...)
+!   !$omp do
+!===============================================================================
+
+subroutine parallel_private_do(cond,nt)
+logical :: cond
+  integer :: nt
+  integer :: i
+  !$OMP PARALLEL PRIVATE(cond) FIRSTPRIVATE(nt)
+  !$OMP DO
+  do i=1, 9
+    call foo(i, cond, nt)
+  end do
+  !$OMP END DO
+  !$OMP END PARALLEL
+end subroutine parallel_private_do
+
+! CHECK-LABEL: func.func @_QPparallel_private_do(
+! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref> {fir.bindc_name = "cond"},
+! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref {fir.bindc_name = "nt"}) {
+! CHECK: %[[NT_DECL:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFparallel_private_doEnt"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+! CHECK: omp.parallel {
+! CHECK: %[[I_PRIV:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[I_PRIV_DECL:.*]]:2 = hlfir.declare %[[I_PRIV]] {uniq_name = "_QFparallel_private_doEi"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+! CHECK: %[[COND_ADDR:.*]] = fir.alloca !fir.logical<4> {bindc_name = "cond", pinned, uniq_name = "_QFparallel_private_doEcond"}
+! CHECK: %[[COND_DECL:.*]]:2 = hlfir.declare %[[COND_ADDR]] {uniq_name = "_QFparallel_private_doEcond"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>)
+! CHECK: %[[NT_PRIV_ADDR:.*]] = fir.alloca i32 {bindc_name = "nt", pinned, uniq_name = "_QFparallel_private_doEnt"}
+! CHECK: %[[NT_PRIV_DECL:.*]]:2 = hlfir.declare %[[NT_PRIV_ADDR]] {uniq_name = "_QFparallel_private_doEnt"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+! CHECK: %[[NT:.*]] = fir.load %[[NT_DECL]]#0 : !fir.ref
+! CHECK: hlfir.assign %[[NT]] to %[[NT_PRIV_DECL]]#0 temporary_lhs : i32, !fir.ref
+! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_8:.*]] = arith.constant 9 : i32
+! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
+! CHECK: fir.store %[[I]] to %[[I_PRIV_DECL]]#1 : !fir.ref
+! CHECK: fir.call @_QPfoo(%[[I_PRIV_DECL]]#1, %[[COND_DECL]]#1, %[[NT_PRIV_DECL]]#1) {{.*}}: (!fir.ref, !fir.ref>, !fir.ref) -> ()
+! CHECK: omp.yield
+! CHECK: }
+! CHECK: omp.terminator
+! CHECK: }
+! CHECK: return
+! CHECK: }
+
+!===============================================================================
+! Checking for the following construct
+!   !$omp parallel
+!   !$omp do firstprivate(...) firstprivate(...)
+!===============================================================================
+
+subroutine omp_parallel_multiple_firstprivate_do(a, b)
+  integer::a, b
+  !$OMP PARALLEL FIRSTPRIVATE(a) FIRSTPRIVATE(b)
+  !$OMP DO
+  do i=1, 10
+    call bar(i, a)
+  end do
+  !$OMP END DO
+  !$OMP END PARALLEL
+end subroutine omp_parallel_multiple_firstprivate_do
+
+! CHECK-LABEL: func.func @_QPomp_parallel_multiple_firstprivate_do(
+! CHECK-SAME: %[[A_ADDR:.*]]: !fir.ref {fir.bindc_name = "a"},
+! CHECK-SAME: %[[B_ADDR:.*]]: !fir.ref {fir.bindc_name = "b"}) {
+! CHECK: %[[A_DECL:.*]]:2 = hlfir.declare %[[A_ADDR]] {uniq_name = "_QFomp_parallel_multiple_firstprivate_doEa"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+! CHECK: %[[B_DECL:.*]]:2 = hlfir.declare %[[B_ADDR]] {uniq_name = "_QFomp_parallel_multiple_firstprivate_doEb"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+! CHECK: omp.parallel {
+! CHECK: %[[I_PRIV_ADDR:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[I_PRIV_DECL:.*]]:2 = hlfir.declare %[[I_PRIV_ADDR]] {uniq_name = "_QFomp_parallel_multiple_firstprivate_doEi"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+! CHECK: %[[A_PRIV_ADDR:.*]] = fir.alloca i32 {bindc_name = "a", pinned, uniq_name = "_QFomp_parallel_multiple_firstprivate_doEa"}
+! CHECK: %[[A_PRIV_DECL:.*]]:2 = hlfir.declare %[[A_PRIV_ADDR]] {uniq_name = "_QFomp_parallel_multiple_firstprivate_doEa"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+! CHECK: %[[A:.*]] = fir.load %[[A_DECL]]#0 : !fir.ref
+! CHECK: hlfir.assign %[[A]] to %[[A_PRIV_DECL]]#0 temporary_lhs : i32, !fir.ref
+! CHECK: %[[B_PRIV_ADDR:.*]] = fir.alloca i32 {bindc_name = "b", pinned, uniq_name = "_QFomp_parallel_multiple_firstprivate_doEb"}
+! CHECK: %[[B_PRIV_DECL:.*]]:2 = hlfir.declare %[[B_PRIV_ADDR]] {uniq_name = "_QFomp_parallel_multiple_firstprivate_doEb"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+! CHECK: %[[B:.*]] = fir.load %[[B_DECL]]#0 : !fir.ref
+! CHECK: hlfir.assign %[[B]] to %[[B_PRIV_DECL]]#0 temporary_lhs : i32, !fir.ref
+! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_9:.*]] = arith.constant 10 : i32
+! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) {
+! CHECK: fir.store %[[I]] to %[[I_PRIV_DECL]]#1 : !fir.ref
+! CHECK: fir.call @_QPbar(%[[I_PRIV_DECL]]#1, %[[A_PRIV_DECL]]#1) {{.*}}: (!fir.ref, !fir.ref) -> ()
+! CHECK: omp.yield
+! CHECK: }
+! CHECK: omp.terminator
+! CHECK: }
+! CHECK: return
+! CHECK: }
+
+!===============================================================================
+! Checking for the following construct
+!   !$omp parallel
+!   !$omp do private(...) firstprivate(...)
+!===============================================================================
+
+subroutine parallel_do_private(cond,nt)
+logical :: cond
+  integer :: nt
+  integer :: i
+  !$OMP PARALLEL
+  !$OMP DO PRIVATE(cond) FIRSTPRIVATE(nt)
+  do i=1, 9
+    call foo(i, cond, nt)
+  end do
+  !$OMP END DO
+  !$OMP END PARALLEL
+end subroutine parallel_do_private
+
+! CHECK-LABEL: func.func @_QPparallel_do_private(
+! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref> {fir.bindc_name = "cond"},
+! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref {fir.bindc_name = "nt"}) {
+! CHECK: %[[NT_DECL:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFparallel_do_privateEnt"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+! CHECK: omp.parallel {
+! CHECK: %[[I_PRIV_ADDR:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[I_PRIV_DECL:.*]]:2 = hlfir.declare %[[I_PRIV_ADDR]] {uniq_name = "_QFparallel_do_privateEi"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+! CHECK: %[[COND_PRIV_ADDR:.*]] = fir.alloca !fir.logical<4> {bindc_name = "cond", pinned, uniq_name = "_QFparallel_do_privateEcond"}
+! CHECK: %[[COND_PRIV_DECL:.*]]:2 = hlfir.declare %[[COND_PRIV_ADDR]] {uniq_name = "_QFparallel_do_privateEcond"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>)
+! CHECK: %[[NT_PRIV_ADDR:.*]] = fir.alloca i32 {bindc_name = "nt", pinned, uniq_name = "_QFparallel_do_privateEnt"}
+! CHECK: %[[NT_PRIV_DECL:.*]]:2 = hlfir.declare %[[NT_PRIV_ADDR]] {uniq_name = "_QFparallel_do_privateEnt"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+! CHECK: %[[NT_VAL:.*]] = fir.load %[[NT_DECL]]#0 : !fir.ref
+! CHECK: hlfir.assign %[[NT_VAL]] to %[[NT_PRIV_DECL]]#0 temporary_lhs : i32, !fir.ref
+! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_8:.*]] = arith.constant 9 : i32
+! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
+! CHECK: fir.store %[[I]] to %[[I_PRIV_DECL]]#1 : !fir.ref
+! CHECK: fir.call @_QPfoo(%[[I_PRIV_DECL]]#1, %[[COND_PRIV_DECL]]#1, %[[NT_PRIV_DECL]]#1) {{.*}}: (!fir.ref, !fir.ref>, !fir.ref) -> ()
+! CHECK: omp.yield
+! CHECK: }
+! CHECK: omp.terminator
+! CHECK: }
+! CHECK: return
+! CHECK: }
+
+!===============================================================================
+! Checking for the following construct
+!   !$omp parallel
+!   !$omp do firstprivate(...) firstprivate(...)
+!===============================================================================
+
+subroutine omp_parallel_do_multiple_firstprivate(a, b)
+  integer::a, b
+  !$OMP PARALLEL
+  !$OMP DO FIRSTPRIVATE(a) FIRSTPRIVATE(b)
+  do i=1, 10
+    call bar(i, a)
+  end do
+  !$OMP END DO
+  !$OMP END PARALLEL
+end subroutine omp_parallel_do_multiple_firstprivate
+
+! CHECK-LABEL: func.func @_QPomp_parallel_do_multiple_firstprivate(
+! CHECK-SAME: %[[A_ADDR:.*]]: !fir.ref {fir.bindc_name = "a"},
+! CHECK-SAME: %[[B_ADDR:.*]]: !fir.ref {fir.bindc_name = "b"}) {
+! CHECK: %[[A_DECL:.*]]:2 = hlfir.declare %[[A_ADDR]] {uniq_name = "_QFomp_parallel_do_multiple_firstprivateEa"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+! CHECK: %[[B_DECL:.*]]:2 = hlfir.declare %[[B_ADDR]] {uniq_name = "_QFomp_parallel_do_multiple_firstprivateEb"} : (!fir.ref) -> (!fir.ref, !fir.ref
+! CHECK: omp.parallel {
+! CHECK: %[[I_PRIV_ADDR:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[I_PRIV_DECL:.*]]:2 = hlfir.declare %[[I_PRIV_ADDR]] {uniq_name = "_QFomp_parallel_do_multiple_firstprivateEi"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+! CHECK: %[[A_PRIV_ADDR:.*]] = fir.alloca i32 {bindc_name = "a", pinned, uniq_name = "_QFomp_parallel_do_multiple_firstprivateEa"}
+! CHECK: %[[A_PRIV_DECL:.*]]:2 = hlfir.declare %[[A_PRIV_ADDR]] {uniq_name = "_QFomp_parallel_do_multiple_firstprivateEa"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+! CHECK: %[[A:.*]] = fir.load %[[A_DECL]]#0 : !fir.ref
+! CHECK: hlfir.assign %[[A]] to %[[A_PRIV_DECL]]#0 temporary_lhs : i32, !fir.ref
+! CHECK: %[[B_PRIV_ADDR:.*]] = fir.alloca i32 {bindc_name = "b", pinned, uniq_name = "_QFomp_parallel_do_multiple_firstprivateEb"}
+! CHECK: %[[B_PRIV_DECL:.*]]:2 = hlfir.declare %[[B_PRIV_ADDR]] {uniq_name = "_QFomp_parallel_do_multiple_firstprivateEb"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+! CHECK: %[[B:.*]] = fir.load %[[B_DECL]]#0 : !fir.ref
+! CHECK: hlfir.assign %[[B]] to %[[B_PRIV_DECL]]#0 temporary_lhs : i32, !fir.ref
+! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_9:.*]] = arith.constant 10 : i32
+! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) {
+! CHECK: fir.store %[[I]] to %[[I_PRIV_DECL]]#1 : !fir.ref
+! CHECK: fir.call @_QPbar(%[[I_PRIV_DECL]]#1, %[[A_PRIV_DECL]]#1) {{.*}}: (!fir.ref, !fir.ref) -> ()
+! CHECK: omp.yield
+! CHECK: }
+! CHECK: omp.terminator
+! CHECK: }
+! CHECK: return
+! CHECK: }
diff --git a/flang/test/Lower/OpenMP/parallel.f90 b/flang/test/Lower/OpenMP/parallel.f90
new file mode 100644
index 0000000000000..0e43244994cfd
--- /dev/null
+++ b/flang/test/Lower/OpenMP/parallel.f90
@@ -0,0 +1,206 @@
+!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s
+
+!CHECK-LABEL: func @_QPparallel_simple
+subroutine parallel_simple()
+  !CHECK: omp.parallel
+!$omp parallel
+  !CHECK: fir.call
+  call f1()
+!$omp end parallel
+end subroutine parallel_simple
+! `if` clause
+!===============================================================================
+
+!CHECK-LABEL: func @_QPparallel_if
+subroutine parallel_if(alpha, beta, gamma)
+  integer, intent(in) :: alpha
+  logical, intent(in) :: beta
+  logical(1) :: logical1
+  logical(2) :: logical2
+  logical(4) :: logical4
+  logical(8) :: logical8
+
+  !CHECK: omp.parallel if(%{{.*}} : i1) {
+  !$omp parallel if(alpha .le. 0)
+  !CHECK: fir.call
+  call f1()
+  !CHECK: omp.terminator
+  !$omp end parallel
+
+  !CHECK: omp.parallel if(%{{.*}} : i1) {
+  !$omp parallel if(.false.)
+  !CHECK: fir.call
+  call f2()
+  !CHECK: omp.terminator
+  !$omp end parallel
+
+  !CHECK: omp.parallel if(%{{.*}} : i1) {
+  !$omp parallel if(alpha .ge. 0)
+  !CHECK: fir.call
+  call f3()
+  !CHECK: omp.terminator
+  !$omp end parallel
+
+  !CHECK: omp.parallel if(%{{.*}} : i1) {
+  !$omp parallel if(.true.)
+  !CHECK: fir.call
+  call f4()
+  !CHECK: omp.terminator
+  !$omp end parallel
+
+  !CHECK: omp.parallel if(%{{.*}} : i1) {
+  !$omp parallel if(beta)
+  !CHECK: fir.call
+  call f1()
+  !CHECK: omp.terminator
+  !$omp end parallel
+
+  !CHECK: omp.parallel if(%{{.*}} : i1) {
+  !$omp parallel if(logical1)
+  !CHECK: fir.call
+  call f1()
+  !CHECK: omp.terminator
+  !$omp end parallel
+
+  !CHECK: omp.parallel if(%{{.*}} : i1) {
+  !$omp parallel if(logical2)
+  !CHECK: fir.call
+  call f1()
+  !CHECK: omp.terminator
+  !$omp end parallel
+
+  !CHECK: omp.parallel if(%{{.*}} : i1) {
+  !$omp parallel if(logical4)
+  !CHECK: fir.call
+  call f1()
+  !CHECK: omp.terminator
+  !$omp end parallel
+
+  !CHECK: omp.parallel if(%{{.*}} : i1) {
+  !$omp parallel if(logical8)
+  !CHECK: fir.call
+  call f1()
+  !CHECK: omp.terminator
+  !$omp end parallel
+
+end subroutine parallel_if
+
+!===============================================================================
+! `num_threads` clause
+!===============================================================================
+
+!CHECK-LABEL: func @_QPparallel_numthreads
+subroutine parallel_numthreads(num_threads)
+  integer, intent(inout) :: num_threads
+
+  !CHECK: omp.parallel num_threads(%{{.*}}: i32) {
+  !$omp parallel num_threads(16)
+  !CHECK: fir.call
+  call f1()
+  !CHECK: omp.terminator
+  !$omp end parallel
+
+  num_threads = 4
+
+  !CHECK: omp.parallel num_threads(%{{.*}} : i32) {
+  !$omp parallel num_threads(num_threads)
+  !CHECK: fir.call
+  call f2()
+  !CHECK: omp.terminator
+  !$omp end parallel
+
+end subroutine parallel_numthreads
+
+!===============================================================================
+! `proc_bind` clause
+!===============================================================================
+
+!CHECK-LABEL: func @_QPparallel_proc_bind
+subroutine parallel_proc_bind()
+
+  !CHECK: omp.parallel proc_bind(master) {
+  !$omp parallel proc_bind(master)
+  !CHECK: fir.call
+  call f1()
+  !CHECK: omp.terminator
+  !$omp end parallel
+
+  !CHECK: omp.parallel proc_bind(close) {
+  !$omp parallel proc_bind(close)
+  !CHECK: fir.call
+  call f2()
+  !CHECK: omp.terminator
+  !$omp end parallel
+
+  !CHECK: omp.parallel proc_bind(spread) {
+  !$omp parallel proc_bind(spread)
+  !CHECK: fir.call
+  call f3()
+  !CHECK: omp.terminator
+  !$omp end parallel
+
+end subroutine parallel_proc_bind
+
+!===============================================================================
+! `allocate` clause
+!===============================================================================
+
+!CHECK-LABEL: func @_QPparallel_allocate
+subroutine parallel_allocate()
+  use omp_lib
+  integer :: x
+  !CHECK: omp.parallel allocate(
+  !CHECK: %{{.+}} : i32 -> %{{.+}} : !fir.ref<i32>
+  !CHECK: ) {
+  !$omp parallel allocate(omp_high_bw_mem_alloc: x) private(x)
+  !CHECK: arith.addi
+  x = x + 12
+  !CHECK: omp.terminator
+  !$omp end parallel
+end subroutine parallel_allocate
+
+!===============================================================================
+! multiple clauses
+!===============================================================================
+
+!CHECK-LABEL: func @_QPparallel_multiple_clauses
+subroutine parallel_multiple_clauses(alpha, num_threads)
+  use omp_lib
+  integer, intent(inout) :: alpha
+  integer, intent(in) :: num_threads
+
+  !CHECK: omp.parallel if({{.*}} : i1) proc_bind(master) {
+  !$omp parallel if(alpha .le. 0) proc_bind(master)
+  !CHECK: fir.call
+  call f1()
+  !CHECK: omp.terminator
+  !$omp end parallel
+
+  !CHECK: omp.parallel num_threads({{.*}} : i32) proc_bind(close) {
+  !$omp parallel proc_bind(close) num_threads(num_threads)
+  !CHECK: fir.call
+  call f2()
+  !CHECK: omp.terminator
+  !$omp end parallel
+
+  !CHECK: omp.parallel if({{.*}} : i1) num_threads({{.*}} : i32) {
+  !$omp parallel num_threads(num_threads) if(alpha .le. 0)
+  !CHECK: fir.call
+  call f3()
+  !CHECK: omp.terminator
+  !$omp end parallel
+
+  !CHECK: omp.parallel if({{.*}} : i1) num_threads({{.*}} : i32) allocate(
+  !CHECK: %{{.+}} : i32 -> %{{.+}} : !fir.ref<i32>
+  !CHECK: ) {
+  !$omp parallel num_threads(num_threads) if(alpha .le. 0) allocate(omp_high_bw_mem_alloc: alpha) private(alpha)
+  !CHECK: fir.call
+  call f3()
+  !CHECK: arith.addi
+  alpha = alpha + 12
+  !CHECK: omp.terminator
+  !$omp end parallel
+
+end subroutine parallel_multiple_clauses

From 45ccc1666c723e11d7b0148b2ef5c37c7a36e916 Mon Sep 17 00:00:00 2001
From: David Spickett
Date: Thu, 26 Oct 2023 16:17:14 +0000
Subject: [PATCH 096/877] [lldb][test][Windows] XFAIL IR memory map test

Since https://github.com/llvm/llvm-project/pull/68052 this has been failing.
https://lab.llvm.org/buildbot/#/builders/219/builds/6545

Follow-up changes have not fixed it; XFAIL while I debug it.

---
 lldb/test/Shell/Expr/TestIRMemoryMapWindows.test | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lldb/test/Shell/Expr/TestIRMemoryMapWindows.test b/lldb/test/Shell/Expr/TestIRMemoryMapWindows.test
index ae29492c9ccc9..f9f4da3c40920 100644
--- a/lldb/test/Shell/Expr/TestIRMemoryMapWindows.test
+++ b/lldb/test/Shell/Expr/TestIRMemoryMapWindows.test
@@ -1,4 +1,5 @@
 # REQUIRES: system-windows
+# XFAIL: system-windows
 
 # RUN: %clang_cl_host /Zi /GS- %p/Inputs/call-function.cpp /c /o %t.obj
 # RUN: %msvc_link /debug:full %t.obj /out:%t

From b67d3702577d4a1848bce9be9887e554178a421a Mon Sep 17 00:00:00 2001
From: Daniel Grumberg
Date: Thu, 26 Oct 2023 17:32:36 +0100
Subject: [PATCH 097/877] [clang] Prioritize decl comments from macro
 expansion site (#65481)

For declarations defined inside a macro, e.g.:

```
#define MAKE_FUNC(suffix)      \
  /// Not selected doc comment \
  void func_##suffix(void) { }

/// Doc comment foo
MAKE_FUNC(foo)

/// Doc comment bar
MAKE_FUNC(bar)
```

Prefer the doc comment at the expansion site instead of the one defined in
the macro.
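As a rough illustration of the resulting lookup order (the `MAKE_GETTER`
macro and its comments below are hypothetical, not taken from this patch's
tests): the expansion-site doc comment is attached first, and the comment
inside the macro is only used as a fallback when the expansion site has none.

```
#define MAKE_GETTER(name)                      \
  /** Fallback doc comment inside the macro */ \
  int get_##name(void) { return 0; }

/// Attached to get_width: the expansion-site comment wins
MAKE_GETTER(width)

MAKE_GETTER(height) /* no comment here, so get_height falls back
                       to the doc comment inside the macro */
```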
rdar://113995729 --- clang/lib/AST/ASTContext.cpp | 197 ++++++++------------- clang/test/Index/annotate-comments-objc.m | 48 +++-- clang/unittests/Tooling/SourceCodeTest.cpp | 8 +- 3 files changed, 102 insertions(+), 151 deletions(-) diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 0b6ed82cc5ba0..fea7f2400b31e 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -112,10 +112,10 @@ enum FloatingRank { Ibm128Rank }; -/// \returns location that is relevant when searching for Doc comments related -/// to \p D. -static SourceLocation getDeclLocForCommentSearch(const Decl *D, - SourceManager &SourceMgr) { +/// \returns The locations that are relevant when searching for Doc comments +/// related to \p D. +static SmallVector +getDeclLocsForCommentSearch(const Decl *D, SourceManager &SourceMgr) { assert(D); // User can not attach documentation to implicit declarations. @@ -167,115 +167,48 @@ static SourceLocation getDeclLocForCommentSearch(const Decl *D, isa(D)) return {}; + SmallVector Locations; // Find declaration location. // For Objective-C declarations we generally don't expect to have multiple // declarators, thus use declaration starting location as the "declaration // location". // For all other declarations multiple declarators are used quite frequently, // so we use the location of the identifier as the "declaration location". + SourceLocation BaseLocation; if (isa(D) || isa(D) || - isa(D) || - isa(D) || + isa(D) || isa(D) || isa(D) || // Allow association with Y across {} in `typedef struct X {} Y`. isa(D)) - return D->getBeginLoc(); + BaseLocation = D->getBeginLoc(); + else + BaseLocation = D->getLocation(); - const SourceLocation DeclLoc = D->getLocation(); - if (DeclLoc.isMacroID()) { - // There are (at least) three types of macros we care about here. - // - // 1. Macros that are used in the definition of a type outside the macro, - // with a comment attached at the macro call site. - // ``` - // #define MAKE_NAME(Foo) Name##Foo - // - // /// Comment is here, where we use the macro. - // struct MAKE_NAME(Foo) { - // int a; - // int b; - // }; - // ``` - // 2. Macros that define whole things along with the comment. - // ``` - // #define MAKE_METHOD(name) \ - // /** Comment is here, inside the macro. */ \ - // void name() {} - // - // struct S { - // MAKE_METHOD(f) - // } - // ``` - // 3. Macros that both declare a type and name a decl outside the macro. - // ``` - // /// Comment is here, where we use the macro. - // typedef NS_ENUM(NSInteger, Size) { - // SizeWidth, - // SizeHeight - // }; - // ``` - // In this case NS_ENUM declares am enum type, and uses the same name for - // the typedef declaration that appears outside the macro. The comment - // here should be applied to both declarations inside and outside the - // macro. - // - // We have found a Decl name that comes from inside a macro, but - // Decl::getLocation() returns the place where the macro is being called. - // If the declaration (and not just the name) resides inside the macro, - // then we want to map Decl::getLocation() into the macro to where the - // declaration and its attached comment (if any) were written. - // - // This mapping into the macro is done by mapping the location to its - // spelling location, however even if the declaration is inside a macro, - // the name's spelling can come from a macro argument (case 2 above). 
In - // this case mapping the location to the spelling location finds the - // argument's position (at `f` in MAKE_METHOD(`f`) above), which is not - // where the declaration and its comment are located. - // - // To avoid this issue, we make use of Decl::getBeginLocation() instead. - // While the declaration's position is where the name is written, the - // comment is always attached to the begining of the declaration, not to - // the name. - // - // In the first case, the begin location of the decl is outside the macro, - // at the location of `typedef`. This is where the comment is found as - // well. The begin location is not inside a macro, so it's spelling - // location is the same. - // - // In the second case, the begin location of the decl is the call to the - // macro, at `MAKE_METHOD`. However its spelling location is inside the - // the macro at the location of `void`. This is where the comment is found - // again. - // - // In the third case, there's no correct single behaviour. We want to use - // the comment outside the macro for the definition that's inside the macro. - // There is also a definition outside the macro, and we want the comment to - // apply to both. The cases we care about here is NS_ENUM() and - // NS_OPTIONS(). In general, if an enum is defined inside a macro, we should - // try to find the comment there. - - // This is handling case 3 for NS_ENUM() and NS_OPTIONS(), which define - // enum types inside the macro. - if (isa(D)) { - SourceLocation MacroCallLoc = SourceMgr.getExpansionLoc(DeclLoc); - if (auto BufferRef = - SourceMgr.getBufferOrNone(SourceMgr.getFileID(MacroCallLoc)); - BufferRef.has_value()) { - llvm::StringRef buffer = BufferRef->getBuffer().substr( - SourceMgr.getFileOffset(MacroCallLoc)); - if (buffer.starts_with("NS_ENUM(") || - buffer.starts_with("NS_OPTIONS(")) { - // We want to use the comment on the call to NS_ENUM and NS_OPTIONS - // macros for the types defined inside the macros, which is at the - // expansion location. - return MacroCallLoc; - } - } + if (!D->getLocation().isMacroID()) { + Locations.emplace_back(BaseLocation); + } else { + const auto *DeclCtx = D->getDeclContext(); + + // When encountering definitions generated from a macro (that are not + // contained by another declaration in the macro) we need to try and find + // the comment at the location of the expansion but if there is no comment + // there we should retry to see if there is a comment inside the macro as + // well. To this end we return first BaseLocation to first look at the + // expansion site, the second value is the spelling location of the + // beginning of the declaration defined inside the macro. + if (!(DeclCtx && + Decl::castFromDeclContext(DeclCtx)->getLocation().isMacroID())) { + Locations.emplace_back(SourceMgr.getExpansionLoc(BaseLocation)); } - return SourceMgr.getSpellingLoc(D->getBeginLoc()); + + // We use Decl::getBeginLoc() and not just BaseLocation here to ensure that + // we don't refer to the macro argument location at the expansion site (this + // can happen if the name's spelling is provided via macro argument), and + // always to the declaration itself. 
+ Locations.emplace_back(SourceMgr.getSpellingLoc(D->getBeginLoc())); } - return DeclLoc; + return Locations; } RawComment *ASTContext::getRawCommentForDeclNoCacheImpl( @@ -357,30 +290,36 @@ RawComment *ASTContext::getRawCommentForDeclNoCacheImpl( } RawComment *ASTContext::getRawCommentForDeclNoCache(const Decl *D) const { - const SourceLocation DeclLoc = getDeclLocForCommentSearch(D, SourceMgr); + const auto DeclLocs = getDeclLocsForCommentSearch(D, SourceMgr); - // If the declaration doesn't map directly to a location in a file, we - // can't find the comment. - if (DeclLoc.isInvalid() || !DeclLoc.isFileID()) - return nullptr; + for (const auto DeclLoc : DeclLocs) { + // If the declaration doesn't map directly to a location in a file, we + // can't find the comment. + if (DeclLoc.isInvalid() || !DeclLoc.isFileID()) + continue; - if (ExternalSource && !CommentsLoaded) { - ExternalSource->ReadComments(); - CommentsLoaded = true; - } + if (ExternalSource && !CommentsLoaded) { + ExternalSource->ReadComments(); + CommentsLoaded = true; + } - if (Comments.empty()) - return nullptr; + if (Comments.empty()) + continue; - const FileID File = SourceMgr.getDecomposedLoc(DeclLoc).first; - if (!File.isValid()) { - return nullptr; + const FileID File = SourceMgr.getDecomposedLoc(DeclLoc).first; + if (!File.isValid()) + continue; + + const auto CommentsInThisFile = Comments.getCommentsInFile(File); + if (!CommentsInThisFile || CommentsInThisFile->empty()) + continue; + + if (RawComment *Comment = + getRawCommentForDeclNoCacheImpl(D, DeclLoc, *CommentsInThisFile)) + return Comment; } - const auto CommentsInThisFile = Comments.getCommentsInFile(File); - if (!CommentsInThisFile || CommentsInThisFile->empty()) - return nullptr; - return getRawCommentForDeclNoCacheImpl(D, DeclLoc, *CommentsInThisFile); + return nullptr; } void ASTContext::addComment(const RawComment &RC) { @@ -584,7 +523,6 @@ void ASTContext::attachCommentsToJustParsedDecls(ArrayRef Decls, // declaration, but also comments that *follow* the declaration -- thanks to // the lookahead in the lexer: we've consumed the semicolon and looked // ahead through comments. 
- for (const Decl *D : Decls) { assert(D); if (D->isInvalidDecl()) @@ -592,19 +530,22 @@ void ASTContext::attachCommentsToJustParsedDecls(ArrayRef Decls, D = &adjustDeclToTemplate(*D); - const SourceLocation DeclLoc = getDeclLocForCommentSearch(D, SourceMgr); - - if (DeclLoc.isInvalid() || !DeclLoc.isFileID()) - continue; - if (DeclRawComments.count(D) > 0) continue; - if (RawComment *const DocComment = - getRawCommentForDeclNoCacheImpl(D, DeclLoc, *CommentsInThisFile)) { - cacheRawCommentForDecl(*D, *DocComment); - comments::FullComment *FC = DocComment->parse(*this, PP, D); - ParsedComments[D->getCanonicalDecl()] = FC; + const auto DeclLocs = getDeclLocsForCommentSearch(D, SourceMgr); + + for (const auto DeclLoc : DeclLocs) { + if (DeclLoc.isInvalid() || !DeclLoc.isFileID()) + continue; + + if (RawComment *const DocComment = getRawCommentForDeclNoCacheImpl( + D, DeclLoc, *CommentsInThisFile)) { + cacheRawCommentForDecl(*D, *DocComment); + comments::FullComment *FC = DocComment->parse(*this, PP, D); + ParsedComments[D->getCanonicalDecl()] = FC; + break; + } } } } diff --git a/clang/test/Index/annotate-comments-objc.m b/clang/test/Index/annotate-comments-objc.m index 6a48d9ae8f2cb..f013684c1a638 100644 --- a/clang/test/Index/annotate-comments-objc.m +++ b/clang/test/Index/annotate-comments-objc.m @@ -46,18 +46,23 @@ - (void)method1_isdoxy4; /*!< method1_isdoxy4 IS_DOXYGEN_SINGLE */ // attach unrelated comments in the following cases where tag decls are // embedded in declarators. -#define DECLARE_FUNCTIONS(suffix) \ - /** functionFromMacro IS_DOXYGEN_SINGLE */ \ - void functionFromMacro(void) { \ - typedef struct Struct_notdoxy Struct_notdoxy; \ - } \ - /** functionFromMacroWithSuffix IS_DOXYGEN_SINGLE */ \ - void functionFromMacro##suffix(void) { \ - typedef struct Struct_notdoxy Struct_notdoxy; \ - } - -/// IS_DOXYGEN_NOT_ATTACHED -DECLARE_FUNCTIONS(WithSuffix) +#define DECLARE_FUNCTIONS_COMMENTS_IN_MACRO(suffix) \ + /** functionFromMacro IS_DOXYGEN_SINGLE */ \ + void functionFromMacro(void) { \ + typedef struct Struct_notdoxy Struct_notdoxy; \ + } \ + /** functionFromMacroWithSuffix IS_DOXYGEN_SINGLE */ \ + void functionFromMacro##suffix(void) { \ + typedef struct Struct_notdoxy Struct_notdoxy; \ + } + +DECLARE_FUNCTIONS_COMMENTS_IN_MACRO(WithSuffix) + +#define DECLARE_FUNCTIONS \ + void functionFromMacroWithCommentFromExpansionSite(void) { typedef struct Struct_notdoxy Struct_notdoxy; } + +/// functionFromMacroWithCommentFromExpansionSite IS_DOXYGEN_SINGLE +DECLARE_FUNCTIONS /// typedef_isdoxy1 IS_DOXYGEN_SINGLE typedef struct Struct_notdoxy *typedef_isdoxy1; @@ -68,9 +73,14 @@ void functionFromMacro(void) { \ /** namedEnumFromMacro IS_DOXYGEN_SINGLE */ \ enum name { B }; -/// IS_DOXYGEN_NOT_ATTACHED DECLARE_ENUMS(namedEnumFromMacro) +#define MYENUM(name) enum name +struct Foo { + /// Vehicles IS_DOXYGEN_SINGLE + MYENUM(Vehicles) { Car, Motorbike, Boat} a; +}; + #endif // RUN: rm -rf %t @@ -133,8 +143,10 @@ void functionFromMacro(void) { \ // CHECK: annotate-comments-objc.m:41:22: EnumDecl=An_NS_ENUM_isdoxy1:{{.*}} An_NS_ENUM_isdoxy1 IS_DOXYGEN_SINGLE // CHECK: annotate-comments-objc.m:41:22: TypedefDecl=An_NS_ENUM_isdoxy1:{{.*}} An_NS_ENUM_isdoxy1 IS_DOXYGEN_SINGLE // CHECK: annotate-comments-objc.m:41:22: EnumDecl=An_NS_ENUM_isdoxy1:{{.*}} An_NS_ENUM_isdoxy1 IS_DOXYGEN_SINGLE -// CHECK: annotate-comments-objc.m:60:1: FunctionDecl=functionFromMacro:{{.*}} BriefComment=[functionFromMacro IS_DOXYGEN_SINGLE] -// CHECK: annotate-comments-objc.m:60:1: 
FunctionDecl=functionFromMacroWithSuffix:{{.*}} BriefComment=[functionFromMacroWithSuffix IS_DOXYGEN_SINGLE] -// CHECK: annotate-comments-objc.m:63:32: TypedefDecl=typedef_isdoxy1:{{.*}} typedef_isdoxy1 IS_DOXYGEN_SINGLE -// CHECK: annotate-comments-objc.m:72:1: EnumDecl=enumFromMacro:{{.*}} BriefComment=[enumFromMacro IS_DOXYGEN_SINGLE] -// CHECK: annotate-comments-objc.m:72:15: EnumDecl=namedEnumFromMacro:{{.*}} BriefComment=[namedEnumFromMacro IS_DOXYGEN_SINGLE] +// CHECK: annotate-comments-objc.m:59:1: FunctionDecl=functionFromMacro:{{.*}} BriefComment=[functionFromMacro IS_DOXYGEN_SINGLE] +// CHECK: annotate-comments-objc.m:59:1: FunctionDecl=functionFromMacroWithSuffix:{{.*}} BriefComment=[functionFromMacroWithSuffix IS_DOXYGEN_SINGLE] +// CHECK: annotate-comments-objc.m:65:1: FunctionDecl=functionFromMacroWithCommentFromExpansionSite:{{.*}} BriefComment=[functionFromMacroWithCommentFromExpansionSite IS_DOXYGEN_SINGLE] +// CHECK: annotate-comments-objc.m:68:32: TypedefDecl=typedef_isdoxy1:{{.*}} typedef_isdoxy1 IS_DOXYGEN_SINGLE +// CHECK: annotate-comments-objc.m:76:1: EnumDecl=enumFromMacro:{{.*}} BriefComment=[enumFromMacro IS_DOXYGEN_SINGLE] +// CHECK: annotate-comments-objc.m:76:15: EnumDecl=namedEnumFromMacro:{{.*}} BriefComment=[namedEnumFromMacro IS_DOXYGEN_SINGLE] +// CHECK: annotate-comments-objc.m:81:10: EnumDecl=Vehicles:{{.*}} Vehicles IS_DOXYGEN_SINGLE diff --git a/clang/unittests/Tooling/SourceCodeTest.cpp b/clang/unittests/Tooling/SourceCodeTest.cpp index 3d1dbceb63a7f..3641d2ee453f4 100644 --- a/clang/unittests/Tooling/SourceCodeTest.cpp +++ b/clang/unittests/Tooling/SourceCodeTest.cpp @@ -372,13 +372,11 @@ TEST(SourceCodeTest, getAssociatedRangeWithComments) { #define DECL /* Comment */ int x $r[[DECL;]])cpp"); - // Does not include comments when only the decl or the comment come from a - // macro. - // FIXME: Change code to allow this. Visit(R"cpp( #define DECL int x - // Comment - $r[[DECL;]])cpp"); + $r[[// Comment + DECL;]])cpp"); + // Does not include comments when only the comment come from a macro. Visit(R"cpp( #define COMMENT /* Comment */ COMMENT From 3e79f4d2c24faaa24a4a12041b4c33d39fec2ec7 Mon Sep 17 00:00:00 2001 From: Abhishek Varma Date: Thu, 26 Oct 2023 22:23:17 +0530 Subject: [PATCH 098/877] [MLIR][Arith] Fix arith::AtomicRMWKind::maximumf's identity value (#70312) -- In order to compute maximum, we should always initialise the result with the largest negative value possible for the concerned element type, instead of the smallest. -- This commit essentially adds a fix to this issue. Signed-off-by: Abhishek Varma --- mlir/lib/Dialect/Arith/IR/ArithOps.cpp | 2 +- mlir/test/Dialect/Linalg/transform-op-decompose.mlir | 2 +- mlir/test/Dialect/Linalg/transform-op-split-reduction.mlir | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Dialect/Arith/IR/ArithOps.cpp b/mlir/lib/Dialect/Arith/IR/ArithOps.cpp index 1002719f0b89f..56d5e0fed7618 100644 --- a/mlir/lib/Dialect/Arith/IR/ArithOps.cpp +++ b/mlir/lib/Dialect/Arith/IR/ArithOps.cpp @@ -2412,7 +2412,7 @@ TypedAttr mlir::arith::getIdentityValueAttr(AtomicRMWKind kind, Type resultType, const llvm::fltSemantics &semantic = llvm::cast(resultType).getFloatSemantics(); APFloat identity = useOnlyFiniteValue - ? APFloat::getSmallest(semantic, /*Negative=*/true) + ? 
APFloat::getLargest(semantic, /*Negative=*/true) : APFloat::getInf(semantic, /*Negative=*/true); return builder.getFloatAttr(resultType, identity); } diff --git a/mlir/test/Dialect/Linalg/transform-op-decompose.mlir b/mlir/test/Dialect/Linalg/transform-op-decompose.mlir index f057a70d13964..ef0aca2cc366f 100644 --- a/mlir/test/Dialect/Linalg/transform-op-decompose.mlir +++ b/mlir/test/Dialect/Linalg/transform-op-decompose.mlir @@ -210,7 +210,7 @@ func.func @softmax(%arg0: tensor<2x16x32xf32>, %dst: tensor<2x16x32xf32>) -> ten // CHECK-LABEL: func.func @softmax( // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<2x16x32xf32>, %[[DST:[a-zA-Z0-9_]+]]: tensor<2x16x32xf32>) -> tensor<2x16x32xf32> { // CHECK-DAG: %[[D1:.+]] = tensor.empty() : tensor<2x16xf32> -// CHECK-DAG: %[[CST:.+]] = arith.constant -1.401300e-45 : f32 +// CHECK-DAG: %[[CST:.+]] = arith.constant -3.40282347E+38 : f32 // CHECK: %[[D2:.+]] = linalg.fill ins(%[[CST]] : f32) outs(%[[D1]] : tensor<2x16xf32>) -> tensor<2x16xf32> // CHECK: %[[D3:.+]] = linalg.generic {indexing_maps = [#[[$MAP]], #[[$MAP1]]], iterator_types = ["parallel", // CHECK-SAME: "parallel", "reduction"]} ins(%[[ARG0]] : tensor<2x16x32xf32>) outs(%[[D2]] : tensor<2x16xf32>) { diff --git a/mlir/test/Dialect/Linalg/transform-op-split-reduction.mlir b/mlir/test/Dialect/Linalg/transform-op-split-reduction.mlir index af08354b1ee4a..006d6105677e9 100644 --- a/mlir/test/Dialect/Linalg/transform-op-split-reduction.mlir +++ b/mlir/test/Dialect/Linalg/transform-op-split-reduction.mlir @@ -176,7 +176,7 @@ func.func @generic_split_3d_ninf(%input: tensor<32x2xf32>, %input_2: tensor<5x32 // CHECK-DAG: #[[$MAP3:.*]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> // CHECK-DAG: #[[$MAP4:.*]] = affine_map<(d0, d1, d2) -> (d0, d1)> // CHECK-LABEL: func @generic_split_3d_ninf -// CHECK-DAG: %[[ID:.*]] = arith.constant -1.401300e-45 : f32 +// CHECK-DAG: %[[ID:.*]] = arith.constant -3.40282347E+38 : f32 // CHECK-DAG: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0, 1], [2]] : tensor<32x2xf32> into tensor<4x8x2xf32> // CHECK-DAG: %[[I2:.*]] = tensor.expand_shape %{{.*}}[0], [1, 2]] : tensor<5x32xf32> into tensor<5x4x8xf32> // CHECK-DAG: %[[INI:.*]] = tensor.empty() : tensor<5x2x4xf32> From 178a1fea57b542c39de79624662085f86f3e348f Mon Sep 17 00:00:00 2001 From: Ilya Tocar Date: Thu, 26 Oct 2023 13:09:20 -0400 Subject: [PATCH 099/877] [libc++] Optimize string operator[] for known large inputs (#69500) If we know that index is larger than SSO size, we know that we can't be in SSO case, and should access the pointer. This removes extra check from operator[] for inputs known at compile time to be larger than SSO. 
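As a standalone sketch of the branch elimination (the `ToyString` layout and
`kSSOCap` value below are invented for illustration and are not libc++'s
actual representation; `__builtin_constant_p` is the same compiler builtin
the patch relies on):

```
#include <cstddef>

// Toy model of a small-string-optimized string; layout is illustrative only.
struct ToyString {
  static constexpr std::size_t kSSOCap = 22; // assumed inline capacity
  bool is_long_;          // discriminates inline vs. heap storage
  char sso_[kSSOCap + 1]; // inline (SSO) buffer
  char* long_ptr_;        // heap buffer, valid when is_long_ is true

  const char& operator[](std::size_t pos) const {
    // Precondition, as for std::string: pos <= size(). When pos is a
    // compile-time constant too large for the inline buffer, the string
    // must be in long mode, so the is_long_ test can fold away.
    if (__builtin_constant_p(pos) && pos > kSSOCap)
      return long_ptr_[pos];
    return is_long_ ? long_ptr_[pos] : sso_[pos];
  }
};
```

For a constant index such as `s[100]`, the compiler can then emit a single
load through the heap pointer instead of first testing the SSO discriminant.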
--- libcxx/include/string | 6 ++++++ .../basic.string/string.access/index.pass.cpp | 21 +++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/libcxx/include/string b/libcxx/include/string index 91935162f0238..cf9f0c847eb43 100644 --- a/libcxx/include/string +++ b/libcxx/include/string @@ -1198,11 +1198,17 @@ public: _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_reference operator[](size_type __pos) const _NOEXCEPT { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(__pos <= size(), "string index out of bounds"); + if (__builtin_constant_p(__pos) && !__fits_in_sso(__pos)) { + return *(__get_long_pointer() + __pos); + } return *(data() + __pos); } _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reference operator[](size_type __pos) _NOEXCEPT { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(__pos <= size(), "string index out of bounds"); + if (__builtin_constant_p(__pos) && !__fits_in_sso(__pos)) { + return *(__get_long_pointer() + __pos); + } return *(__get_pointer() + __pos); } diff --git a/libcxx/test/std/strings/basic.string/string.access/index.pass.cpp b/libcxx/test/std/strings/basic.string/string.access/index.pass.cpp index a270dd579667b..8ba8bf0c8b096 100644 --- a/libcxx/test/std/strings/basic.string/string.access/index.pass.cpp +++ b/libcxx/test/std/strings/basic.string/string.access/index.pass.cpp @@ -34,10 +34,31 @@ TEST_CONSTEXPR_CXX20 void test_string() { assert(s2[0] == '\0'); } +// Same, but for the string that doesn't fit into SSO. +template +TEST_CONSTEXPR_CXX20 void test_string_long() { + S s("0123456789012345678901234567890123456789"); + const S& cs = s; + ASSERT_SAME_TYPE(decltype(s[0]), typename S::reference); + ASSERT_SAME_TYPE(decltype(cs[0]), typename S::const_reference); + LIBCPP_ASSERT_NOEXCEPT(s[0]); + LIBCPP_ASSERT_NOEXCEPT(cs[0]); + for (typename S::size_type i = 0; i < cs.size(); ++i) { + assert(s[i] == static_cast('0' + (i % 10))); + assert(cs[i] == s[i]); + } + assert(s[33] == static_cast('0' + (33 % 10))); + assert(cs[34] == s[34]); + assert(cs[cs.size()] == '\0'); + const S s2 = S(); + assert(s2[0] == '\0'); +} + TEST_CONSTEXPR_CXX20 bool test() { test_string(); #if TEST_STD_VER >= 11 test_string, min_allocator>>(); + test_string_long, min_allocator>>(); #endif return true; From 59750027b91000a56735d4f533225a3aeda996ce Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 26 Oct 2023 13:10:44 -0400 Subject: [PATCH 100/877] [libc++][NFC] Remove unused typedefs in filesystem::path helpers (#70331) I came across those typedefs while working on another change, and I noticed they were just never used. 
--- libcxx/include/__filesystem/path.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/libcxx/include/__filesystem/path.h b/libcxx/include/__filesystem/path.h index c104b003573dc..8a9350be2a00f 100644 --- a/libcxx/include/__filesystem/path.h +++ b/libcxx/include/__filesystem/path.h @@ -107,7 +107,6 @@ struct __is_pathable_string< _Void::__char_type> > : public __can_convert_char<_ECharT> { using _Str = basic_string<_ECharT, _Traits, _Alloc>; - using _Base = __can_convert_char<_ECharT>; _LIBCPP_HIDE_FROM_ABI static _ECharT const* __range_begin(_Str const& __s) { return __s.data(); } @@ -129,7 +128,6 @@ struct __is_pathable_string< _Void::__char_type> > : public __can_convert_char<_ECharT> { using _Str = basic_string_view<_ECharT, _Traits>; - using _Base = __can_convert_char<_ECharT>; _LIBCPP_HIDE_FROM_ABI static _ECharT const* __range_begin(_Str const& __s) { return __s.data(); } @@ -155,8 +153,6 @@ struct __is_pathable_char_array : false_type {}; template struct __is_pathable_char_array<_Source, _ECharT*, _UPtr, true> : __can_convert_char<__remove_const_t<_ECharT> > { - using _Base = __can_convert_char<__remove_const_t<_ECharT> >; - _LIBCPP_HIDE_FROM_ABI static _ECharT const* __range_begin(const _ECharT* __b) { return __b; } @@ -185,7 +181,6 @@ struct __is_pathable_iter< typename iterator_traits<_Iter>::value_type>::__char_type> > : __can_convert_char::value_type> { using _ECharT = typename iterator_traits<_Iter>::value_type; - using _Base = __can_convert_char<_ECharT>; _LIBCPP_HIDE_FROM_ABI static _Iter __range_begin(_Iter __b) { return __b; } From bf92eba697453a358412af806df5b8df29a232e0 Mon Sep 17 00:00:00 2001 From: Matt Harding Date: Thu, 26 Oct 2023 18:23:32 +0100 Subject: [PATCH 101/877] Fix comment in wasm unreachable test (#70340) Some textual editing errors got through this pull request that was merged a few weeks ago: https://github.com/llvm/llvm-project/pull/65876 This patch clears up the unintentional duplicated line, and white-space at the end of the lines. --- llvm/test/CodeGen/WebAssembly/unreachable.ll | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/llvm/test/CodeGen/WebAssembly/unreachable.ll b/llvm/test/CodeGen/WebAssembly/unreachable.ll index 72f865842bdca..5368c2ba5b8dc 100644 --- a/llvm/test/CodeGen/WebAssembly/unreachable.ll +++ b/llvm/test/CodeGen/WebAssembly/unreachable.ll @@ -80,11 +80,10 @@ define i32 @missing_ret_unreachable() { unreachable } -; This is similar to the above test, but ensures wasm unreachable is emitted -; This is similar to the above test, but the callee has a 'noreturn' attribute. -; There is an optimization that removes an 'unreachable' after a noreturn call, -; but Wasm backend doesn't use it and ignore `--no-trap-after-noreturn`, if -; given, to generate valid code. +; This is similar to the above test, but the callee has a 'noreturn' attribute. +; There is an optimization that removes an 'unreachable' after a noreturn call, +; but Wasm backend doesn't use it and ignore `--no-trap-after-noreturn`, if +; given, to generate valid code. define i32 @missing_ret_noreturn_unreachable() { ; CHECK-LABEL: missing_ret_noreturn_unreachable: ; CHECK: .functype missing_ret_noreturn_unreachable () -> (i32) From 93659947d2ee76348e300b83f035e054909d56b0 Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Thu, 26 Oct 2023 10:29:12 -0700 Subject: [PATCH 102/877] [AArch64][GlobalISel] Add support for pre-indexed loads/stores. 
(#70185) The pre-index matcher just needs some small heuristics to make sure it doesn't cause regressions. Apart from that it's a simple change, since the only difference is an immediate operand of '1' vs '0' in the instruction. --- .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 23 +- .../Target/AArch64/AArch64ISelLowering.cpp | 4 - .../GISel/AArch64InstructionSelector.cpp | 83 ++-- .../legalize-indexed-load-stores.mir | 22 + .../AArch64/GlobalISel/store-merging.ll | 4 +- .../CodeGen/AArch64/arm64-indexed-memory.ll | 303 +++---------- .../AArch64/arm64-indexed-vector-ldst.ll | 428 ++++++------------ 7 files changed, 287 insertions(+), 580 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 1cccddfd97222..3c2b5f490ccb8 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -1141,11 +1141,28 @@ bool CombinerHelper::findPreIndexCandidate(GLoadStore &LdSt, Register &Addr, return false; } + // Avoid increasing cross-block register pressure. + for (auto &AddrUse : MRI.use_nodbg_instructions(Addr)) + if (AddrUse.getParent() != LdSt.getParent()) + return false; + // FIXME: check whether all uses of the base pointer are constant PtrAdds. // That might allow us to end base's liveness here by adjusting the constant. - - return all_of(MRI.use_nodbg_instructions(Addr), - [&](MachineInstr &UseMI) { return dominates(LdSt, UseMI); }); + bool RealUse = false; + for (auto &AddrUse : MRI.use_nodbg_instructions(Addr)) { + if (!dominates(LdSt, AddrUse)) + return false; // All use must be dominated by the load/store. + + // If Ptr may be folded in addressing mode of other use, then it's + // not profitable to do this transformation. + if (auto *UseLdSt = dyn_cast(&AddrUse)) { + if (!canFoldInAddressingMode(UseLdSt, TLI, MRI)) + RealUse = true; + } else { + RealUse = true; + } + } + return RealUse; } bool CombinerHelper::matchCombineIndexedLoadStore( diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 038c23b5e8d50..5acc2ce58e6af 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -23718,10 +23718,6 @@ bool AArch64TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { bool AArch64TargetLowering::isIndexingLegal(MachineInstr &MI, Register Base, Register Offset, bool IsPre, MachineRegisterInfo &MRI) const { - // HACK - if (IsPre) - return false; // Until we implement. 
- auto CstOffset = getIConstantVRegVal(Offset, MRI); if (!CstOffset || CstOffset->isZero()) return false; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index 941607dae29bb..9c5b34166ffaf 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -5659,24 +5659,34 @@ bool AArch64InstructionSelector::selectIndexedLoad(MachineInstr &MI, Register WriteBack = Ld.getWritebackReg(); Register Base = Ld.getBaseReg(); Register Offset = Ld.getOffsetReg(); - - if (Ld.isPre()) - return false; // TODO: add pre-inc support - - unsigned Opc = 0; - static constexpr unsigned GPROpcodes[] = { - AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost, - AArch64::LDRXpost}; - static constexpr unsigned FPROpcodes[] = { - AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost, - AArch64::LDRDpost, AArch64::LDRQpost}; - + LLT Ty = MRI.getType(Dst); + assert(Ty.getSizeInBits() <= 128 && "Unexpected type for indexed load"); unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes(); - if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID) - Opc = FPROpcodes[Log2_32(MemSize)]; - else - Opc = GPROpcodes[Log2_32(MemSize)]; + unsigned Opc = 0; + if (Ld.isPre()) { + static constexpr unsigned GPROpcodes[] = { + AArch64::LDRBBpre, AArch64::LDRHHpre, AArch64::LDRWpre, + AArch64::LDRXpre}; + static constexpr unsigned FPROpcodes[] = { + AArch64::LDRBpre, AArch64::LDRHpre, AArch64::LDRSpre, AArch64::LDRDpre, + AArch64::LDRQpre}; + if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID) + Opc = FPROpcodes[Log2_32(MemSize)]; + else + Opc = GPROpcodes[Log2_32(MemSize)]; + } else { + static constexpr unsigned GPROpcodes[] = { + AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost, + AArch64::LDRXpost}; + static constexpr unsigned FPROpcodes[] = { + AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost, + AArch64::LDRDpost, AArch64::LDRQpost}; + if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID) + Opc = FPROpcodes[Log2_32(MemSize)]; + else + Opc = GPROpcodes[Log2_32(MemSize)]; + } auto Cst = getIConstantVRegVal(Offset, MRI); if (!Cst) return false; // Shouldn't happen, but just in case. 
@@ -5695,23 +5705,34 @@ bool AArch64InstructionSelector::selectIndexedStore(GIndexedStore &I, Register Base = I.getBaseReg(); Register Offset = I.getOffsetReg(); LLT ValTy = MRI.getType(Val); - - if (I.isPre()) - return false; // TODO: add pre-inc support + assert(ValTy.getSizeInBits() <= 128 && "Unexpected type for indexed store"); unsigned Opc = 0; - static constexpr unsigned GPROpcodes[] = { - AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost, - AArch64::STRXpost}; - static constexpr unsigned FPROpcodes[] = { - AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost, - AArch64::STRDpost, AArch64::STRQpost}; - - assert(ValTy.getSizeInBits() <= 128); - if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID) - Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())]; - else - Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())]; + if (I.isPre()) { + static constexpr unsigned GPROpcodes[] = { + AArch64::STRBBpre, AArch64::STRHHpre, AArch64::STRWpre, + AArch64::STRXpre}; + static constexpr unsigned FPROpcodes[] = { + AArch64::STRBpre, AArch64::STRHpre, AArch64::STRSpre, AArch64::STRDpre, + AArch64::STRQpre}; + + if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID) + Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())]; + else + Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())]; + } else { + static constexpr unsigned GPROpcodes[] = { + AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost, + AArch64::STRXpost}; + static constexpr unsigned FPROpcodes[] = { + AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost, + AArch64::STRDpost, AArch64::STRQpost}; + + if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID) + Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())]; + else + Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())]; + } auto Cst = getIConstantVRegVal(Offset, MRI); if (!Cst) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-indexed-load-stores.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-indexed-load-stores.mir index e82a0c219068f..bd0317ec6a136 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-indexed-load-stores.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-indexed-load-stores.mir @@ -87,3 +87,25 @@ body: | $q0 = COPY %dst RET_ReallyLR implicit $x0, implicit $q0 ... +--- +name: pre_store_s64 +body: | + bb.0: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: pre_store_s64 + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %ptr:_(p0) = COPY $x0 + ; CHECK-NEXT: %val:_(s64) = COPY $x1 + ; CHECK-NEXT: %offset:_(s64) = G_CONSTANT i64 8 + ; CHECK-NEXT: %writeback:_(p0) = G_INDEXED_STORE %val(s64), %ptr, %offset(s64), 1 :: (store (s64)) + ; CHECK-NEXT: $x0 = COPY %writeback(p0) + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %ptr:_(p0) = COPY $x0 + %val:_(s64) = COPY $x1 + %offset:_(s64) = G_CONSTANT i64 8 + %writeback:_(p0) = G_INDEXED_STORE %val, %ptr, %offset, 1 :: (store (s64), align 8) + $x0 = COPY %writeback + RET_ReallyLR implicit $x0 +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.ll b/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.ll index 23886d8bc4a7b..07744dada4f1f 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.ll @@ -83,8 +83,8 @@ define void @test_simple_vector(ptr %ptr) { ; CHECK-NEXT: mov w8, #5 ; =0x5 ; CHECK-NEXT: strh w9, [x0, #2] ; CHECK-NEXT: mov w9, #8 ; =0x8 -; CHECK-NEXT: strh w8, [x0, #4] -; CHECK-NEXT: strh w9, [x0, #6] +; CHECK-NEXT: strh w8, [x0, #4]! 
+; CHECK-NEXT: strh w9, [x0, #2] ; CHECK-NEXT: ret store <2 x i16> , ptr %ptr %addr2 = getelementptr <2 x i16>, ptr %ptr, i64 1 diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll index a4da489171403..dc8cbd1e43b90 100644 --- a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll +++ b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll @@ -197,88 +197,40 @@ define ptr @storef64(ptr %ptr, double %index, double %spacing) { define ptr @pref64(ptr %ptr, double %spacing) { -; CHECK64-LABEL: pref64: -; CHECK64: ; %bb.0: -; CHECK64-NEXT: str d0, [x0, #32]! -; CHECK64-NEXT: ret -; -; GISEL-LABEL: pref64: -; GISEL: ; %bb.0: -; GISEL-NEXT: mov x8, x0 -; GISEL-NEXT: add x0, x0, #32 -; GISEL-NEXT: str d0, [x8, #32] -; GISEL-NEXT: ret -; -; CHECK32-LABEL: pref64: -; CHECK32: ; %bb.0: -; CHECK32-NEXT: str d0, [x0, #32]! -; CHECK32-NEXT: ret +; CHECK-LABEL: pref64: +; CHECK: ; %bb.0: +; CHECK-NEXT: str d0, [x0, #32]! +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds double, ptr %ptr, i64 4 store double %spacing, ptr %incdec.ptr, align 4 ret ptr %incdec.ptr } define ptr @pref32(ptr %ptr, float %spacing) { -; CHECK64-LABEL: pref32: -; CHECK64: ; %bb.0: -; CHECK64-NEXT: str s0, [x0, #12]! -; CHECK64-NEXT: ret -; -; GISEL-LABEL: pref32: -; GISEL: ; %bb.0: -; GISEL-NEXT: mov x8, x0 -; GISEL-NEXT: add x0, x0, #12 -; GISEL-NEXT: str s0, [x8, #12] -; GISEL-NEXT: ret -; -; CHECK32-LABEL: pref32: -; CHECK32: ; %bb.0: -; CHECK32-NEXT: str s0, [x0, #12]! -; CHECK32-NEXT: ret +; CHECK-LABEL: pref32: +; CHECK: ; %bb.0: +; CHECK-NEXT: str s0, [x0, #12]! +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds float, ptr %ptr, i64 3 store float %spacing, ptr %incdec.ptr, align 4 ret ptr %incdec.ptr } define ptr @pref16(ptr %ptr, half %spacing) nounwind { -; CHECK64-LABEL: pref16: -; CHECK64: ; %bb.0: -; CHECK64-NEXT: str h0, [x0, #6]! -; CHECK64-NEXT: ret -; -; GISEL-LABEL: pref16: -; GISEL: ; %bb.0: -; GISEL-NEXT: mov x8, x0 -; GISEL-NEXT: add x0, x0, #6 -; GISEL-NEXT: str h0, [x8, #6] -; GISEL-NEXT: ret -; -; CHECK32-LABEL: pref16: -; CHECK32: ; %bb.0: -; CHECK32-NEXT: str h0, [x0, #6]! -; CHECK32-NEXT: ret +; CHECK-LABEL: pref16: +; CHECK: ; %bb.0: +; CHECK-NEXT: str h0, [x0, #6]! +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds half, ptr %ptr, i64 3 store half %spacing, ptr %incdec.ptr, align 2 ret ptr %incdec.ptr } define ptr @pre64(ptr %ptr, i64 %spacing) { -; CHECK64-LABEL: pre64: -; CHECK64: ; %bb.0: -; CHECK64-NEXT: str x1, [x0, #16]! -; CHECK64-NEXT: ret -; -; GISEL-LABEL: pre64: -; GISEL: ; %bb.0: -; GISEL-NEXT: mov x8, x0 -; GISEL-NEXT: add x0, x0, #16 -; GISEL-NEXT: str x1, [x8, #16] -; GISEL-NEXT: ret -; -; CHECK32-LABEL: pre64: -; CHECK32: ; %bb.0: -; CHECK32-NEXT: str x1, [x0, #16]! -; CHECK32-NEXT: ret +; CHECK-LABEL: pre64: +; CHECK: ; %bb.0: +; CHECK-NEXT: str x1, [x0, #16]! +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds i64, ptr %ptr, i64 2 store i64 %spacing, ptr %incdec.ptr, align 4 ret ptr %incdec.ptr @@ -297,44 +249,20 @@ define ptr @pre64idxpos256(ptr %ptr, i64 %spacing) { } define ptr @pre64idxneg256(ptr %ptr, i64 %spacing) { -; CHECK64-LABEL: pre64idxneg256: -; CHECK64: ; %bb.0: -; CHECK64-NEXT: str x1, [x0, #-256]! -; CHECK64-NEXT: ret -; -; GISEL-LABEL: pre64idxneg256: -; GISEL: ; %bb.0: -; GISEL-NEXT: mov x8, x0 -; GISEL-NEXT: sub x0, x0, #256 -; GISEL-NEXT: stur x1, [x8, #-256] -; GISEL-NEXT: ret -; -; CHECK32-LABEL: pre64idxneg256: -; CHECK32: ; %bb.0: -; CHECK32-NEXT: str x1, [x0, #-256]! 
-; CHECK32-NEXT: ret +; CHECK-LABEL: pre64idxneg256: +; CHECK: ; %bb.0: +; CHECK-NEXT: str x1, [x0, #-256]! +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds i64, ptr %ptr, i64 -32 store i64 %spacing, ptr %incdec.ptr, align 4 ret ptr %incdec.ptr } define ptr @pre32(ptr %ptr, i32 %spacing) { -; CHECK64-LABEL: pre32: -; CHECK64: ; %bb.0: -; CHECK64-NEXT: str w1, [x0, #8]! -; CHECK64-NEXT: ret -; -; GISEL-LABEL: pre32: -; GISEL: ; %bb.0: -; GISEL-NEXT: mov x8, x0 -; GISEL-NEXT: add x0, x0, #8 -; GISEL-NEXT: str w1, [x8, #8] -; GISEL-NEXT: ret -; -; CHECK32-LABEL: pre32: -; CHECK32: ; %bb.0: -; CHECK32-NEXT: str w1, [x0, #8]! -; CHECK32-NEXT: ret +; CHECK-LABEL: pre32: +; CHECK: ; %bb.0: +; CHECK-NEXT: str w1, [x0, #8]! +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds i32, ptr %ptr, i64 2 store i32 %spacing, ptr %incdec.ptr, align 4 ret ptr %incdec.ptr @@ -353,44 +281,20 @@ define ptr @pre32idxpos256(ptr %ptr, i32 %spacing) { } define ptr @pre32idxneg256(ptr %ptr, i32 %spacing) { -; CHECK64-LABEL: pre32idxneg256: -; CHECK64: ; %bb.0: -; CHECK64-NEXT: str w1, [x0, #-256]! -; CHECK64-NEXT: ret -; -; GISEL-LABEL: pre32idxneg256: -; GISEL: ; %bb.0: -; GISEL-NEXT: mov x8, x0 -; GISEL-NEXT: sub x0, x0, #256 -; GISEL-NEXT: stur w1, [x8, #-256] -; GISEL-NEXT: ret -; -; CHECK32-LABEL: pre32idxneg256: -; CHECK32: ; %bb.0: -; CHECK32-NEXT: str w1, [x0, #-256]! -; CHECK32-NEXT: ret +; CHECK-LABEL: pre32idxneg256: +; CHECK: ; %bb.0: +; CHECK-NEXT: str w1, [x0, #-256]! +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds i32, ptr %ptr, i64 -64 store i32 %spacing, ptr %incdec.ptr, align 4 ret ptr %incdec.ptr } define ptr @pre16(ptr %ptr, i16 %spacing) { -; CHECK64-LABEL: pre16: -; CHECK64: ; %bb.0: -; CHECK64-NEXT: strh w1, [x0, #4]! -; CHECK64-NEXT: ret -; -; GISEL-LABEL: pre16: -; GISEL: ; %bb.0: -; GISEL-NEXT: mov x8, x0 -; GISEL-NEXT: add x0, x0, #4 -; GISEL-NEXT: strh w1, [x8, #4] -; GISEL-NEXT: ret -; -; CHECK32-LABEL: pre16: -; CHECK32: ; %bb.0: -; CHECK32-NEXT: strh w1, [x0, #4]! -; CHECK32-NEXT: ret +; CHECK-LABEL: pre16: +; CHECK: ; %bb.0: +; CHECK-NEXT: strh w1, [x0, #4]! +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds i16, ptr %ptr, i64 2 store i16 %spacing, ptr %incdec.ptr, align 4 ret ptr %incdec.ptr @@ -409,44 +313,20 @@ define ptr @pre16idxpos256(ptr %ptr, i16 %spacing) { } define ptr @pre16idxneg256(ptr %ptr, i16 %spacing) { -; CHECK64-LABEL: pre16idxneg256: -; CHECK64: ; %bb.0: -; CHECK64-NEXT: strh w1, [x0, #-256]! -; CHECK64-NEXT: ret -; -; GISEL-LABEL: pre16idxneg256: -; GISEL: ; %bb.0: -; GISEL-NEXT: mov x8, x0 -; GISEL-NEXT: sub x0, x0, #256 -; GISEL-NEXT: sturh w1, [x8, #-256] -; GISEL-NEXT: ret -; -; CHECK32-LABEL: pre16idxneg256: -; CHECK32: ; %bb.0: -; CHECK32-NEXT: strh w1, [x0, #-256]! -; CHECK32-NEXT: ret +; CHECK-LABEL: pre16idxneg256: +; CHECK: ; %bb.0: +; CHECK-NEXT: strh w1, [x0, #-256]! +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds i16, ptr %ptr, i64 -128 store i16 %spacing, ptr %incdec.ptr, align 4 ret ptr %incdec.ptr } define ptr @pre8(ptr %ptr, i8 %spacing) { -; CHECK64-LABEL: pre8: -; CHECK64: ; %bb.0: -; CHECK64-NEXT: strb w1, [x0, #2]! -; CHECK64-NEXT: ret -; -; GISEL-LABEL: pre8: -; GISEL: ; %bb.0: -; GISEL-NEXT: mov x8, x0 -; GISEL-NEXT: add x0, x0, #2 -; GISEL-NEXT: strb w1, [x8, #2] -; GISEL-NEXT: ret -; -; CHECK32-LABEL: pre8: -; CHECK32: ; %bb.0: -; CHECK32-NEXT: strb w1, [x0, #2]! -; CHECK32-NEXT: ret +; CHECK-LABEL: pre8: +; CHECK: ; %bb.0: +; CHECK-NEXT: strb w1, [x0, #2]! 
+; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds i8, ptr %ptr, i64 2 store i8 %spacing, ptr %incdec.ptr, align 4 ret ptr %incdec.ptr @@ -465,44 +345,20 @@ define ptr @pre8idxpos256(ptr %ptr, i8 %spacing) { } define ptr @pre8idxneg256(ptr %ptr, i8 %spacing) { -; CHECK64-LABEL: pre8idxneg256: -; CHECK64: ; %bb.0: -; CHECK64-NEXT: strb w1, [x0, #-256]! -; CHECK64-NEXT: ret -; -; GISEL-LABEL: pre8idxneg256: -; GISEL: ; %bb.0: -; GISEL-NEXT: mov x8, x0 -; GISEL-NEXT: sub x0, x0, #256 -; GISEL-NEXT: sturb w1, [x8, #-256] -; GISEL-NEXT: ret -; -; CHECK32-LABEL: pre8idxneg256: -; CHECK32: ; %bb.0: -; CHECK32-NEXT: strb w1, [x0, #-256]! -; CHECK32-NEXT: ret +; CHECK-LABEL: pre8idxneg256: +; CHECK: ; %bb.0: +; CHECK-NEXT: strb w1, [x0, #-256]! +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds i8, ptr %ptr, i64 -256 store i8 %spacing, ptr %incdec.ptr, align 4 ret ptr %incdec.ptr } define ptr @pretrunc64to32(ptr %ptr, i64 %spacing) { -; CHECK64-LABEL: pretrunc64to32: -; CHECK64: ; %bb.0: -; CHECK64-NEXT: str w1, [x0, #8]! -; CHECK64-NEXT: ret -; -; GISEL-LABEL: pretrunc64to32: -; GISEL: ; %bb.0: -; GISEL-NEXT: mov x8, x0 -; GISEL-NEXT: add x0, x0, #8 -; GISEL-NEXT: str w1, [x8, #8] -; GISEL-NEXT: ret -; -; CHECK32-LABEL: pretrunc64to32: -; CHECK32: ; %bb.0: -; CHECK32-NEXT: str w1, [x0, #8]! -; CHECK32-NEXT: ret +; CHECK-LABEL: pretrunc64to32: +; CHECK: ; %bb.0: +; CHECK-NEXT: str w1, [x0, #8]! +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds i32, ptr %ptr, i64 2 %trunc = trunc i64 %spacing to i32 store i32 %trunc, ptr %incdec.ptr, align 4 @@ -510,22 +366,10 @@ define ptr @pretrunc64to32(ptr %ptr, i64 %spacing) { } define ptr @pretrunc64to16(ptr %ptr, i64 %spacing) { -; CHECK64-LABEL: pretrunc64to16: -; CHECK64: ; %bb.0: -; CHECK64-NEXT: strh w1, [x0, #4]! -; CHECK64-NEXT: ret -; -; GISEL-LABEL: pretrunc64to16: -; GISEL: ; %bb.0: -; GISEL-NEXT: mov x8, x0 -; GISEL-NEXT: add x0, x0, #4 -; GISEL-NEXT: strh w1, [x8, #4] -; GISEL-NEXT: ret -; -; CHECK32-LABEL: pretrunc64to16: -; CHECK32: ; %bb.0: -; CHECK32-NEXT: strh w1, [x0, #4]! -; CHECK32-NEXT: ret +; CHECK-LABEL: pretrunc64to16: +; CHECK: ; %bb.0: +; CHECK-NEXT: strh w1, [x0, #4]! +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds i16, ptr %ptr, i64 2 %trunc = trunc i64 %spacing to i16 store i16 %trunc, ptr %incdec.ptr, align 4 @@ -533,22 +377,10 @@ define ptr @pretrunc64to16(ptr %ptr, i64 %spacing) { } define ptr @pretrunc64to8(ptr %ptr, i64 %spacing) { -; CHECK64-LABEL: pretrunc64to8: -; CHECK64: ; %bb.0: -; CHECK64-NEXT: strb w1, [x0, #2]! -; CHECK64-NEXT: ret -; -; GISEL-LABEL: pretrunc64to8: -; GISEL: ; %bb.0: -; GISEL-NEXT: mov x8, x0 -; GISEL-NEXT: add x0, x0, #2 -; GISEL-NEXT: strb w1, [x8, #2] -; GISEL-NEXT: ret -; -; CHECK32-LABEL: pretrunc64to8: -; CHECK32: ; %bb.0: -; CHECK32-NEXT: strb w1, [x0, #2]! -; CHECK32-NEXT: ret +; CHECK-LABEL: pretrunc64to8: +; CHECK: ; %bb.0: +; CHECK-NEXT: strb w1, [x0, #2]! +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds i8, ptr %ptr, i64 2 %trunc = trunc i64 %spacing to i8 store i8 %trunc, ptr %incdec.ptr, align 4 @@ -583,24 +415,11 @@ define ptr @preidxf32(ptr %src, ptr %out) { } define ptr @preidxf16(ptr %src, ptr %out) { -; CHECK64-LABEL: preidxf16: -; CHECK64: ; %bb.0: -; CHECK64-NEXT: ldr h0, [x0, #2]! 
-; CHECK64-NEXT: str h0, [x1] -; CHECK64-NEXT: ret -; -; GISEL-LABEL: preidxf16: -; GISEL: ; %bb.0: -; GISEL-NEXT: ldr h0, [x0, #2] -; GISEL-NEXT: add x0, x0, #2 -; GISEL-NEXT: str h0, [x1] -; GISEL-NEXT: ret -; -; CHECK32-LABEL: preidxf16: -; CHECK32: ; %bb.0: -; CHECK32-NEXT: ldr h0, [x0, #2]! -; CHECK32-NEXT: str h0, [x1] -; CHECK32-NEXT: ret +; CHECK-LABEL: preidxf16: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr h0, [x0, #2]! +; CHECK-NEXT: str h0, [x1] +; CHECK-NEXT: ret %ptr = getelementptr inbounds half, ptr %src, i64 1 %tmp = load half, ptr %ptr, align 2 store half %tmp, ptr %out, align 2 diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll index 46563f6a8e089..0d7620d1c883d 100644 --- a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll +++ b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll @@ -626,20 +626,12 @@ @ptr = global ptr null define <8 x i8> @test_v8i8_pre_load(ptr %addr) { -; SDAG-LABEL: test_v8i8_pre_load: -; SDAG: ; %bb.0: -; SDAG-NEXT: ldr d0, [x0, #40]! -; SDAG-NEXT: adrp x8, _ptr@PAGE -; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] -; SDAG-NEXT: ret -; -; CHECK-GISEL-LABEL: test_v8i8_pre_load: -; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: ldr d0, [x0, #40] -; CHECK-GISEL-NEXT: adrp x8, _ptr@PAGE -; CHECK-GISEL-NEXT: add x9, x0, #40 -; CHECK-GISEL-NEXT: str x9, [x8, _ptr@PAGEOFF] -; CHECK-GISEL-NEXT: ret +; CHECK-LABEL: test_v8i8_pre_load: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr d0, [x0, #40]! +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <8 x i8>, ptr %addr, i32 5 %val = load <8 x i8>, ptr %newaddr, align 8 store ptr %newaddr, ptr @ptr @@ -660,20 +652,12 @@ define <8 x i8> @test_v8i8_post_load(ptr %addr) { } define void @test_v8i8_pre_store(<8 x i8> %in, ptr %addr) { -; SDAG-LABEL: test_v8i8_pre_store: -; SDAG: ; %bb.0: -; SDAG-NEXT: adrp x8, _ptr@PAGE -; SDAG-NEXT: str d0, [x0, #40]! -; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] -; SDAG-NEXT: ret -; -; CHECK-GISEL-LABEL: test_v8i8_pre_store: -; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: adrp x8, _ptr@PAGE -; CHECK-GISEL-NEXT: add x9, x0, #40 -; CHECK-GISEL-NEXT: str d0, [x0, #40] -; CHECK-GISEL-NEXT: str x9, [x8, _ptr@PAGEOFF] -; CHECK-GISEL-NEXT: ret +; CHECK-LABEL: test_v8i8_pre_store: +; CHECK: ; %bb.0: +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str d0, [x0, #40]! +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <8 x i8>, ptr %addr, i32 5 store <8 x i8> %in, ptr %newaddr, align 8 store ptr %newaddr, ptr @ptr @@ -694,20 +678,12 @@ define void @test_v8i8_post_store(<8 x i8> %in, ptr %addr) { } define <4 x i16> @test_v4i16_pre_load(ptr %addr) { -; SDAG-LABEL: test_v4i16_pre_load: -; SDAG: ; %bb.0: -; SDAG-NEXT: ldr d0, [x0, #40]! -; SDAG-NEXT: adrp x8, _ptr@PAGE -; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] -; SDAG-NEXT: ret -; -; CHECK-GISEL-LABEL: test_v4i16_pre_load: -; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: ldr d0, [x0, #40] -; CHECK-GISEL-NEXT: adrp x8, _ptr@PAGE -; CHECK-GISEL-NEXT: add x9, x0, #40 -; CHECK-GISEL-NEXT: str x9, [x8, _ptr@PAGEOFF] -; CHECK-GISEL-NEXT: ret +; CHECK-LABEL: test_v4i16_pre_load: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr d0, [x0, #40]! 
+; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <4 x i16>, ptr %addr, i32 5 %val = load <4 x i16>, ptr %newaddr, align 8 store ptr %newaddr, ptr @ptr @@ -728,20 +704,12 @@ define <4 x i16> @test_v4i16_post_load(ptr %addr) { } define void @test_v4i16_pre_store(<4 x i16> %in, ptr %addr) { -; SDAG-LABEL: test_v4i16_pre_store: -; SDAG: ; %bb.0: -; SDAG-NEXT: adrp x8, _ptr@PAGE -; SDAG-NEXT: str d0, [x0, #40]! -; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] -; SDAG-NEXT: ret -; -; CHECK-GISEL-LABEL: test_v4i16_pre_store: -; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: adrp x8, _ptr@PAGE -; CHECK-GISEL-NEXT: add x9, x0, #40 -; CHECK-GISEL-NEXT: str d0, [x0, #40] -; CHECK-GISEL-NEXT: str x9, [x8, _ptr@PAGEOFF] -; CHECK-GISEL-NEXT: ret +; CHECK-LABEL: test_v4i16_pre_store: +; CHECK: ; %bb.0: +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str d0, [x0, #40]! +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <4 x i16>, ptr %addr, i32 5 store <4 x i16> %in, ptr %newaddr, align 8 store ptr %newaddr, ptr @ptr @@ -762,20 +730,12 @@ define void @test_v4i16_post_store(<4 x i16> %in, ptr %addr) { } define <2 x i32> @test_v2i32_pre_load(ptr %addr) { -; SDAG-LABEL: test_v2i32_pre_load: -; SDAG: ; %bb.0: -; SDAG-NEXT: ldr d0, [x0, #40]! -; SDAG-NEXT: adrp x8, _ptr@PAGE -; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] -; SDAG-NEXT: ret -; -; CHECK-GISEL-LABEL: test_v2i32_pre_load: -; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: ldr d0, [x0, #40] -; CHECK-GISEL-NEXT: adrp x8, _ptr@PAGE -; CHECK-GISEL-NEXT: add x9, x0, #40 -; CHECK-GISEL-NEXT: str x9, [x8, _ptr@PAGEOFF] -; CHECK-GISEL-NEXT: ret +; CHECK-LABEL: test_v2i32_pre_load: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr d0, [x0, #40]! +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <2 x i32>, ptr %addr, i32 5 %val = load <2 x i32>, ptr %newaddr, align 8 store ptr %newaddr, ptr @ptr @@ -796,20 +756,12 @@ define <2 x i32> @test_v2i32_post_load(ptr %addr) { } define void @test_v2i32_pre_store(<2 x i32> %in, ptr %addr) { -; SDAG-LABEL: test_v2i32_pre_store: -; SDAG: ; %bb.0: -; SDAG-NEXT: adrp x8, _ptr@PAGE -; SDAG-NEXT: str d0, [x0, #40]! -; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] -; SDAG-NEXT: ret -; -; CHECK-GISEL-LABEL: test_v2i32_pre_store: -; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: adrp x8, _ptr@PAGE -; CHECK-GISEL-NEXT: add x9, x0, #40 -; CHECK-GISEL-NEXT: str d0, [x0, #40] -; CHECK-GISEL-NEXT: str x9, [x8, _ptr@PAGEOFF] -; CHECK-GISEL-NEXT: ret +; CHECK-LABEL: test_v2i32_pre_store: +; CHECK: ; %bb.0: +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str d0, [x0, #40]! +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <2 x i32>, ptr %addr, i32 5 store <2 x i32> %in, ptr %newaddr, align 8 store ptr %newaddr, ptr @ptr @@ -830,20 +782,12 @@ define void @test_v2i32_post_store(<2 x i32> %in, ptr %addr) { } define <2 x float> @test_v2f32_pre_load(ptr %addr) { -; SDAG-LABEL: test_v2f32_pre_load: -; SDAG: ; %bb.0: -; SDAG-NEXT: ldr d0, [x0, #40]! 
-; SDAG-NEXT: adrp x8, _ptr@PAGE -; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] -; SDAG-NEXT: ret -; -; CHECK-GISEL-LABEL: test_v2f32_pre_load: -; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: ldr d0, [x0, #40] -; CHECK-GISEL-NEXT: adrp x8, _ptr@PAGE -; CHECK-GISEL-NEXT: add x9, x0, #40 -; CHECK-GISEL-NEXT: str x9, [x8, _ptr@PAGEOFF] -; CHECK-GISEL-NEXT: ret +; CHECK-LABEL: test_v2f32_pre_load: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr d0, [x0, #40]! +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <2 x float>, ptr %addr, i32 5 %val = load <2 x float>, ptr %newaddr, align 8 store ptr %newaddr, ptr @ptr @@ -864,20 +808,12 @@ define <2 x float> @test_v2f32_post_load(ptr %addr) { } define void @test_v2f32_pre_store(<2 x float> %in, ptr %addr) { -; SDAG-LABEL: test_v2f32_pre_store: -; SDAG: ; %bb.0: -; SDAG-NEXT: adrp x8, _ptr@PAGE -; SDAG-NEXT: str d0, [x0, #40]! -; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] -; SDAG-NEXT: ret -; -; CHECK-GISEL-LABEL: test_v2f32_pre_store: -; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: adrp x8, _ptr@PAGE -; CHECK-GISEL-NEXT: add x9, x0, #40 -; CHECK-GISEL-NEXT: str d0, [x0, #40] -; CHECK-GISEL-NEXT: str x9, [x8, _ptr@PAGEOFF] -; CHECK-GISEL-NEXT: ret +; CHECK-LABEL: test_v2f32_pre_store: +; CHECK: ; %bb.0: +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str d0, [x0, #40]! +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <2 x float>, ptr %addr, i32 5 store <2 x float> %in, ptr %newaddr, align 8 store ptr %newaddr, ptr @ptr @@ -907,10 +843,10 @@ define <1 x i64> @test_v1i64_pre_load(ptr %addr) { ; ; CHECK-GISEL-LABEL: test_v1i64_pre_load: ; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: ldr d0, [x0, #40] -; CHECK-GISEL-NEXT: adrp x8, _ptr@PAGE -; CHECK-GISEL-NEXT: add x9, x0, #40 -; CHECK-GISEL-NEXT: str x9, [x8, _ptr@PAGEOFF] +; CHECK-GISEL-NEXT: ldr x8, [x0, #40]! +; CHECK-GISEL-NEXT: adrp x9, _ptr@PAGE +; CHECK-GISEL-NEXT: str x0, [x9, _ptr@PAGEOFF] +; CHECK-GISEL-NEXT: fmov d0, x8 ; CHECK-GISEL-NEXT: ret %newaddr = getelementptr <1 x i64>, ptr %addr, i32 5 %val = load <1 x i64>, ptr %newaddr, align 8 @@ -940,20 +876,12 @@ define <1 x i64> @test_v1i64_post_load(ptr %addr) { } define void @test_v1i64_pre_store(<1 x i64> %in, ptr %addr) { -; SDAG-LABEL: test_v1i64_pre_store: -; SDAG: ; %bb.0: -; SDAG-NEXT: adrp x8, _ptr@PAGE -; SDAG-NEXT: str d0, [x0, #40]! -; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] -; SDAG-NEXT: ret -; -; CHECK-GISEL-LABEL: test_v1i64_pre_store: -; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: adrp x8, _ptr@PAGE -; CHECK-GISEL-NEXT: add x9, x0, #40 -; CHECK-GISEL-NEXT: str d0, [x0, #40] -; CHECK-GISEL-NEXT: str x9, [x8, _ptr@PAGEOFF] -; CHECK-GISEL-NEXT: ret +; CHECK-LABEL: test_v1i64_pre_store: +; CHECK: ; %bb.0: +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str d0, [x0, #40]! +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <1 x i64>, ptr %addr, i32 5 store <1 x i64> %in, ptr %newaddr, align 8 store ptr %newaddr, ptr @ptr @@ -974,20 +902,12 @@ define void @test_v1i64_post_store(<1 x i64> %in, ptr %addr) { } define <16 x i8> @test_v16i8_pre_load(ptr %addr) { -; SDAG-LABEL: test_v16i8_pre_load: -; SDAG: ; %bb.0: -; SDAG-NEXT: ldr q0, [x0, #80]! 
-; SDAG-NEXT: adrp x8, _ptr@PAGE -; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] -; SDAG-NEXT: ret -; -; CHECK-GISEL-LABEL: test_v16i8_pre_load: -; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: ldr q0, [x0, #80] -; CHECK-GISEL-NEXT: adrp x8, _ptr@PAGE -; CHECK-GISEL-NEXT: add x9, x0, #80 -; CHECK-GISEL-NEXT: str x9, [x8, _ptr@PAGEOFF] -; CHECK-GISEL-NEXT: ret +; CHECK-LABEL: test_v16i8_pre_load: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr q0, [x0, #80]! +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <16 x i8>, ptr %addr, i32 5 %val = load <16 x i8>, ptr %newaddr, align 8 store ptr %newaddr, ptr @ptr @@ -1008,20 +928,12 @@ define <16 x i8> @test_v16i8_post_load(ptr %addr) { } define void @test_v16i8_pre_store(<16 x i8> %in, ptr %addr) { -; SDAG-LABEL: test_v16i8_pre_store: -; SDAG: ; %bb.0: -; SDAG-NEXT: adrp x8, _ptr@PAGE -; SDAG-NEXT: str q0, [x0, #80]! -; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] -; SDAG-NEXT: ret -; -; CHECK-GISEL-LABEL: test_v16i8_pre_store: -; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: adrp x8, _ptr@PAGE -; CHECK-GISEL-NEXT: add x9, x0, #80 -; CHECK-GISEL-NEXT: str q0, [x0, #80] -; CHECK-GISEL-NEXT: str x9, [x8, _ptr@PAGEOFF] -; CHECK-GISEL-NEXT: ret +; CHECK-LABEL: test_v16i8_pre_store: +; CHECK: ; %bb.0: +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str q0, [x0, #80]! +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <16 x i8>, ptr %addr, i32 5 store <16 x i8> %in, ptr %newaddr, align 8 store ptr %newaddr, ptr @ptr @@ -1042,20 +954,12 @@ define void @test_v16i8_post_store(<16 x i8> %in, ptr %addr) { } define <8 x i16> @test_v8i16_pre_load(ptr %addr) { -; SDAG-LABEL: test_v8i16_pre_load: -; SDAG: ; %bb.0: -; SDAG-NEXT: ldr q0, [x0, #80]! -; SDAG-NEXT: adrp x8, _ptr@PAGE -; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] -; SDAG-NEXT: ret -; -; CHECK-GISEL-LABEL: test_v8i16_pre_load: -; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: ldr q0, [x0, #80] -; CHECK-GISEL-NEXT: adrp x8, _ptr@PAGE -; CHECK-GISEL-NEXT: add x9, x0, #80 -; CHECK-GISEL-NEXT: str x9, [x8, _ptr@PAGEOFF] -; CHECK-GISEL-NEXT: ret +; CHECK-LABEL: test_v8i16_pre_load: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr q0, [x0, #80]! +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <8 x i16>, ptr %addr, i32 5 %val = load <8 x i16>, ptr %newaddr, align 8 store ptr %newaddr, ptr @ptr @@ -1076,20 +980,12 @@ define <8 x i16> @test_v8i16_post_load(ptr %addr) { } define void @test_v8i16_pre_store(<8 x i16> %in, ptr %addr) { -; SDAG-LABEL: test_v8i16_pre_store: -; SDAG: ; %bb.0: -; SDAG-NEXT: adrp x8, _ptr@PAGE -; SDAG-NEXT: str q0, [x0, #80]! -; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] -; SDAG-NEXT: ret -; -; CHECK-GISEL-LABEL: test_v8i16_pre_store: -; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: adrp x8, _ptr@PAGE -; CHECK-GISEL-NEXT: add x9, x0, #80 -; CHECK-GISEL-NEXT: str q0, [x0, #80] -; CHECK-GISEL-NEXT: str x9, [x8, _ptr@PAGEOFF] -; CHECK-GISEL-NEXT: ret +; CHECK-LABEL: test_v8i16_pre_store: +; CHECK: ; %bb.0: +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str q0, [x0, #80]! 
+; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <8 x i16>, ptr %addr, i32 5 store <8 x i16> %in, ptr %newaddr, align 8 store ptr %newaddr, ptr @ptr @@ -1110,20 +1006,12 @@ define void @test_v8i16_post_store(<8 x i16> %in, ptr %addr) { } define <4 x i32> @test_v4i32_pre_load(ptr %addr) { -; SDAG-LABEL: test_v4i32_pre_load: -; SDAG: ; %bb.0: -; SDAG-NEXT: ldr q0, [x0, #80]! -; SDAG-NEXT: adrp x8, _ptr@PAGE -; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] -; SDAG-NEXT: ret -; -; CHECK-GISEL-LABEL: test_v4i32_pre_load: -; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: ldr q0, [x0, #80] -; CHECK-GISEL-NEXT: adrp x8, _ptr@PAGE -; CHECK-GISEL-NEXT: add x9, x0, #80 -; CHECK-GISEL-NEXT: str x9, [x8, _ptr@PAGEOFF] -; CHECK-GISEL-NEXT: ret +; CHECK-LABEL: test_v4i32_pre_load: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr q0, [x0, #80]! +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <4 x i32>, ptr %addr, i32 5 %val = load <4 x i32>, ptr %newaddr, align 8 store ptr %newaddr, ptr @ptr @@ -1144,20 +1032,12 @@ define <4 x i32> @test_v4i32_post_load(ptr %addr) { } define void @test_v4i32_pre_store(<4 x i32> %in, ptr %addr) { -; SDAG-LABEL: test_v4i32_pre_store: -; SDAG: ; %bb.0: -; SDAG-NEXT: adrp x8, _ptr@PAGE -; SDAG-NEXT: str q0, [x0, #80]! -; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] -; SDAG-NEXT: ret -; -; CHECK-GISEL-LABEL: test_v4i32_pre_store: -; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: adrp x8, _ptr@PAGE -; CHECK-GISEL-NEXT: add x9, x0, #80 -; CHECK-GISEL-NEXT: str q0, [x0, #80] -; CHECK-GISEL-NEXT: str x9, [x8, _ptr@PAGEOFF] -; CHECK-GISEL-NEXT: ret +; CHECK-LABEL: test_v4i32_pre_store: +; CHECK: ; %bb.0: +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str q0, [x0, #80]! +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <4 x i32>, ptr %addr, i32 5 store <4 x i32> %in, ptr %newaddr, align 8 store ptr %newaddr, ptr @ptr @@ -1179,20 +1059,12 @@ define void @test_v4i32_post_store(<4 x i32> %in, ptr %addr) { define <4 x float> @test_v4f32_pre_load(ptr %addr) { -; SDAG-LABEL: test_v4f32_pre_load: -; SDAG: ; %bb.0: -; SDAG-NEXT: ldr q0, [x0, #80]! -; SDAG-NEXT: adrp x8, _ptr@PAGE -; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] -; SDAG-NEXT: ret -; -; CHECK-GISEL-LABEL: test_v4f32_pre_load: -; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: ldr q0, [x0, #80] -; CHECK-GISEL-NEXT: adrp x8, _ptr@PAGE -; CHECK-GISEL-NEXT: add x9, x0, #80 -; CHECK-GISEL-NEXT: str x9, [x8, _ptr@PAGEOFF] -; CHECK-GISEL-NEXT: ret +; CHECK-LABEL: test_v4f32_pre_load: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr q0, [x0, #80]! +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <4 x float>, ptr %addr, i32 5 %val = load <4 x float>, ptr %newaddr, align 8 store ptr %newaddr, ptr @ptr @@ -1213,20 +1085,12 @@ define <4 x float> @test_v4f32_post_load(ptr %addr) { } define void @test_v4f32_pre_store(<4 x float> %in, ptr %addr) { -; SDAG-LABEL: test_v4f32_pre_store: -; SDAG: ; %bb.0: -; SDAG-NEXT: adrp x8, _ptr@PAGE -; SDAG-NEXT: str q0, [x0, #80]! 
-; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] -; SDAG-NEXT: ret -; -; CHECK-GISEL-LABEL: test_v4f32_pre_store: -; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: adrp x8, _ptr@PAGE -; CHECK-GISEL-NEXT: add x9, x0, #80 -; CHECK-GISEL-NEXT: str q0, [x0, #80] -; CHECK-GISEL-NEXT: str x9, [x8, _ptr@PAGEOFF] -; CHECK-GISEL-NEXT: ret +; CHECK-LABEL: test_v4f32_pre_store: +; CHECK: ; %bb.0: +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str q0, [x0, #80]! +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <4 x float>, ptr %addr, i32 5 store <4 x float> %in, ptr %newaddr, align 8 store ptr %newaddr, ptr @ptr @@ -1248,20 +1112,12 @@ define void @test_v4f32_post_store(<4 x float> %in, ptr %addr) { define <2 x i64> @test_v2i64_pre_load(ptr %addr) { -; SDAG-LABEL: test_v2i64_pre_load: -; SDAG: ; %bb.0: -; SDAG-NEXT: ldr q0, [x0, #80]! -; SDAG-NEXT: adrp x8, _ptr@PAGE -; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] -; SDAG-NEXT: ret -; -; CHECK-GISEL-LABEL: test_v2i64_pre_load: -; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: ldr q0, [x0, #80] -; CHECK-GISEL-NEXT: adrp x8, _ptr@PAGE -; CHECK-GISEL-NEXT: add x9, x0, #80 -; CHECK-GISEL-NEXT: str x9, [x8, _ptr@PAGEOFF] -; CHECK-GISEL-NEXT: ret +; CHECK-LABEL: test_v2i64_pre_load: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr q0, [x0, #80]! +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <2 x i64>, ptr %addr, i32 5 %val = load <2 x i64>, ptr %newaddr, align 8 store ptr %newaddr, ptr @ptr @@ -1282,20 +1138,12 @@ define <2 x i64> @test_v2i64_post_load(ptr %addr) { } define void @test_v2i64_pre_store(<2 x i64> %in, ptr %addr) { -; SDAG-LABEL: test_v2i64_pre_store: -; SDAG: ; %bb.0: -; SDAG-NEXT: adrp x8, _ptr@PAGE -; SDAG-NEXT: str q0, [x0, #80]! -; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] -; SDAG-NEXT: ret -; -; CHECK-GISEL-LABEL: test_v2i64_pre_store: -; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: adrp x8, _ptr@PAGE -; CHECK-GISEL-NEXT: add x9, x0, #80 -; CHECK-GISEL-NEXT: str q0, [x0, #80] -; CHECK-GISEL-NEXT: str x9, [x8, _ptr@PAGEOFF] -; CHECK-GISEL-NEXT: ret +; CHECK-LABEL: test_v2i64_pre_store: +; CHECK: ; %bb.0: +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str q0, [x0, #80]! +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <2 x i64>, ptr %addr, i32 5 store <2 x i64> %in, ptr %newaddr, align 8 store ptr %newaddr, ptr @ptr @@ -1317,20 +1165,12 @@ define void @test_v2i64_post_store(<2 x i64> %in, ptr %addr) { define <2 x double> @test_v2f64_pre_load(ptr %addr) { -; SDAG-LABEL: test_v2f64_pre_load: -; SDAG: ; %bb.0: -; SDAG-NEXT: ldr q0, [x0, #80]! -; SDAG-NEXT: adrp x8, _ptr@PAGE -; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] -; SDAG-NEXT: ret -; -; CHECK-GISEL-LABEL: test_v2f64_pre_load: -; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: ldr q0, [x0, #80] -; CHECK-GISEL-NEXT: adrp x8, _ptr@PAGE -; CHECK-GISEL-NEXT: add x9, x0, #80 -; CHECK-GISEL-NEXT: str x9, [x8, _ptr@PAGEOFF] -; CHECK-GISEL-NEXT: ret +; CHECK-LABEL: test_v2f64_pre_load: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr q0, [x0, #80]! 
+; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <2 x double>, ptr %addr, i32 5 %val = load <2 x double>, ptr %newaddr, align 8 store ptr %newaddr, ptr @ptr @@ -1351,20 +1191,12 @@ define <2 x double> @test_v2f64_post_load(ptr %addr) { } define void @test_v2f64_pre_store(<2 x double> %in, ptr %addr) { -; SDAG-LABEL: test_v2f64_pre_store: -; SDAG: ; %bb.0: -; SDAG-NEXT: adrp x8, _ptr@PAGE -; SDAG-NEXT: str q0, [x0, #80]! -; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF] -; SDAG-NEXT: ret -; -; CHECK-GISEL-LABEL: test_v2f64_pre_store: -; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: adrp x8, _ptr@PAGE -; CHECK-GISEL-NEXT: add x9, x0, #80 -; CHECK-GISEL-NEXT: str q0, [x0, #80] -; CHECK-GISEL-NEXT: str x9, [x8, _ptr@PAGEOFF] -; CHECK-GISEL-NEXT: ret +; CHECK-LABEL: test_v2f64_pre_store: +; CHECK: ; %bb.0: +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str q0, [x0, #80]! +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <2 x double>, ptr %addr, i32 5 store <2 x double> %in, ptr %newaddr, align 8 store ptr %newaddr, ptr @ptr From 7fa19e6f4b87623b0ca1a23bf6b6293c1b5e5799 Mon Sep 17 00:00:00 2001 From: Nishant Patel Date: Thu, 26 Oct 2023 10:41:09 -0700 Subject: [PATCH 103/877] [MLIR] Add SyclRuntimeWrapper (#69648) --- mlir/CMakeLists.txt | 1 + mlir/cmake/modules/FindLevelZero.cmake | 221 ++++++++++++++++++ mlir/cmake/modules/FindSyclRuntime.cmake | 68 ++++++ mlir/lib/ExecutionEngine/CMakeLists.txt | 36 +++ .../ExecutionEngine/SyclRuntimeWrappers.cpp | 209 +++++++++++++++++ 5 files changed, 535 insertions(+) create mode 100644 mlir/cmake/modules/FindLevelZero.cmake create mode 100644 mlir/cmake/modules/FindSyclRuntime.cmake create mode 100644 mlir/lib/ExecutionEngine/SyclRuntimeWrappers.cpp diff --git a/mlir/CMakeLists.txt b/mlir/CMakeLists.txt index ac120aad0d1ed..16ff950089734 100644 --- a/mlir/CMakeLists.txt +++ b/mlir/CMakeLists.txt @@ -126,6 +126,7 @@ add_definitions(-DMLIR_ROCM_CONVERSIONS_ENABLED=${MLIR_ENABLE_ROCM_CONVERSIONS}) set(MLIR_ENABLE_DEPRECATED_GPU_SERIALIZATION 0 CACHE BOOL "Enable deprecated GPU serialization passes") set(MLIR_ENABLE_CUDA_RUNNER 0 CACHE BOOL "Enable building the mlir CUDA runner") set(MLIR_ENABLE_ROCM_RUNNER 0 CACHE BOOL "Enable building the mlir ROCm runner") +set(MLIR_ENABLE_SYCL_RUNNER 0 CACHE BOOL "Enable building the mlir Sycl runner") set(MLIR_ENABLE_SPIRV_CPU_RUNNER 0 CACHE BOOL "Enable building the mlir SPIR-V cpu runner") set(MLIR_ENABLE_VULKAN_RUNNER 0 CACHE BOOL "Enable building the mlir Vulkan runner") set(MLIR_ENABLE_NVPTXCOMPILER 0 CACHE BOOL diff --git a/mlir/cmake/modules/FindLevelZero.cmake b/mlir/cmake/modules/FindLevelZero.cmake new file mode 100644 index 0000000000000..012187f0afc0b --- /dev/null +++ b/mlir/cmake/modules/FindLevelZero.cmake @@ -0,0 +1,221 @@ +# CMake find_package() module for level-zero +# +# Example usage: +# +# find_package(LevelZero) +# +# If successful, the following variables will be defined: +# LevelZero_FOUND +# LevelZero_INCLUDE_DIRS +# LevelZero_LIBRARY +# LevelZero_LIBRARIES_DIR +# +# By default, the module searches the standard paths to locate the "ze_api.h" +# and the ze_loader shared library. When using a custom level-zero installation, +# the environment variable "LEVEL_ZERO_DIR" should be specified telling the +# module to get the level-zero library and headers from that location. + +include(FindPackageHandleStandardArgs) + +# Search path priority +# 1. CMake Variable LEVEL_ZERO_DIR +# 2. 
Environment Variable LEVEL_ZERO_DIR
+
+if(NOT LEVEL_ZERO_DIR)
+    if(DEFINED ENV{LEVEL_ZERO_DIR})
+        set(LEVEL_ZERO_DIR "$ENV{LEVEL_ZERO_DIR}")
+    endif()
+endif()
+
+if(LEVEL_ZERO_DIR)
+    find_path(LevelZero_INCLUDE_DIR
+        NAMES level_zero/ze_api.h
+        PATHS ${LEVEL_ZERO_DIR}/include
+        NO_DEFAULT_PATH
+    )
+
+    if(LINUX)
+        find_library(LevelZero_LIBRARY
+            NAMES ze_loader
+            PATHS ${LEVEL_ZERO_DIR}/lib
+                  ${LEVEL_ZERO_DIR}/lib/x86_64-linux-gnu
+            NO_DEFAULT_PATH
+        )
+    else()
+        find_library(LevelZero_LIBRARY
+            NAMES ze_loader
+            PATHS ${LEVEL_ZERO_DIR}/lib
+            NO_DEFAULT_PATH
+        )
+    endif()
+else()
+    find_path(LevelZero_INCLUDE_DIR
+        NAMES level_zero/ze_api.h
+    )
+
+    find_library(LevelZero_LIBRARY
+        NAMES ze_loader
+    )
+endif()
+
+# Compares two version strings that are expected to be in x.y.z format and
+# reports whether VERSION_STR1 is greater than or equal to VERSION_STR2.
+# The strings are compared lexicographically after conversion to lists of
+# equal lengths, with the shorter string getting zero-padded.
+function(compare_versions VERSION_STR1 VERSION_STR2 OUTPUT)
+    # Convert the strings to lists
+    string(REPLACE "." ";" VL1 ${VERSION_STR1})
+    string(REPLACE "." ";" VL2 ${VERSION_STR2})
+    # Get the lengths of both lists
+    list(LENGTH VL1 VL1_LEN)
+    list(LENGTH VL2 VL2_LEN)
+    set(LEN ${VL1_LEN})
+    # If they differ in size, pad the shorter list with 0s
+    if(VL1_LEN GREATER VL2_LEN)
+        math(EXPR DIFF "${VL1_LEN} - ${VL2_LEN}" OUTPUT_FORMAT DECIMAL)
+        foreach(IDX RANGE 1 ${DIFF} 1)
+            list(APPEND VL2 "0")
+        endforeach()
+    elseif(VL2_LEN GREATER VL1_LEN)
+        math(EXPR DIFF "${VL2_LEN} - ${VL1_LEN}" OUTPUT_FORMAT DECIMAL)
+        foreach(IDX RANGE 1 ${DIFF} 1)
+            list(APPEND VL1 "0")
+        endforeach()
+        set(LEN ${VL2_LEN})
+    endif()
+    math(EXPR LEN_SUB_ONE "${LEN}-1")
+    foreach(IDX RANGE 0 ${LEN_SUB_ONE} 1)
+        list(GET VL1 ${IDX} VAL1)
+        list(GET VL2 ${IDX} VAL2)
+
+        if(${VAL1} GREATER ${VAL2})
+            set(${OUTPUT} TRUE PARENT_SCOPE)
+            break()
+        elseif(${VAL1} LESS ${VAL2})
+            set(${OUTPUT} FALSE PARENT_SCOPE)
+            break()
+        else()
+            set(${OUTPUT} TRUE PARENT_SCOPE)
+        endif()
+    endforeach()
+
+endfunction(compare_versions)
+
+# Builds and runs a small program to extract the LevelZero loader version.
+function(get_l0_loader_version)
+
+    set(L0_VERSIONEER_SRC
+        [====[
+        #include <iostream>
+        #include <level_zero/loader/ze_loader.h>
+        #include <string>
+        int main() {
+            ze_result_t result;
+            std::string loader("loader");
+            zel_component_version_t *versions;
+            size_t size = 0;
+            result = zeInit(0);
+            if (result != ZE_RESULT_SUCCESS) {
+                std::cerr << "Failed to init ze driver" << std::endl;
+                return -1;
+            }
+            zelLoaderGetVersions(&size, nullptr);
+            versions = new zel_component_version_t[size];
+            zelLoaderGetVersions(&size, versions);
+            for (size_t i = 0; i < size; i++) {
+                if (loader.compare(versions[i].component_name) == 0) {
+                    std::cout << versions[i].component_lib_version.major << "."
+                              << versions[i].component_lib_version.minor << "."
+                              << versions[i].component_lib_version.patch;
+                    break;
+                }
+            }
+            delete[] versions;
+            return 0;
+        }
+        ]====]
+    )
+
+    set(L0_VERSIONEER_FILE ${CMAKE_BINARY_DIR}/temp/l0_versioneer.cpp)
+
+    file(WRITE ${L0_VERSIONEER_FILE} "${L0_VERSIONEER_SRC}")
+
+    # We need both the directories in the include path as ze_loader.h
+    # includes "ze_api.h" and not "level_zero/ze_api.h".
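+    # For example, with LEVEL_ZERO_DIR=/opt/level-zero (an illustrative path),
+    # both /opt/level-zero/include and /opt/level-zero/include/level_zero end
+    # up on the include path of the try_run() below.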
+ list(APPEND INCLUDE_DIRS ${LevelZero_INCLUDE_DIR}) + list(APPEND INCLUDE_DIRS ${LevelZero_INCLUDE_DIR}/level_zero) + list(JOIN INCLUDE_DIRS ";" INCLUDE_DIRS_STR) + try_run(L0_VERSIONEER_RUN L0_VERSIONEER_COMPILE + "${CMAKE_BINARY_DIR}" + "${L0_VERSIONEER_FILE}" + LINK_LIBRARIES ${LevelZero_LIBRARY} + CMAKE_FLAGS + "-DINCLUDE_DIRECTORIES=${INCLUDE_DIRS_STR}" + RUN_OUTPUT_VARIABLE L0_VERSION + ) + if(${L0_VERSIONEER_COMPILE} AND (DEFINED L0_VERSIONEER_RUN)) + set(LevelZero_VERSION ${L0_VERSION} PARENT_SCOPE) + message(STATUS "Found Level Zero of version: ${L0_VERSION}") + else() + message(FATAL_ERROR + "Could not compile a level-zero program to extract loader version" + ) + endif() +endfunction(get_l0_loader_version) + +if(LevelZero_INCLUDE_DIR AND LevelZero_LIBRARY) + list(APPEND LevelZero_LIBRARIES "${LevelZero_LIBRARY}") + list(APPEND LevelZero_INCLUDE_DIRS ${LevelZero_INCLUDE_DIR}) + if(OpenCL_FOUND) + list(APPEND LevelZero_INCLUDE_DIRS ${OpenCL_INCLUDE_DIRS}) + endif() + + cmake_path(GET LevelZero_LIBRARY PARENT_PATH LevelZero_LIBRARIES_PATH) + set(LevelZero_LIBRARIES_DIR ${LevelZero_LIBRARIES_PATH}) + + if(NOT TARGET LevelZero::LevelZero) + add_library(LevelZero::LevelZero INTERFACE IMPORTED) + set_target_properties(LevelZero::LevelZero + PROPERTIES INTERFACE_LINK_LIBRARIES "${LevelZero_LIBRARIES}" + ) + set_target_properties(LevelZero::LevelZero + PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${LevelZero_INCLUDE_DIRS}" + ) + endif() +endif() + +# Check if a specific version of Level Zero is required +if(LevelZero_FIND_VERSION) + get_l0_loader_version() + set(VERSION_GT_FIND_VERSION FALSE) + compare_versions( + ${LevelZero_VERSION} + ${LevelZero_FIND_VERSION} + VERSION_GT_FIND_VERSION + ) + if(${VERSION_GT_FIND_VERSION}) + set(LevelZero_FOUND TRUE) + else() + set(LevelZero_FOUND FALSE) + endif() +else() + set(LevelZero_FOUND TRUE) +endif() + +find_package_handle_standard_args(LevelZero + REQUIRED_VARS + LevelZero_FOUND + LevelZero_INCLUDE_DIRS + LevelZero_LIBRARY + LevelZero_LIBRARIES_DIR + HANDLE_COMPONENTS +) +mark_as_advanced(LevelZero_LIBRARY LevelZero_INCLUDE_DIRS) + +if(LevelZero_FOUND) + find_package_message(LevelZero "Found LevelZero: ${LevelZero_LIBRARY}" + "(found version ${LevelZero_VERSION})" + ) +else() + find_package_message(LevelZero "Could not find LevelZero" "") +endif() diff --git a/mlir/cmake/modules/FindSyclRuntime.cmake b/mlir/cmake/modules/FindSyclRuntime.cmake new file mode 100644 index 0000000000000..38b065a3f284c --- /dev/null +++ b/mlir/cmake/modules/FindSyclRuntime.cmake @@ -0,0 +1,68 @@ +# CMake find_package() module for SYCL Runtime +# +# Example usage: +# +# find_package(SyclRuntime) +# +# If successful, the following variables will be defined: +# SyclRuntime_FOUND +# SyclRuntime_INCLUDE_DIRS +# SyclRuntime_LIBRARY +# SyclRuntime_LIBRARIES_DIR +# + +include(FindPackageHandleStandardArgs) + +if(NOT DEFINED ENV{CMPLR_ROOT}) + message(WARNING "Please make sure to install Intel DPC++ Compiler and run setvars.(sh/bat)") + message(WARNING "You can download standalone Intel DPC++ Compiler from https://www.intel.com/content/www/us/en/developer/articles/tool/oneapi-standalone-components.html#compilers") +else() + if(LINUX OR (${CMAKE_SYSTEM_NAME} MATCHES "Linux")) + set(SyclRuntime_ROOT "$ENV{CMPLR_ROOT}/linux") + elseif(WIN32) + set(SyclRuntime_ROOT "$ENV{CMPLR_ROOT}/windows") + endif() + list(APPEND SyclRuntime_INCLUDE_DIRS "${SyclRuntime_ROOT}/include") + list(APPEND SyclRuntime_INCLUDE_DIRS "${SyclRuntime_ROOT}/include/sycl") + + set(SyclRuntime_LIBRARY_DIR 
"${SyclRuntime_ROOT}/lib") + + message(STATUS "SyclRuntime_LIBRARY_DIR: ${SyclRuntime_LIBRARY_DIR}") + find_library(SyclRuntime_LIBRARY + NAMES sycl + PATHS ${SyclRuntime_LIBRARY_DIR} + NO_DEFAULT_PATH + ) +endif() + +if(SyclRuntime_LIBRARY) + set(SyclRuntime_FOUND TRUE) + if(NOT TARGET SyclRuntime::SyclRuntime) + add_library(SyclRuntime::SyclRuntime INTERFACE IMPORTED) + set_target_properties(SyclRuntime::SyclRuntime + PROPERTIES INTERFACE_LINK_LIBRARIES "${SyclRuntime_LIBRARY}" + ) + set_target_properties(SyclRuntime::SyclRuntime + PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${SyclRuntime_INCLUDE_DIRS}" + ) + endif() +else() + set(SyclRuntime_FOUND FALSE) +endif() + +find_package_handle_standard_args(SyclRuntime + REQUIRED_VARS + SyclRuntime_FOUND + SyclRuntime_INCLUDE_DIRS + SyclRuntime_LIBRARY + SyclRuntime_LIBRARY_DIR + HANDLE_COMPONENTS +) + +mark_as_advanced(SyclRuntime_LIBRARY SyclRuntime_INCLUDE_DIRS) + +if(SyclRuntime_FOUND) + find_package_message(SyclRuntime "Found SyclRuntime: ${SyclRuntime_LIBRARY}" "") +else() + find_package_message(SyclRuntime "Could not find SyclRuntime" "") +endif() diff --git a/mlir/lib/ExecutionEngine/CMakeLists.txt b/mlir/lib/ExecutionEngine/CMakeLists.txt index ea33c2c6ed261..fdc797763ae3a 100644 --- a/mlir/lib/ExecutionEngine/CMakeLists.txt +++ b/mlir/lib/ExecutionEngine/CMakeLists.txt @@ -12,6 +12,7 @@ set(LLVM_OPTIONAL_SOURCES RunnerUtils.cpp OptUtils.cpp JitRunner.cpp + SyclRuntimeWrappers.cpp ) # Use a separate library for OptUtils, to avoid pulling in the entire JIT and @@ -328,4 +329,39 @@ if(LLVM_ENABLE_PIC) hip::host hip::amdhip64 ) endif() + + if(MLIR_ENABLE_SYCL_RUNNER) + find_package(SyclRuntime) + + if(NOT SyclRuntime_FOUND) + message(FATAL_ERROR "syclRuntime not found. Please set check oneapi installation and run setvars.sh.") + endif() + + find_package(LevelZero) + + if(NOT LevelZero_FOUND) + message(FATAL_ERROR "LevelZero not found. Please set LEVEL_ZERO_DIR.") + endif() + + add_mlir_library(mlir_sycl_runtime + SHARED + SyclRuntimeWrappers.cpp + + EXCLUDE_FROM_LIBMLIR + ) + + check_cxx_compiler_flag("-frtti" CXX_HAS_FRTTI_FLAG) + if(NOT CXX_HAS_FRTTI_FLAG) + message(FATAL_ERROR "CXX compiler does not accept flag -frtti") + endif() + target_compile_options (mlir_sycl_runtime PUBLIC -fexceptions -frtti) + + target_include_directories(mlir_sycl_runtime PRIVATE + ${MLIR_INCLUDE_DIRS} + ) + + target_link_libraries(mlir_sycl_runtime PRIVATE LevelZero::LevelZero SyclRuntime::SyclRuntime) + + set_property(TARGET mlir_sycl_runtime APPEND PROPERTY BUILD_RPATH "${LevelZero_LIBRARIES_DIR}" "${SyclRuntime_LIBRARIES_DIR}") + endif() endif() diff --git a/mlir/lib/ExecutionEngine/SyclRuntimeWrappers.cpp b/mlir/lib/ExecutionEngine/SyclRuntimeWrappers.cpp new file mode 100644 index 0000000000000..c250340c38fc7 --- /dev/null +++ b/mlir/lib/ExecutionEngine/SyclRuntimeWrappers.cpp @@ -0,0 +1,209 @@ +//===- SyclRuntimeWrappers.cpp - MLIR SYCL wrapper library ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements wrappers around the sycl runtime library with C linkage
+//
+//===----------------------------------------------------------------------===//
+
+#include <cassert>
+#include <cstdio>
+#include <stdexcept>
+
+#include <level_zero/ze_api.h>
+#include <sycl/ext/oneapi/backend/level_zero.hpp>
+#include <sycl/sycl.hpp>
+
+#ifdef _WIN32
+#define SYCL_RUNTIME_EXPORT __declspec(dllexport)
+#else
+#define SYCL_RUNTIME_EXPORT
+#endif // _WIN32
+
+namespace {
+
+template <typename F>
+auto catchAll(F &&func) {
+  try {
+    return func();
+  } catch (const std::exception &e) {
+    fprintf(stdout, "An exception was thrown: %s\n", e.what());
+    fflush(stdout);
+    abort();
+  } catch (...) {
+    fprintf(stdout, "An unknown exception was thrown\n");
+    fflush(stdout);
+    abort();
+  }
+}
+
+#define L0_SAFE_CALL(call)                                                     \
+  {                                                                            \
+    ze_result_t status = (call);                                               \
+    if (status != ZE_RESULT_SUCCESS) {                                         \
+      fprintf(stdout, "L0 error %d\n", status);                                \
+      fflush(stdout);                                                          \
+      abort();                                                                 \
+    }                                                                          \
+  }
+
+} // namespace
+
+static sycl::device getDefaultDevice() {
+  static sycl::device syclDevice;
+  static bool isDeviceInitialised = false;
+  if (!isDeviceInitialised) {
+    auto platformList = sycl::platform::get_platforms();
+    for (const auto &platform : platformList) {
+      auto platformName = platform.get_info<sycl::info::platform::name>();
+      bool isLevelZero = platformName.find("Level-Zero") != std::string::npos;
+      if (!isLevelZero)
+        continue;
+
+      syclDevice = platform.get_devices()[0];
+      isDeviceInitialised = true;
+      return syclDevice;
+    }
+    throw std::runtime_error("getDefaultDevice failed");
+  } else
+    return syclDevice;
+}
+
+static sycl::context getDefaultContext() {
+  static sycl::context syclContext{getDefaultDevice()};
+  return syclContext;
+}
+
+static void *allocDeviceMemory(sycl::queue *queue, size_t size, bool isShared) {
+  void *memPtr = nullptr;
+  if (isShared) {
+    memPtr = sycl::aligned_alloc_shared(64, size, getDefaultDevice(),
+                                        getDefaultContext());
+  } else {
+    memPtr = sycl::aligned_alloc_device(64, size, getDefaultDevice(),
+                                        getDefaultContext());
+  }
+  if (memPtr == nullptr) {
+    throw std::runtime_error("mem allocation failed!");
+  }
+  return memPtr;
+}
+
+static void deallocDeviceMemory(sycl::queue *queue, void *ptr) {
+  sycl::free(ptr, *queue);
+}
+
+static ze_module_handle_t loadModule(const void *data, size_t dataSize) {
+  assert(data);
+  ze_module_handle_t zeModule;
+  ze_module_desc_t desc = {ZE_STRUCTURE_TYPE_MODULE_DESC,
+                           nullptr,
+                           ZE_MODULE_FORMAT_IL_SPIRV,
+                           dataSize,
+                           (const uint8_t *)data,
+                           nullptr,
+                           nullptr};
+  auto zeDevice = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(
+      getDefaultDevice());
+  auto zeContext = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(
+      getDefaultContext());
+  L0_SAFE_CALL(zeModuleCreate(zeContext, zeDevice, &desc, &zeModule, nullptr));
+  return zeModule;
+}
+
+static sycl::kernel *getKernel(ze_module_handle_t zeModule, const char *name) {
+  assert(zeModule);
+  assert(name);
+  ze_kernel_handle_t zeKernel;
+  ze_kernel_desc_t desc = {};
+  desc.pKernelName = name;
+
+  L0_SAFE_CALL(zeKernelCreate(zeModule, &desc, &zeKernel));
+  sycl::kernel_bundle<sycl::bundle_state::executable> kernelBundle =
+      sycl::make_kernel_bundle<sycl::backend::ext_oneapi_level_zero,
+                               sycl::bundle_state::executable>(
+          {zeModule}, getDefaultContext());
+
+  auto kernel = sycl::make_kernel<sycl::backend::ext_oneapi_level_zero>(
+      {kernelBundle, zeKernel}, getDefaultContext());
+  return new sycl::kernel(kernel);
+}
+
+static void launchKernel(sycl::queue *queue, sycl::kernel *kernel, size_t gridX,
+                         size_t gridY, size_t gridZ, size_t blockX,
+                         size_t blockY, size_t blockZ, size_t sharedMemBytes,
+                         void **params, size_t paramsCount) {
+  auto syclGlobalRange =
+      sycl::range<3>(blockZ * gridZ, blockY * gridY, blockX * gridX);
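+  // Note: SYCL's nd_range takes the *global* size (total work-items per
+  // dimension) rather than a block count, hence the multiplication by the
+  // block dimensions above; the fastest-varying dimension (X) goes last,
+  // per SYCL convention.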
+  auto syclLocalRange = sycl::range<3>(blockZ, blockY, blockX);
+  sycl::nd_range<3> syclNdRange(syclGlobalRange, syclLocalRange);
+
+  queue->submit([&](sycl::handler &cgh) {
+    for (size_t i = 0; i < paramsCount; i++) {
+      cgh.set_arg(static_cast<uint32_t>(i),
+                  *(static_cast<void **>(params[i])));
+    }
+    cgh.parallel_for(syclNdRange, *kernel);
+  });
+}
+
+// Wrappers
+
+extern "C" SYCL_RUNTIME_EXPORT sycl::queue *mgpuStreamCreate() {
+
+  return catchAll([&]() {
+    sycl::queue *queue =
+        new sycl::queue(getDefaultContext(), getDefaultDevice());
+    return queue;
+  });
+}
+
+extern "C" SYCL_RUNTIME_EXPORT void mgpuStreamDestroy(sycl::queue *queue) {
+  catchAll([&]() { delete queue; });
+}
+
+extern "C" SYCL_RUNTIME_EXPORT void *
+mgpuMemAlloc(uint64_t size, sycl::queue *queue, bool isShared) {
+  return catchAll([&]() {
+    return allocDeviceMemory(queue, static_cast<size_t>(size), isShared);
+  });
+}
+
+extern "C" SYCL_RUNTIME_EXPORT void mgpuMemFree(void *ptr, sycl::queue *queue) {
+  catchAll([&]() {
+    if (ptr) {
+      deallocDeviceMemory(queue, ptr);
+    }
+  });
+}
+
+extern "C" SYCL_RUNTIME_EXPORT ze_module_handle_t
+mgpuModuleLoad(const void *data, size_t gpuBlobSize) {
+  return catchAll([&]() { return loadModule(data, gpuBlobSize); });
+}
+
+extern "C" SYCL_RUNTIME_EXPORT sycl::kernel *
+mgpuModuleGetFunction(ze_module_handle_t module, const char *name) {
+  return catchAll([&]() { return getKernel(module, name); });
+}
+
+extern "C" SYCL_RUNTIME_EXPORT void
+mgpuLaunchKernel(sycl::kernel *kernel, size_t gridX, size_t gridY, size_t gridZ,
+                 size_t blockX, size_t blockY, size_t blockZ,
+                 size_t sharedMemBytes, sycl::queue *queue, void **params,
+                 void ** /*extra*/, size_t paramsCount) {
+  return catchAll([&]() {
+    launchKernel(queue, kernel, gridX, gridY, gridZ, blockX, blockY, blockZ,
+                 sharedMemBytes, params, paramsCount);
+  });
+}
+
+extern "C" SYCL_RUNTIME_EXPORT void mgpuStreamSynchronize(sycl::queue *queue) {
+
+  catchAll([&]() { queue->wait(); });
+}
+
+extern "C" SYCL_RUNTIME_EXPORT void
+mgpuModuleUnload(ze_module_handle_t module) {
+
+  catchAll([&]() { L0_SAFE_CALL(zeModuleDestroy(module)); });
+}
From 88d00a6897d71fded96a4f806ce5ebc46fd2a0de Mon Sep 17 00:00:00 2001
From: Alpha Abdoulaye
Date: Thu, 26 Oct 2023 10:45:08 -0700
Subject: [PATCH 104/877] Reland [dsymutil] Add support for mergeable libraries (#70256)

Reland https://reviews.llvm.org/D158124
Fixed `-fpermissive` error reported by gcc only.
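For context, the DWARF linker drives the new AddressesMap hooks as follows:
while cloning each attribute it checks needToSaveValidRelocs(), saves the
attribute's relocations rebased into the linked file through
updateAndSaveValidRelocs(), and re-keys them with
updateRelocationsWithUnitOffset() once the unit's output offset is final.
A minimal sketch of a conforming implementation (all names below are
hypothetical and simplified; the real one lives in
llvm/tools/dsymutil/DwarfLinkerForBinary):

  #include <cstdint>
  #include <optional>
  #include <string>
  #include <vector>

  struct SavedReloc {
    uint64_t Offset;  // relocation offset rebased into the linked debug_info
    uint64_t UnitKey; // compile-unit offset the relocation is keyed on
  };

  class ToyAddressesMap {
    std::vector<SavedReloc> Saved;
    std::optional<std::string> InstallName{"libfoo.dylib"}; // hypothetical
  public:
    // Only binaries with an install name need their relocations preserved.
    std::optional<std::string> getLibraryInstallName() { return InstallName; }
    bool needToSaveValidRelocs() { return InstallName.has_value(); }

    // Rebase relocations found in [StartOffset, EndOffset) of the input into
    // the linked file by adding LinkedOffset, keyed on the input CU offset.
    void updateAndSaveValidRelocs(bool /*IsDWARF5*/,
                                  uint64_t OriginalUnitOffset,
                                  int64_t LinkedOffset, uint64_t StartOffset,
                                  uint64_t /*EndOffset*/) {
      Saved.push_back({StartOffset + static_cast<uint64_t>(LinkedOffset),
                       OriginalUnitOffset});
    }

    // Once the unit's final output offset is known, re-key saved entries.
    void updateRelocationsWithUnitOffset(uint64_t OriginalUnitOffset,
                                         uint64_t OutputUnitOffset) {
      for (SavedReloc &R : Saved)
        if (R.UnitKey == OriginalUnitOffset)
          R.UnitKey = OutputUnitOffset;
    }
  };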
--- llvm/docs/CommandGuide/dsymutil.rst | 15 ++ llvm/include/llvm/BinaryFormat/Dwarf.def | 1 + llvm/include/llvm/BinaryFormat/MachO.h | 1 + llvm/include/llvm/DWARFLinker/DWARFLinker.h | 23 ++ .../llvm/DWARFLinkerParallel/AddressesMap.h | 18 ++ llvm/include/llvm/TargetParser/Triple.h | 6 +- llvm/lib/DWARFLinker/DWARFLinker.cpp | 61 ++++- llvm/lib/TargetParser/Triple.cpp | 58 ++--- .../Contents/Info.plist | 20 ++ .../Resources/DWARF/bar-relink-variant.dylib | Bin 0 -> 8923 bytes .../aarch64/bar-relink-variant.dylib.yml | 8 + .../bar-relink.dylib.dSYM/Contents/Info.plist | 20 ++ .../Contents/Resources/DWARF/bar-relink.dylib | Bin 0 -> 8923 bytes .../Relocations/aarch64/bar-relink.dylib.yml | 8 + .../Inputs/basic-relink.macho.arm64.dylib | Bin 0 -> 50992 bytes .../Inputs/basic-relink.macho.arm64.o | Bin 0 -> 2176 bytes .../Contents/Info.plist | 20 ++ .../Resources/DWARF/foo-relink-variant.dylib | Bin 0 -> 9196 bytes .../DWARF/foo-relink-variant_debug.dylib | Bin 0 -> 9196 bytes .../aarch64/foo-relink-variant.dylib.yml | 9 + .../foo-relink.dylib.dSYM/Contents/Info.plist | 20 ++ .../Contents/Resources/DWARF/foo-relink.dylib | Bin 0 -> 9292 bytes .../Relocations/aarch64/foo-relink.dylib.yml | 10 + .../Contents/Info.plist | 20 ++ .../Resources/DWARF/proxy-relink.dylib | Bin 0 -> 10046 bytes .../aarch64/proxy-relink.dylib.yml | 14 ++ .../Inputs/two-level-relink.macho.arm64.dylib | Bin 0 -> 50944 bytes .../Inputs/variant-relink.macho.arm64.dylib | Bin 0 -> 50944 bytes llvm/test/tools/dsymutil/basic-linking.test | 122 ++++++++++ llvm/test/tools/dsymutil/cmdline.test | 2 + llvm/tools/dsymutil/CMakeLists.txt | 1 + llvm/tools/dsymutil/DebugMap.cpp | 23 +- llvm/tools/dsymutil/DebugMap.h | 44 ++-- llvm/tools/dsymutil/DwarfLinkerForBinary.cpp | 202 ++++++++++++---- llvm/tools/dsymutil/DwarfLinkerForBinary.h | 112 +++++++-- llvm/tools/dsymutil/LinkUtils.h | 6 + llvm/tools/dsymutil/MachODebugMapParser.cpp | 226 +++++++++++++++--- llvm/tools/dsymutil/Options.td | 11 + llvm/tools/dsymutil/RelocationMap.cpp | 92 +++++++ llvm/tools/dsymutil/RelocationMap.h | 160 +++++++++++++ llvm/tools/dsymutil/dsymutil.cpp | 19 +- llvm/tools/dsymutil/dsymutil.h | 6 +- llvm/tools/llvm-dwarfutil/DebugInfoLinker.cpp | 12 + llvm/tools/llvm-nm/llvm-nm.cpp | 47 ++-- 44 files changed, 1218 insertions(+), 199 deletions(-) create mode 100644 llvm/test/tools/dsymutil/Inputs/bar-relink-variant.dylib.dSYM/Contents/Info.plist create mode 100644 llvm/test/tools/dsymutil/Inputs/bar-relink-variant.dylib.dSYM/Contents/Resources/DWARF/bar-relink-variant.dylib create mode 100644 llvm/test/tools/dsymutil/Inputs/bar-relink-variant.dylib.dSYM/Contents/Resources/Relocations/aarch64/bar-relink-variant.dylib.yml create mode 100644 llvm/test/tools/dsymutil/Inputs/bar-relink.dylib.dSYM/Contents/Info.plist create mode 100644 llvm/test/tools/dsymutil/Inputs/bar-relink.dylib.dSYM/Contents/Resources/DWARF/bar-relink.dylib create mode 100644 llvm/test/tools/dsymutil/Inputs/bar-relink.dylib.dSYM/Contents/Resources/Relocations/aarch64/bar-relink.dylib.yml create mode 100644 llvm/test/tools/dsymutil/Inputs/basic-relink.macho.arm64.dylib create mode 100644 llvm/test/tools/dsymutil/Inputs/basic-relink.macho.arm64.o create mode 100644 llvm/test/tools/dsymutil/Inputs/foo-relink-variant.dylib.dSYM/Contents/Info.plist create mode 100644 llvm/test/tools/dsymutil/Inputs/foo-relink-variant.dylib.dSYM/Contents/Resources/DWARF/foo-relink-variant.dylib create mode 100644 
llvm/test/tools/dsymutil/Inputs/foo-relink-variant.dylib.dSYM/Contents/Resources/DWARF/foo-relink-variant_debug.dylib
 create mode 100644 llvm/test/tools/dsymutil/Inputs/foo-relink-variant.dylib.dSYM/Contents/Resources/Relocations/aarch64/foo-relink-variant.dylib.yml
 create mode 100644 llvm/test/tools/dsymutil/Inputs/foo-relink.dylib.dSYM/Contents/Info.plist
 create mode 100644 llvm/test/tools/dsymutil/Inputs/foo-relink.dylib.dSYM/Contents/Resources/DWARF/foo-relink.dylib
 create mode 100644 llvm/test/tools/dsymutil/Inputs/foo-relink.dylib.dSYM/Contents/Resources/Relocations/aarch64/foo-relink.dylib.yml
 create mode 100644 llvm/test/tools/dsymutil/Inputs/proxy-relink.dylib.dSYM/Contents/Info.plist
 create mode 100644 llvm/test/tools/dsymutil/Inputs/proxy-relink.dylib.dSYM/Contents/Resources/DWARF/proxy-relink.dylib
 create mode 100644 llvm/test/tools/dsymutil/Inputs/proxy-relink.dylib.dSYM/Contents/Resources/Relocations/aarch64/proxy-relink.dylib.yml
 create mode 100644 llvm/test/tools/dsymutil/Inputs/two-level-relink.macho.arm64.dylib
 create mode 100644 llvm/test/tools/dsymutil/Inputs/variant-relink.macho.arm64.dylib
 create mode 100644 llvm/tools/dsymutil/RelocationMap.cpp
 create mode 100644 llvm/tools/dsymutil/RelocationMap.h

diff --git a/llvm/docs/CommandGuide/dsymutil.rst b/llvm/docs/CommandGuide/dsymutil.rst
index 02243e227a24d..df621a429bb5c 100644
--- a/llvm/docs/CommandGuide/dsymutil.rst
+++ b/llvm/docs/CommandGuide/dsymutil.rst
@@ -32,11 +32,26 @@ OPTIONS
    architectures will be linked by default and any architectures that can't be
    properly linked will cause :program:`dsymutil` to return an error.
 
+.. option:: --build-variant-suffix
+
+   Specify the build variant suffix used to build the executable file.
+   There can be multiple variants for the binary of a product, each built
+   slightly differently. The most common build variants are 'debug' and
+   'profile'. Setting the DYLD_IMAGE_SUFFIX environment variable will
+   cause dyld to load the specified variant at runtime.
+
 .. option:: --dump-debug-map
 
    Dump the *executable*'s debug-map (the list of the object files containing
    the debug information) in YAML format and exit. No DWARF link will take
    place.
 
+.. option:: -D
+
+   Specify a directory that contains dSYM files to search for.
+   This is used for mergeable libraries, so dsymutil knows where to look
+   for dSYM files with debug information about symbols present in those
+   libraries.
+
 .. option:: --fat64
 
    Use a 64-bit header when emitting universal binaries.
diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.def b/llvm/include/llvm/BinaryFormat/Dwarf.def
index fb328a0257732..d1abb1f361d3e 100644
--- a/llvm/include/llvm/BinaryFormat/Dwarf.def
+++ b/llvm/include/llvm/BinaryFormat/Dwarf.def
@@ -629,6 +629,7 @@ HANDLE_DW_AT(0x3fec, APPLE_objc_complete_type, 0, APPLE)
 HANDLE_DW_AT(0x3fed, APPLE_property, 0, APPLE)
 HANDLE_DW_AT(0x3fee, APPLE_objc_direct, 0, APPLE)
 HANDLE_DW_AT(0x3fef, APPLE_sdk, 0, APPLE)
+HANDLE_DW_AT(0x3ff0, APPLE_origin, 0, APPLE)
 
 // Attribute form encodings.
HANDLE_DW_FORM(0x01, addr, 2, DWARF)
diff --git a/llvm/include/llvm/BinaryFormat/MachO.h b/llvm/include/llvm/BinaryFormat/MachO.h
index 49991ebe7bfaf..bef70f869520b 100644
--- a/llvm/include/llvm/BinaryFormat/MachO.h
+++ b/llvm/include/llvm/BinaryFormat/MachO.h
@@ -373,6 +373,7 @@ enum StabType {
   N_SSYM = 0x60u,
   N_SO = 0x64u,
   N_OSO = 0x66u,
+  N_LIB = 0x68u,
   N_LSYM = 0x80u,
   N_BINCL = 0x82u,
   N_SOL = 0x84u,
diff --git a/llvm/include/llvm/DWARFLinker/DWARFLinker.h b/llvm/include/llvm/DWARFLinker/DWARFLinker.h
index 6887e441ce8ff..2bd85e30d3b13 100644
--- a/llvm/include/llvm/DWARFLinker/DWARFLinker.h
+++ b/llvm/include/llvm/DWARFLinker/DWARFLinker.h
@@ -62,6 +62,9 @@ class AddressesMap {
   virtual std::optional<int64_t>
   getSubprogramRelocAdjustment(const DWARFDie &DIE) = 0;
 
+  /// Returns the library install name associated with the AddressesMap.
+  virtual std::optional<StringRef> getLibraryInstallName() = 0;
+
   /// Apply the valid relocations to the buffer \p Data, taking into
   /// account that Data is at \p BaseOffset in the .debug_info section.
   ///
@@ -69,6 +72,23 @@ class AddressesMap {
   virtual bool applyValidRelocs(MutableArrayRef<char> Data, uint64_t BaseOffset,
                                 bool IsLittleEndian) = 0;
 
+  /// Check if the linker needs to gather and save relocation info.
+  virtual bool needToSaveValidRelocs() = 0;
+
+  /// Update and save original relocations located in between StartOffset and
+  /// EndOffset. LinkedOffset is the value which should be added to the
+  /// original relocation offset to get the new relocation offset in the
+  /// linked binary.
+  virtual void updateAndSaveValidRelocs(bool IsDWARF5,
+                                        uint64_t OriginalUnitOffset,
+                                        int64_t LinkedOffset,
+                                        uint64_t StartOffset,
+                                        uint64_t EndOffset) = 0;
+
+  /// Update the valid relocations that used OriginalUnitOffset as the compile
+  /// unit offset, and update their values to reflect OutputUnitOffset.
+  virtual void updateRelocationsWithUnitOffset(uint64_t OriginalUnitOffset,
+                                               uint64_t OutputUnitOffset) = 0;
+
   /// Erases all data.
   virtual void clear() = 0;
 };
@@ -751,6 +771,9 @@ class DWARFLinker {
     /// Is there a DW_AT_str_offsets_base in the CU?
     bool AttrStrOffsetBaseSeen = false;
 
+    /// Is there a DW_AT_APPLE_origin in the CU?
+    bool HasAppleOrigin = false;
+
     AttributesInfo() = default;
   };
diff --git a/llvm/include/llvm/DWARFLinkerParallel/AddressesMap.h b/llvm/include/llvm/DWARFLinkerParallel/AddressesMap.h
index 22fbec20d7d37..b451fee4e0b72 100644
--- a/llvm/include/llvm/DWARFLinkerParallel/AddressesMap.h
+++ b/llvm/include/llvm/DWARFLinkerParallel/AddressesMap.h
@@ -55,6 +55,9 @@ class AddressesMap {
   virtual std::optional<int64_t>
   getSubprogramRelocAdjustment(const DWARFDie &DIE) = 0;
 
+  // Returns the library install name associated with the AddressesMap.
+  virtual std::optional<StringRef> getLibraryInstallName() = 0;
+
   /// Apply the valid relocations to the buffer \p Data, taking into
   /// account that Data is at \p BaseOffset in the .debug_info section.
   ///
@@ -62,6 +65,21 @@ class AddressesMap {
   virtual bool applyValidRelocs(MutableArrayRef<char> Data, uint64_t BaseOffset,
                                 bool IsLittleEndian) = 0;
 
+  /// Check if the linker needs to gather and save relocation info.
+  virtual bool needToSaveValidRelocs() = 0;
+
+  /// Update and save relocation values to be serialized.
+  virtual void updateAndSaveValidRelocs(bool IsDWARF5,
+                                        uint64_t OriginalUnitOffset,
+                                        int64_t LinkedOffset,
+                                        uint64_t StartOffset,
+                                        uint64_t EndOffset) = 0;
+
+  /// Update the valid relocations that used OriginalUnitOffset as the compile
+  /// unit offset, and update their values to reflect OutputUnitOffset.
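+  /// (Illustrative example: relocations saved while cloning a unit located
+  /// at input offset 0x40 are keyed on 0x40; once that unit is emitted at
+  /// output offset 0x0, this call rebases those keys to 0x0.)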
+  virtual void updateRelocationsWithUnitOffset(uint64_t OriginalUnitOffset,
+                                               uint64_t OutputUnitOffset) = 0;
+
   /// Erases all data.
   virtual void clear() = 0;
diff --git a/llvm/include/llvm/TargetParser/Triple.h b/llvm/include/llvm/TargetParser/Triple.h
index 53cef0abbe0e1..0f56ac68c851f 100644
--- a/llvm/include/llvm/TargetParser/Triple.h
+++ b/llvm/include/llvm/TargetParser/Triple.h
@@ -418,9 +418,6 @@ class Triple {
   /// Get the architecture (first) component of the triple.
   StringRef getArchName() const;
 
-  /// Get the architecture name based on Kind and SubArch.
-  StringRef getArchName(ArchType Kind, SubArchType SubArch = NoSubArch) const;
-
   /// Get the vendor (second) component of the triple.
   StringRef getVendorName() const;
 
@@ -1118,6 +1115,9 @@ class Triple {
   /// Get the canonical name for the \p Kind architecture.
   static StringRef getArchTypeName(ArchType Kind);
 
+  /// Get the architecture name based on \p Kind and \p SubArch.
+  static StringRef getArchName(ArchType Kind, SubArchType SubArch = NoSubArch);
+
   /// Get the "prefix" canonical name for the \p Kind architecture. This is the
   /// prefix used by the architecture specific builtins, and is suitable for
   /// passing to \see Intrinsic::getIntrinsicForClangBuiltin().
diff --git a/llvm/lib/DWARFLinker/DWARFLinker.cpp b/llvm/lib/DWARFLinker/DWARFLinker.cpp
index 2d8360f100c11..80a4e2adefa6c 100644
--- a/llvm/lib/DWARFLinker/DWARFLinker.cpp
+++ b/llvm/lib/DWARFLinker/DWARFLinker.cpp
@@ -1026,6 +1026,15 @@ unsigned DWARFLinker::DIECloner::cloneStringAttribute(DIE &Die,
     StringEntry = DebugLineStrPool.getEntry(*String);
   } else {
     StringEntry = DebugStrPool.getEntry(*String);
+
+    if (AttrSpec.Attr == dwarf::DW_AT_APPLE_origin) {
+      Info.HasAppleOrigin = true;
+      if (std::optional<StringRef> FileName =
+              ObjFile.Addresses->getLibraryInstallName()) {
+        StringEntry = DebugStrPool.getEntry(*FileName);
+      }
+    }
+
     // Update attributes info.
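+    // (Info.Name / Info.MangledName recorded here are later used to emit
+    // the accelerator-table entries for this DIE.)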
     if (AttrSpec.Attr == dwarf::DW_AT_name)
       Info.Name = StringEntry;
@@ -1637,6 +1646,12 @@ shouldSkipAttribute(bool Update,
   }
 }
 
+struct AttributeLinkedOffsetFixup {
+  int64_t LinkedOffsetFixupVal;
+  uint64_t InputAttrStartOffset;
+  uint64_t InputAttrEndOffset;
+};
+
 DIE *DWARFLinker::DIECloner::cloneDIE(const DWARFDie &InputDIE,
                                       const DWARFFile &File, CompileUnit &Unit,
                                       int64_t PCOffset, uint32_t OutOffset,
@@ -1720,6 +1735,9 @@ DIE *DWARFLinker::DIECloner::cloneDIE(const DWARFDie &InputDIE,
     Flags |= TF_SkipPC;
   }
 
+  std::optional<StringRef> LibraryInstallName =
+      ObjFile.Addresses->getLibraryInstallName();
+  SmallVector<AttributeLinkedOffsetFixup> AttributesFixups;
   for (const auto &AttrSpec : Abbrev->attributes()) {
     if (shouldSkipAttribute(Update, AttrSpec, Flags & TF_SkipPC)) {
       DWARFFormValue::skipValue(AttrSpec.Form, Data, &Offset,
@@ -1727,17 +1745,41 @@ DIE *DWARFLinker::DIECloner::cloneDIE(const DWARFDie &InputDIE,
       continue;
     }
 
+    AttributeLinkedOffsetFixup CurAttrFixup;
+    CurAttrFixup.InputAttrStartOffset = InputDIE.getOffset() + Offset;
+    CurAttrFixup.LinkedOffsetFixupVal =
+        Unit.getStartOffset() + OutOffset - CurAttrFixup.InputAttrStartOffset;
+
     DWARFFormValue Val = AttrSpec.getFormValue();
     uint64_t AttrSize = Offset;
     Val.extractValue(Data, &Offset, U.getFormParams(), &U);
+    CurAttrFixup.InputAttrEndOffset = InputDIE.getOffset() + Offset;
     AttrSize = Offset - AttrSize;
 
-    OutOffset += cloneAttribute(*Die, InputDIE, File, Unit, Val, AttrSpec,
-                                AttrSize, AttrInfo, IsLittleEndian);
+    uint64_t FinalAttrSize =
+        cloneAttribute(*Die, InputDIE, File, Unit, Val, AttrSpec, AttrSize,
+                       AttrInfo, IsLittleEndian);
+    if (FinalAttrSize != 0 && ObjFile.Addresses->needToSaveValidRelocs())
+      AttributesFixups.push_back(CurAttrFixup);
+
+    OutOffset += FinalAttrSize;
   }
 
-  // Look for accelerator entries.
   uint16_t Tag = InputDIE.getTag();
+  // Add the DW_AT_APPLE_origin attribute to the compile unit DIE if we have
+  // an install name and the DWARF doesn't have the attribute yet.
+  const bool NeedsAppleOrigin = (Tag == dwarf::DW_TAG_compile_unit) &&
+                                LibraryInstallName.has_value() &&
+                                !AttrInfo.HasAppleOrigin;
+  if (NeedsAppleOrigin) {
+    auto StringEntry = DebugStrPool.getEntry(LibraryInstallName.value());
+    Die->addValue(DIEAlloc, dwarf::Attribute(dwarf::DW_AT_APPLE_origin),
+                  dwarf::DW_FORM_strp, DIEInteger(StringEntry.getOffset()));
+    AttrInfo.Name = StringEntry;
+    OutOffset += 4;
+  }
+
+  // Look for accelerator entries.
   // FIXME: This is slightly wrong. An inline_subroutine without a
   // low_pc, but with AT_ranges might be interesting to get into the
   // accelerator tables too. For now stick with dsymutil's behavior.
@@ -1806,8 +1848,19 @@ DIE *DWARFLinker::DIECloner::cloneDIE(const DWARFDie &InputDIE,
   Linker.assignAbbrev(NewAbbrev);
   Die->setAbbrevNumber(NewAbbrev.getNumber());
 
+  uint64_t AbbrevNumberSize = getULEB128Size(Die->getAbbrevNumber());
+
   // Add the size of the abbreviation number to the output offset.
-  OutOffset += getULEB128Size(Die->getAbbrevNumber());
+  OutOffset += AbbrevNumberSize;
+
+  // Update the fixups with the size of the abbreviation number.
+  for (AttributeLinkedOffsetFixup &F : AttributesFixups)
+    F.LinkedOffsetFixupVal += AbbrevNumberSize;
+
+  for (AttributeLinkedOffsetFixup &F : AttributesFixups)
+    ObjFile.Addresses->updateAndSaveValidRelocs(
+        Unit.getOrigUnit().getVersion() >= 5, Unit.getOrigUnit().getOffset(),
+        F.LinkedOffsetFixupVal, F.InputAttrStartOffset, F.InputAttrEndOffset);
 
   if (!HasChildren) {
     // Update our size.
diff --git a/llvm/lib/TargetParser/Triple.cpp b/llvm/lib/TargetParser/Triple.cpp
index b9fab469f7476..5d4eb79675f89 100644
--- a/llvm/lib/TargetParser/Triple.cpp
+++ b/llvm/lib/TargetParser/Triple.cpp
@@ -90,6 +90,36 @@ StringRef Triple::getArchTypeName(ArchType Kind) {
   llvm_unreachable("Invalid ArchType!");
 }
 
+StringRef Triple::getArchName(ArchType Kind, SubArchType SubArch) {
+  switch (Kind) {
+  case Triple::mips:
+    if (SubArch == MipsSubArch_r6)
+      return "mipsisa32r6";
+    break;
+  case Triple::mipsel:
+    if (SubArch == MipsSubArch_r6)
+      return "mipsisa32r6el";
+    break;
+  case Triple::mips64:
+    if (SubArch == MipsSubArch_r6)
+      return "mipsisa64r6";
+    break;
+  case Triple::mips64el:
+    if (SubArch == MipsSubArch_r6)
+      return "mipsisa64r6el";
+    break;
+  case Triple::aarch64:
+    if (SubArch == AArch64SubArch_arm64ec)
+      return "arm64ec";
+    if (SubArch == AArch64SubArch_arm64e)
+      return "arm64e";
+    break;
+  default:
+    break;
+  }
+  return getArchTypeName(Kind);
+}
+
 StringRef Triple::getArchTypePrefix(ArchType Kind) {
   switch (Kind) {
   default:
@@ -1143,34 +1173,6 @@ StringRef Triple::getArchName() const {
   return StringRef(Data).split('-').first;           // Isolate first component
 }
 
-StringRef Triple::getArchName(ArchType Kind, SubArchType SubArch) const {
-  switch (Kind) {
-  case Triple::mips:
-    if (SubArch == MipsSubArch_r6)
-      return "mipsisa32r6";
-    break;
-  case Triple::mipsel:
-    if (SubArch == MipsSubArch_r6)
-      return "mipsisa32r6el";
-    break;
-  case Triple::mips64:
-    if (SubArch == MipsSubArch_r6)
-      return "mipsisa64r6";
-    break;
-  case Triple::mips64el:
-    if (SubArch == MipsSubArch_r6)
-      return "mipsisa64r6el";
-    break;
-  case Triple::aarch64:
-    if (SubArch == AArch64SubArch_arm64ec)
-      return "arm64ec";
-    break;
-  default:
-    break;
-  }
-  return getArchTypeName(Kind);
-}
-
 StringRef Triple::getVendorName() const {
   StringRef Tmp = StringRef(Data).split('-').second; // Strip first component
   return Tmp.split('-').first;                       // Isolate second component
diff --git a/llvm/test/tools/dsymutil/Inputs/bar-relink-variant.dylib.dSYM/Contents/Info.plist b/llvm/test/tools/dsymutil/Inputs/bar-relink-variant.dylib.dSYM/Contents/Info.plist
new file mode 100644
index 0000000000000..14d6272d3f8d9
--- /dev/null
+++ b/llvm/test/tools/dsymutil/Inputs/bar-relink-variant.dylib.dSYM/Contents/Info.plist
@@ -0,0 +1,20 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+	<dict>
+		<key>CFBundleDevelopmentRegion</key>
+		<string>English</string>
+		<key>CFBundleIdentifier</key>
+		<string>com.apple.xcode.dsym.bar-relink-variant.dylib</string>
+		<key>CFBundleInfoDictionaryVersion</key>
+		<string>6.0</string>
+		<key>CFBundlePackageType</key>
+		<string>dSYM</string>
+		<key>CFBundleSignature</key>
+		<string>????</string>
+		<key>CFBundleShortVersionString</key>
+		<string>1.0</string>
+		<key>CFBundleVersion</key>
+		<string>1</string>
+	</dict>
+</plist>
diff --git a/llvm/test/tools/dsymutil/Inputs/bar-relink-variant.dylib.dSYM/Contents/Resources/DWARF/bar-relink-variant.dylib b/llvm/test/tools/dsymutil/Inputs/bar-relink-variant.dylib.dSYM/Contents/Resources/DWARF/bar-relink-variant.dylib
new file mode 100644
index 0000000000000000000000000000000000000000..ec2e06a231f88946e6a74540c847fa801fcbbb45
GIT binary patch
literal 8923
zcmeHN%}*0S6rb(32o#qhF+#Xhf(azDZ3#^bMrqTcG$GE>J1E&F+FoOlUk9
zZ+@ISa`52IBZ(eN^bhb4;NVdY81EWC-rJceOKW4jX)=$z&YL&y{r1gomc#Db_g_E%
z=pjTRAl;xRKu1G3qcQ};0O*6E#nSF}GFnr%B_5f??%Tw~j%Hk}{G+nc6qokt<{T+bCGHQy58ZJKw=
zW=SiXD;6v?*7t(p<$X!K7N4tb;y#~&YFakEoUXeGNimy`z9lqerj$?2FX^s+6x0eY
zcocGcpFTgDSl2A5rd`X;&&_0W{0wT~M1gbigpKFf6zkzNp$l;apjfEqW_s}ox~M#6
zBgcu-`2iyf?;f*l#k>M7FF*mQzy<3ITZN^(;cnd6ktxY_u
zaJz^pjrHv_Jig!kKj5*;ia{_S7!V8y1_T3w0l|P^Krrx6GZ294df@?&KR4rZS}#7R
zr9eH=3c5|;XNNn)C>Rh72nGZLf&syRU_dY+7!V8y1_T3w0l~n3#sIdrg9N)=k|cdo
zPT~Ns1F+{S5qR<+w+(2~uB%>?4BEE={-WS?Nba5s1R)+<#rPV55ZZL-5&jmux6FP@
zFa|xl)4Nm;!KUmb&^^yf$V;?+jr_nqIIe<9on112Ga}GaHPHt@)Lx|;DMszw;L!t25pRcb?agH
zE$9W9<97uf?=bTIHJ%veOn{!j287@*3>F*^NW4a|@sKeGPZaLb
R=c7|zMSeI~X(|&-`~`Fg`G)`i
literal 0
HcmV?d00001
diff --git a/llvm/test/tools/dsymutil/Inputs/bar-relink-variant.dylib.dSYM/Contents/Resources/Relocations/aarch64/bar-relink-variant.dylib.yml b/llvm/test/tools/dsymutil/Inputs/bar-relink-variant.dylib.dSYM/Contents/Resources/Relocations/aarch64/bar-relink-variant.dylib.yml
new file mode 100644
index 0000000000000..68fd250cdf2a8
--- /dev/null
+++ b/llvm/test/tools/dsymutil/Inputs/bar-relink-variant.dylib.dSYM/Contents/Resources/Relocations/aarch64/bar-relink-variant.dylib.yml
@@ -0,0 +1,8 @@
+---
+triple: 'arm64-apple-darwin'
+binary-path: bar-relink-variant.dylib
+relocations:
+  - { offset: 0x26, size: 0x8, addend: 0x0, symName: _bar, symObjAddr: 0x0, symBinAddr: 0x3FA0, symSize: 0x8 }
+  - { offset: 0x3F, size: 0x8, addend: 0x0, symName: _baz, symObjAddr: 0x8, symBinAddr: 0x4000, symSize: 0x0 }
+  - { offset: 0x4F, size: 0x8, addend: 0x0, symName: _bar, symObjAddr: 0x0, symBinAddr: 0x3FA0, symSize: 0x8 }
+...
diff --git a/llvm/test/tools/dsymutil/Inputs/bar-relink.dylib.dSYM/Contents/Info.plist b/llvm/test/tools/dsymutil/Inputs/bar-relink.dylib.dSYM/Contents/Info.plist
new file mode 100644
index 0000000000000..37c2a8aebe4c6
--- /dev/null
+++ b/llvm/test/tools/dsymutil/Inputs/bar-relink.dylib.dSYM/Contents/Info.plist
@@ -0,0 +1,20 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+	<dict>
+		<key>CFBundleDevelopmentRegion</key>
+		<string>English</string>
+		<key>CFBundleIdentifier</key>
+		<string>com.apple.xcode.dsym.bar-relink.dylib</string>
+		<key>CFBundleInfoDictionaryVersion</key>
+		<string>6.0</string>
+		<key>CFBundlePackageType</key>
+		<string>dSYM</string>
+		<key>CFBundleSignature</key>
+		<string>????</string>
+		<key>CFBundleShortVersionString</key>
+		<string>1.0</string>
+		<key>CFBundleVersion</key>
+		<string>1</string>
+	</dict>
+</plist>
diff --git a/llvm/test/tools/dsymutil/Inputs/bar-relink.dylib.dSYM/Contents/Resources/DWARF/bar-relink.dylib b/llvm/test/tools/dsymutil/Inputs/bar-relink.dylib.dSYM/Contents/Resources/DWARF/bar-relink.dylib
new file mode 100644
index 0000000000000000000000000000000000000000..9ebd56ecb00a482b3694f1ef2f27dea726effc43
GIT binary patch
literal 8923
zcmeHN&rcIU6rSz22o#qhF+#Xh0|`xJ+Y(3&{-8~Z(tu!u#NO6zyFjJvHoFTbF@YFk
zJgSKa2M-)Pc=Jf2F(!I7@eknWK@S@5nyByX%#@|I5pSBzBd_!3&3oUz`DQunzJ34g
z%a0C1Bm&Y7dIWSZgfl9AF!X?~KmHI4zB^aA`(!&baPQUhF!OtfM1dJ9ApuZ~ClP8#
zVFY~A3;GFE)8;4g^I+6Vs@_58SPClQqG+0H+;pi;=Lhk2#_3w?en>6Xa|OwkZwc^L
z&GoWb(#qz71q=1{J!5!zUlOmy=c<{w&uvgm%ckekH8&wCX7kavgr>}t@`?E+-PQMl
zn&AcaLk{lK=SLIkngrFf>DC_3isGX-MBxau)3&K%BF#?W_TGEPomuT{mAijTQ?UCrvWd+>PxWn
z%pvjEZP9?|FuWy(=NuAGFBEKJt#QA53~z?v?Hv-&acwW7p1;^b3iOuY#ZEG#$)IU^
zwOTPWQ(rNhs$Q(e`^@nAS^nq0!GoN7yvQR8#Q(1%rBLcyRvP_kZQl#H=N
z>XHVO?vSk9xvhNNR3az(lrHI53luv-WF^-<-a8gP8xFwN3UWUbj`4xvSmh-qiRA;L-WQNK+nJ&zbo)~hmrTM@x(A^7!-x$^6LZZp5TojkrwziAOwG5u;73|5?d4-4;gdt
UMBy%dIymN4M`Egypnx$LNK6zGz;o`MX{YNpH6h`P|ABL7
z{xfIJocYbY`)$&1ZePD!LR3Ipir6aH+C4;D?H17}*4ud0vKs0e8(8F)Ku(9!h1@O1
zq3mK=>87=5CAi9sS24#KYgaC!UH$G7C~Hc~N~JTQR9{zaysV1Hd$WsZGhVTZr^hWT
zoJuF7@iodnH{Kg6o?DG;t>l2%(q~zjct4m)j8VCOFS$q5=;k`udOAoey!u__eu0nycXDT7B5_~sJ?EYes_CR%z&zuu6@YS
zvNv;orL3P~8=2D|3Z|mr>B**8G~ViK3x-<~zF@L#mfsiYj7394ekLHisfTDaS2LAf
z&06x3+$Cl=>sqc4pWuF8co?^ru}OJSnS4hzY-V082b6A}pN~fsd&xSdHVr>LZMlET
zcK5N`^PVqXnZbQtHYa8ox616coC_8G<(9SW{6%w-??iKO){&V7RFg?1Yk0?E%R5u)
zrZ(R^%|PZ&VUsxfdWhtmdO~U0x0Eq#_ByASL%t2zUU+~22!H?xfB*=900@8p2!H?x
fB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=9
z00@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p
z2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?x
zfB*=900@8p2!H?xfB*=900{hF3EWwK{DV7g>f-(HpxWoU`0ngGch-Ngug7g&xHq59-Ycgg
z*UAG&iVJD@HEv(xHurbQc%UHrqV}6g*%2~6s4qFbeD}4x!WFnhu5lNZE^_YUj^@SX
zD(ob62vV!7u`a$W>yo?ia)($<-*cC?S>zsX3E#l?IyZb>9JAmV?yKXv#c}i){#m1q
zCJ2B42!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p
z2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfWQMLVE?`E2fQYh
0|5{K0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH
0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI
5coF%FLy0tYh)`>
#+6+>svfCoUD-NNL=@0HYOYIZt38uWal62xY0~AUa~t{b_eSziHp^bFB$chBN8PkT
`uA@n@&se)=0t)X4h3mcB${fE1v_cH^h{x=(ca=oL{mA`SWa^B90}^I8Ihz_@pB!~
g6n5*l8@FpA7Ly-wD2{@&3vkO%*>~dwV6*@j;>~%FQ2t4pGSB-$wzB$
+UK3iM~|2DxR*^A@i+66{grd4&R^n-Hs_b~XG2*wF7m#~xPJfSyhZEtoWPnT|946a
S7KhW)YoE#f=S}XIG%{EE?)_GrTydeCaqF`46E6IvSZUg8u1-7?_DM
kWe`3_D>}}{97bHQJQfj_wg~h9w(QFEB;5+!6rW)Qc8(Vk=I2fjvVsYt~~~2>ZJM6
XXCHz-LUvTm1p9?Gtra3_*U1)CVzV4*zLzEzdrQix4j>3+JCSyar(FB9?!+IXIouM
8w>AkIJ9Bi)*n89eZltXy+e(A-#)$SN=MnQ%a_LVw!XTetKNSr^U{IiFTWSRF}>=r
H~jPY-(S1=ZnOLS>Y`bHhQ~GEEFE_E+#h?sTeFIJn`UzB=E#P00|^K@<5^{Jm`bbC-6ZCPsSIc(Kk)i@7y_eTP8*8NzVPwJ@=e%
&YgSi%(s91{MQyC3<04b4IQB&4~4u4y=N+O-J!~BHOhhLc^_=j962FUM&7w7w&Qtq
yjESEYilnX#Fv777bNNtX>m!%5K8HWQMc(Yuv5xs`81C%J>vQ_2XU_zDH~pWaX~uu
de>Vb{;~JUbH{oi``P-K@oa(*?q?`9U(=rtC?(0a%se6EBj)=`@Ao-;*tE(>^8Lm<
KQf-ku(1k>`kYRu-+9M`P`t5x+1ygpKbaLDW}IcRl!r{%d>NY&-E#g
UoA((0lHsWP2VF=a=-BE(
w(ow)+F|TOL8)OeIq-JwP%eckO~aTRKrJuB_ZBM53<}daoXf$?l9|gLhS_He${pQf
T9kN%K3R0`O-nVXczOZEALS0U=cHWxM-K&5OpVP
xQwY#lJ%d`3!W_-w<+Sb_sFg_%}O8U{63FLn(&(
l0>~U;D>6480J-{1>btS|0hP*_X6|Ne}L-4S)ws=y*l0e^@|^bpZ+k4^$L^@nrfiK
AA~~a(8)!uz%E!U+%0K6*GP9Mt#)e|#-G+!0U#2n@o#6Z@;CK94ZAYw&A(@isZ>7*
Xl*+bcXEi{4;<6DzJ%|56l3-f5ZT$+cn+ZT>=DLSfR$g6{f#@U5Niruk+e
l^!JQXx2?&?67jE!)SV(#N42C)zpPf&?i^5KNsUFF@DKB93~c}a
literal 0
HcmV?d00001
diff --git a/llvm/test/tools/dsymutil/Inputs/foo-relink-variant.dylib.dSYM/Contents/Info.plist b/llvm/test/tools/dsymutil/Inputs/foo-relink-variant.dylib.dSYM/Contents/Info.plist
new file mode 100644
index 0000000000000..7e84e95bd3f48
--- /dev/null
+++ b/llvm/test/tools/dsymutil/Inputs/foo-relink-variant.dylib.dSYM/Contents/Info.plist
@@ -0,0 +1,20 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+	<dict>
+		<key>CFBundleDevelopmentRegion</key>
+		<string>English</string>
+		<key>CFBundleIdentifier</key>
+		<string>com.apple.xcode.dsym.foo-relink-variant.dylib</string>
+		<key>CFBundleInfoDictionaryVersion</key>
+		<string>6.0</string>
+		<key>CFBundlePackageType</key>
+		<string>dSYM</string>
+		<key>CFBundleSignature</key>
+		<string>????</string>
+		<key>CFBundleShortVersionString</key>
+		<string>1.0</string>
+		<key>CFBundleVersion</key>
+		<string>1</string>
+	</dict>
+</plist>
diff --git a/llvm/test/tools/dsymutil/Inputs/foo-relink-variant.dylib.dSYM/Contents/Resources/DWARF/foo-relink-variant.dylib b/llvm/test/tools/dsymutil/Inputs/foo-relink-variant.dylib.dSYM/Contents/Resources/DWARF/foo-relink-variant.dylib
new file mode 100644
index 0000000000000000000000000000000000000000..523f85712de567adb16857bb2603c07d64bc4638
GIT binary patch
literal 9196
zcmeHNJ8Tn47@qZdlMp)#1OY)%EH44#upJa6;B=612E`;igG5Aeduw|WnV4P6y9u0h
z5(p@u195Gn6clubiUOKQAR!tmI$CsBfRIvx;O_f(W*mE+=yKN~%}BHVWB&h}na@*r
z9&bK>{FY`+V<_FI9jNz`Wbv{Yq7U`hlMm+}7`gi0-lua@htBNXFZ@v=GhtC>EP+b#
zYz@~f$ki}6q5g$x+ZT?HUw{%XgK-D94G5})OU1TB_gctxZnxrkG2
zDoaa==hPi<(hb_iH!H^Xt4N=L$0LMC0RCfTl~!T2#k8=gFPAiP^fss--{Ao0ug
zE@+E~5>{lje|UTm4Ln|A%ge=h#xHOrBWG7M=`_p1`U!O|@-p@qQqI_d-k3fan*Wr(
zI^BipiTmkiWEs)TXmTF9k!0;XyhMZS;S_Z0p?RE%W<&GeqIt20cVxBppsm$*n(GYR
zy!pHN>AJaQ<7RWM){AExi42;0JN~@BX|PW>52t=i71kZavjTm9B^nOQe+SdeBdJsh
z&}@kArma#Hcl*|iTMsI}XZ0V*X)1K|G$6=DDY
literal 0
HcmV?d00001
diff --git a/llvm/test/tools/dsymutil/Inputs/foo-relink-variant.dylib.dSYM/Contents/Resources/DWARF/foo-relink-variant_debug.dylib b/llvm/test/tools/dsymutil/Inputs/foo-relink-variant.dylib.dSYM/Contents/Resources/DWARF/foo-relink-variant_debug.dylib
new file mode 100644
index 0000000000000000000000000000000000000000..523f85712de567adb16857bb2603c07d64bc4638
GIT binary patch
literal 9196
zcmeHNJ8Tn47@qZdlMp)#1OY)%EH44#upJa6;B=612E`;igG5Aeduw|WnV4P6y9u0h
z5(p@u195Gn6clubiUOKQAR!tmI$CsBfRIvx;O_f(W*mE+=yKN~%}BHVWB&h}na@*r
z9&bK>{FY`+V<_FI9jNz`Wbv{Yq7U`hlMm+}7`gi0-lua@htBNXFZ@v=GhtC>EP+b#
zYz@~f$ki}6q5g$x+ZT?HUw{%XgK-D94G5})OU1TB_gctxZnxrkG2
zDoaa==hPi<(hb_iH!H^Xt4N=L$0LMC0RCfTl~!T2#k8=gFPAiP^fss--{Ao0ug
zE@+E~5>{lje|UTm4Ln|A%ge=h#xHOrBWG7M=`_p1`U!O|@-p@qQqI_d-k3fan*Wr(
zI^BipiTmkiWEs)TXmTF9k!0;XyhMZS;S_Z0p?RE%W<&GeqIt20cVxBppsm$*n(GYR
zy!pHN>AJaQ<7RWM){AExi42;0JN~@BX|PW>52t=i71kZavjTm9B^nOQe+SdeBdJsh
z&}@kArma#Hcl*|iTMsI}XZ0V*X)1K|G$6=DDY
literal 0
HcmV?d00001
diff --git a/llvm/test/tools/dsymutil/Inputs/foo-relink-variant.dylib.dSYM/Contents/Resources/Relocations/aarch64/foo-relink-variant.dylib.yml b/llvm/test/tools/dsymutil/Inputs/foo-relink-variant.dylib.dSYM/Contents/Resources/Relocations/aarch64/foo-relink-variant.dylib.yml
new file mode 100644
index 0000000000000..0bc06e9a9e857
--- /dev/null
+++ b/llvm/test/tools/dsymutil/Inputs/foo-relink-variant.dylib.dSYM/Contents/Resources/Relocations/aarch64/foo-relink-variant.dylib.yml
@@ -0,0 +1,9 @@
+---
+triple: 'arm64-apple-darwin'
+binary-path: foo-relink-variant.dylib
+relocations:
+  - { offset: 0x26, size: 0x8, addend: 0x0, symName: _foo, symObjAddr: 0x0, symBinAddr: 0x3F64, symSize: 0x20 }
+  - { offset: 0x33, size: 0x8, addend: 0x0, symName: _foo, symObjAddr: 0x0, symBinAddr: 0x3F64, symSize: 0x20 }
+  - { offset: 0x88, size: 0x8, addend: 0x0, symName: _altfoo, symObjAddr: 0x0, symBinAddr: 0x3F84, symSize: 0x24 }
+  - { offset: 0x95, size: 0x8, addend: 0x0, symName: _altfoo, symObjAddr: 0x0, symBinAddr: 0x3F84, symSize: 0x24 }
symBinAddr: 0x3F84, symSize: 0x24 } +... diff --git a/llvm/test/tools/dsymutil/Inputs/foo-relink.dylib.dSYM/Contents/Info.plist b/llvm/test/tools/dsymutil/Inputs/foo-relink.dylib.dSYM/Contents/Info.plist new file mode 100644 index 0000000000000..e919260131558 --- /dev/null +++ b/llvm/test/tools/dsymutil/Inputs/foo-relink.dylib.dSYM/Contents/Info.plist @@ -0,0 +1,20 @@ + + + + + CFBundleDevelopmentRegion + English + CFBundleIdentifier + com.apple.xcode.dsym.foo-relink.dylib + CFBundleInfoDictionaryVersion + 6.0 + CFBundlePackageType + dSYM + CFBundleSignature + ???? + CFBundleShortVersionString + 1.0 + CFBundleVersion + 1 + + diff --git a/llvm/test/tools/dsymutil/Inputs/foo-relink.dylib.dSYM/Contents/Resources/DWARF/foo-relink.dylib b/llvm/test/tools/dsymutil/Inputs/foo-relink.dylib.dSYM/Contents/Resources/DWARF/foo-relink.dylib new file mode 100644 index 0000000000000000000000000000000000000000..94480df788f119b27746827f6ace437ef091891c GIT binary patch literal 9292 zcmeHN&ugYW11C6E6>veEkPsItRTZemf?B?}GvnB6N6I0`8EN*-d-L8m zGoN=Q&%Al~@8ADr8Pga_2DK0Mhcs!t3_%Q{j*Q=Wyl3^38@K*C{q4^~cb*&&_BfDP z(5Nz&LM4B;jWHdf2uT>bQU64BocW^*^JpZ?MAE=@1A;2yQgNKf`zqpXu6N?iAK`to zYc8$W*pz11+q!_a>|d_=Rj1~k3(%3A-)(_+wk=4`rOni8(mJP69cOmtc=>2)My_>2 zxPXti2w4^?@yNO42ErKxfsRu;J$Z6UctM;HO^K7^`mFfP`k@lGR0B=8#c+!0Ut+m66e5`dpxc?P&aDR!tUT*O+ zJkv4EK)m=YD?%D!>{m#+avHToLk@}-{6ZF_gf+OF-uJ&%-)t{r_ZuI-)})U8A2$L7q=Be-{WZpCYcwZPAfj@w1Mn9Il3k{n*ULx zj}9Bvqz{FVAH{qh-ZbCbAM<~Ium7JCbV`a>QRDXtl5X2jiQgYcnSFnTHpTyfN=1&z z=OJ$5z)o)%%}d8!8iSc=(LEpB&bfZm{0Nd>M*6_$%^YUzr~IMp + + + + CFBundleDevelopmentRegion + English + CFBundleIdentifier + com.apple.xcode.dsym.proxy-relink.dylib + CFBundleInfoDictionaryVersion + 6.0 + CFBundlePackageType + dSYM + CFBundleSignature + ???? + CFBundleShortVersionString + 1.0 + CFBundleVersion + 1 + + diff --git a/llvm/test/tools/dsymutil/Inputs/proxy-relink.dylib.dSYM/Contents/Resources/DWARF/proxy-relink.dylib b/llvm/test/tools/dsymutil/Inputs/proxy-relink.dylib.dSYM/Contents/Resources/DWARF/proxy-relink.dylib new file mode 100644 index 0000000000000000000000000000000000000000..c79d8f7825b58cc9ad52b2d9d014ee99089a5e4a GIT binary patch literal 10046 zcmeHNUuYaf7@xhny}RV%*|dZ6 zyJ@Xf8Z49uMGz|#DilFPP!Rg42>pW(3R=ZKeeuZ`MG-|GEDGxH+nKp+Hg~2z`La{K ze1GRR-+Vi_bY}MEA3y!o!kEU;nn7{Uktk`RZ35c>dTOlo!;XRN&IgCC6w{qMetbmQ z-Ht2?jfk-jh~n8Afwcg>4{AGT5y6~K4Np%|fUgbubgAm#L=mZT?wHRwQUUYhbqok! zX(`0bsisEQLQRIix$Vs7?3&4swoae zY;tivUp`qZ7kIIJ)N=LZ^}8tXW<5B543Eow4i{aVq3l$a@7%L{Qap`88Kt;X6{CEK z=y~IjBCi?C^BADl$D{S6ns?5$#KhVWURobZd8r=Y3sN4G+{e6mk&E+!;TTInBCjP~ z;i>gRp4I9#f*8(sjqV;F9vW3^_bT;7qhe5#{A;1PbMwz{1Cm8P`?C{|$tbYTq^NLG zLhp|WPK@T*BvhK8nVX;CrDEA6QzN`SHF!#PY`_$rQ8mglro95M?{m@ds_fr)Zv!A{ zR(Nu`uE4u3@m`Yo-M&LSBbTe1Csxk)+!tctcjfr!?z^-6@FR5t{q^hpQuIG2^Xu(g z4bP}lN+vHGbEaJ}@_xL_Uy1$-=JMwMfQL{&-VKTOj)b{!hj`AZ3ZCvtJWDRvwL-@*K6S3Ir7hDiBm4s6bGGpaMY! 
zf(ir`2r3X%AgDl40dEE9Iy1s(SFdT>pUJz3(bX47W6nuyYsoxemelTh&dASNX`?!~ zttahX|KMUWq_*~LdZhU`T^kR@0YcmR2%}q|I=G&lehoz(AhhY>=J8Mj-A4Zn!gC~0 zgHUG*>-AXkSv@kYH@~lkqhoG9yIe53jH;85I89KKtl_CMjy4xBu?+8FAc=cpal@^O zK<6j*nuy;Amxx!>723)kBK~?UUd>J2tND9~_$_#B0#W7OD1HI}5f>!xKJOOR1#J)L zifd);8n|~>p^spQYT-}8;<1R=rOkZ{qZW2gk|e55tEh$jfWHcL8Zx1J=l(nNXe(mJ zA!zZYFk66t7HWf0kHoyen$-@Y7N%I111%rx9yrxlm*Bky^49Gk_~TxKW(^UZei(sG zp_f}mNQ;rChOaZq7=qL&-y`xG$Xji96t6+E4iauyjbf1Ij#(Q;Yt=R<*XiNp3#XGm zpGmG+zbVfyi@rSz8OfJFspXa zDyO=8(_QJV)E3vewY#?qTDqqz-TmNd?~x3_74wDz7&|kaw+dz&x6GNHR@pJjj-45z zcaT*vtC_te0|$TAIXm-&k>4{poyM7;Yw*NO4vi~!cMo!~3&)v=uBn;Pa%JAJS$>wz z4|#TCB71;OjvgH5QwR1AbI#Oh;<7hY>nat9ntbHoeLe)nTwkZ1>O~ue>QbhG)D31COr@q&ZEGFIqy=X5cE5 uOj10zB*M?=1qKVFZ`Tm9|JHM_Kw|VlBlC6;%s0=Dya}n6D!HiDE9*b6pWSEx literal 0 HcmV?d00001 diff --git a/llvm/test/tools/dsymutil/Inputs/proxy-relink.dylib.dSYM/Contents/Resources/Relocations/aarch64/proxy-relink.dylib.yml b/llvm/test/tools/dsymutil/Inputs/proxy-relink.dylib.dSYM/Contents/Resources/Relocations/aarch64/proxy-relink.dylib.yml new file mode 100644 index 0000000000000..44dd0a2342da8 --- /dev/null +++ b/llvm/test/tools/dsymutil/Inputs/proxy-relink.dylib.dSYM/Contents/Resources/Relocations/aarch64/proxy-relink.dylib.yml @@ -0,0 +1,14 @@ +--- +triple: 'arm64-apple-darwin' +binary-path: proxy-relink.dylib +relocations: + - { offset: 0x26, size: 0x8, addend: 0x0, symName: _display, symObjAddr: 0x0, symBinAddr: 0x3F1C, symSize: 0x1C } + - { offset: 0x41, size: 0x8, addend: 0x0, symName: _display, symObjAddr: 0x0, symBinAddr: 0x3F1C, symSize: 0x1C } + - { offset: 0x7C, size: 0x8, addend: 0x0, symName: _bar, symObjAddr: 0x3FA0, symBinAddr: 0x3F38, symSize: 0x8 } + - { offset: 0x99, size: 0x8, addend: 0x0, symName: _baz, symObjAddr: 0x4000, symBinAddr: 0x8000, symSize: 0x0 } + - { offset: 0xA9, size: 0x8, addend: 0x0, symName: _bar, symObjAddr: 0x3FA0, symBinAddr: 0x3F38, symSize: 0x8 } + - { offset: 0xE8, size: 0x8, addend: 0x0, symName: _foo, symObjAddr: 0x3F60, symBinAddr: 0x3F40, symSize: 0x24 } + - { offset: 0xF9, size: 0x8, addend: 0x0, symName: _foo, symObjAddr: 0x3F60, symBinAddr: 0x3F40, symSize: 0x24 } + - { offset: 0x14E, size: 0x8, addend: 0x0, symName: _altfoo, symObjAddr: 0x3F84, symBinAddr: 0x3F64, symSize: 0x24 } + - { offset: 0x15F, size: 0x8, addend: 0x0, symName: _altfoo, symObjAddr: 0x3F84, symBinAddr: 0x3F64, symSize: 0x24 } +... 
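The Relocations/*.yml files checked in above are the machine-readable relocation maps that the new relinking support reads back out of a library's dSYM bundle. As a minimal, self-contained sketch of what one YAML entry means -- the struct and function below are illustrative stand-ins, not dsymutil code; only the field semantics mirror the ValidReloc fields and the relocate()/getRelocValue() logic introduced later in this patch:

#include <cstdint>
#include <optional>
#include <string>

// Field names track the YAML keys in the test inputs above.
struct RelocEntry {
  uint64_t Offset;                    // "offset": byte position to patch in the debug info
  uint32_t Size;                      // "size": width of the patched value in bytes
  uint64_t Addend;                    // "addend": constant folded into the relocation
  std::string SymName;                // "symName": the referenced symbol
  std::optional<uint64_t> SymObjAddr; // "symObjAddr": symbol address in the object file
  uint64_t SymBinAddr;                // "symBinAddr": symbol address in the linked binary
};

// The value written into the linked debug info: the symbol's address in the
// final binary plus the addend, minus the object-file address when one is
// recorded (so object-relative values are not offset twice).
uint64_t linkedValue(const RelocEntry &R) {
  uint64_t Value = R.SymBinAddr + R.Addend;
  if (R.SymObjAddr)
    Value -= *R.SymObjAddr;
  return Value;
}

For the first _display entry above, for instance, this works out to 0x3F1C + 0x0 - 0x0 = 0x3F1C as the address patched into the relinked debug info.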
diff --git a/llvm/test/tools/dsymutil/Inputs/two-level-relink.macho.arm64.dylib b/llvm/test/tools/dsymutil/Inputs/two-level-relink.macho.arm64.dylib new file mode 100644 index 0000000000000000000000000000000000000000..333ac9f5d904ce2e802bee5ce44bc0346498c85b GIT binary patch literal 50944 zcmeI*e{54#6bJCLwwqJNhCp-+!%aj2!diYTWYI**uK=R33}R;ZHi5h~$z#qgC771CRCiw z)6zMW-7G5^YE7zuhs=D%vz)bv+9I**&UQ(ZGDFKsB%5mzx2H1mZB+UA-s~o^nJ-u6 z)9aR1n@GmP(N$`HX1=W|pIfc#ScUy^%baC3M_a7@+0Tgm zu&9atb?IF^rqr=tjTy07m+Y{tU?3S#d+k{6^G>Yu>E}t}p}ZE_d0t$!WbyoYi}bUb zRyqA@)bT@3Eca&K-*Aro+zT`2lP$5bNN9B^QWg(I!qEnAW1zM^<_*LfXO??|ZIN&d zpTMo0CB&W5L@Rliq2iSsOMP+=l6x=5Q+RAWLR85G4CCWU?$RcxF2ASA+|Bwp?AMz3 z{DEA?@GiMd#raddBgdz8?EKW{zdXNc*>s*O2p8BgI&MG+~oIC z!Ci7W*O4=`$k&{R`}nSSToXsq*>G$BDMy>^R(& zc3Wrsl&X^P)?Di8;^RvNbnsGv|6p!5Ju1iN_?Xu^^`4288adb=e)o0n2$%mlxk}C~ zUF^J%TdefWQC^LecjNs$O4skX%dy$y{oOo%^SjV~`8N1R}}0Sd1R7 z2~bfmoM?&!+UQA{nZTQhbH__M?m_ubP3o@K0gn^82aR}IMqJBr9cL;3B=i=Acr`b#p_&3ZoJeP%tII5z9qk`aH!vD7moqn-*?&m+7p_2^jF zlgsncB$j&gdI86!+;x+6v%a$&oArGs>v}VfJsI&ejt8pGuj(7i>r$VN<$B^JU7EyF zpSf<Wffo}j8n9zUr^$9yJVo z$@3!X`uPzLiS_-CfH=IJ4YaOHJ`uYnxsQwnf4e(LFIa4oIQzO`omn)VI99C~}*+Y!Tr}+1=65c%XCnnem#BpjcXs&xpz#-nfX_3N!_*K!w&p-b=RpC>&{)+ z?Am{2sQmZ4p6@NW{_?KQ&BwofcihN{?_FFmJ@M?`?l}klT=2xxFSyt3yx;soo&U@)uwC(sUi~a-1bU{D> literal 0 HcmV?d00001 diff --git a/llvm/test/tools/dsymutil/Inputs/variant-relink.macho.arm64.dylib b/llvm/test/tools/dsymutil/Inputs/variant-relink.macho.arm64.dylib new file mode 100644 index 0000000000000000000000000000000000000000..3c434096f7dc9736894757e70a423396128c57be GIT binary patch literal 50944 zcmeI5U2IfE6vxkg&{ioeLPKb5?Mi@Dpu38Qd?^c-NPz%>MyrUox80U**zS_uTci|W zLsG0Us3n*XpFpA_(Le~{BSEcFQG-SxiqW8=#sDfPiHU)T^_+WWy0@D)eIVh*|ADhJ z|Cw{oJ@cE{eVg=)>zDt{C-M+m4*PKSb$+6DyGK;ax`Ss8qk3LVHH&NuXALMlgonjB zm2HM$HLSCg;3hj?V74>Yq(Ve{GvSgb?Usg-vRdj=y+hggI#fPhn{A@q`SMggJ#QHG zDJvO`uTt^Z`L?TkJ~gkkl6`W^m|?WUpNYmJMl{}-uytR4J5)ZsUeWIQbY9)cKJ2jb zRL-fMV?45C@p5N3!YX2bR_Ya(4VJw%j8zHOHn8qLUmnNW7IvOj%046ZVNnBp>(Wu& zrd2~^V~VbyA#>u6b< zdws)LhuKGGjjuJ6Q8R8;BpYJU`08M@S>Kchn#ty=Q-YD!ShSAAZJWX*ggbQ+)pB#c z(zUFmK3Rk0-p_g>w@t^{xqzYEU&<~vPPu%aCa}BfW7)@Q`T731+{sb$p6x%+?8yCW z*4`Ia20t{5o|`_I#{%q5&I0Zg*nQa_%K6(T@3ZR{%_iTMY3!2Ad5=8lp-@XI8RB(| zFKT)|-@4(WP6qKZEq%Ci6knme(e}ZfE}wC>=Mg&H zQ%FZI7KV@J4WTH-&E3WUkOW$pBe>b0Revh_qe-m*IPpxxh*UuMiC5^bYwC?TUyC<_00@8p2!H?x zfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=9 z00@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xWDy9kUdmp>?orN-ZGP2`R;_Mq z9nK*N>k;*=S7~ESiuIEqcFOp%jYNJkW;G@f zbX%QC#gS-gP0VbiyJciN2bBcIN;>w4d?+jDHlhW$_hsottQ+~M$dwCrm^>QfkHBp1 zuobb7b!nNG_mr@UiC&yF?|y$f&o@b+vIMv8de&v>eXRSdXO^m`l;@=$t#v&C9v9Q~ zxaZyVe8(~Fdb(M=>-juO=W@dS>Um1lBikwUXszoJMq;`i_q@BF2ROi8PZ?`>J>^+i zSoc>?TGewqzfUjYaEvpU(sYT}hpl+)UoS8C4xKF8|l3Pp|easTtC<;oye#?ccup!u;na zy*b!?#d|q5zqS`W{=|h#m9KibPyBP~u;-YybLfnHBXf6o&XxY~)o)v_zS8L1H7RH6 zU-e~;SBDKf^5vD?XI5-J|I0S-!OMf?zuEbMVA-|Dc6V+&d1&jH5#=vkSTQ*@V}JMD zqklen*SH0~&9B{T{=uIAW8#x}B|VF*O4GY#)Q{C|ueWVE8K_FktUZ3_SkHssJj+}C EA1fw95C8xG literal 0 HcmV?d00001 diff --git a/llvm/test/tools/dsymutil/basic-linking.test b/llvm/test/tools/dsymutil/basic-linking.test index cc521430b0c87..88cd3293efa24 100644 --- a/llvm/test/tools/dsymutil/basic-linking.test +++ b/llvm/test/tools/dsymutil/basic-linking.test @@ -2,6 +2,9 @@ RUN: dsymutil -no-output -verbose -oso-prepend-path=%p %p/Inputs/basic.macho.x86 RUN: dsymutil -no-output -verbose -oso-prepend-path=%p %p/Inputs/basic-lto.macho.x86_64 | FileCheck %s --check-prefix=CHECK-LTO RUN: dsymutil -no-output -verbose -oso-prepend-path=%p 
%p/Inputs/basic-archive.macho.x86_64 | FileCheck %s --check-prefix=CHECK-ARCHIVE RUN: dsymutil -no-output -verbose -oso-prepend-path=%p %p/Inputs/basic.macho.x86_64 %p/Inputs/basic-lto.macho.x86_64 %p/Inputs/basic-archive.macho.x86_64 | FileCheck %s --check-prefixes=CHECK,CHECK-LTO,CHECK-ARCHIVE +RUN: dsymutil -no-output -verbose -oso-prepend-path=%p -D %p/Inputs %p/Inputs/basic-relink.macho.arm64.dylib | FileCheck %s --check-prefix=CHECK-RELINK +RUN: dsymutil -no-output -verbose -oso-prepend-path=%p -D %p/Inputs %p/Inputs/two-level-relink.macho.arm64.dylib | FileCheck %s --check-prefix=CHECK-RELINK-TWO +RUN: dsymutil -no-output -verbose -oso-prepend-path=%p -build-variant-suffix=_debug -D WrongPath -D %p/Inputs %p/Inputs/variant-relink.macho.arm64.dylib | FileCheck %s --check-prefix=CHECK-RELINK-VARIANT This test check the basic Dwarf linking process through the debug dumps. @@ -175,3 +178,122 @@ CHECK-ARCHIVE: Found valid debug map entry: _inc 0x0000000000000070 => 0x0000000 CHECK-ARCHIVE-NEXT: Keeping subprogram DIE: CHECK-ARCHIVE-NEXT: DW_TAG_subprogram CHECK-ARCHIVE-NEXT: DW_AT_name {{.*}}"inc") + + +================================= Simple relink ================================ +CHECK-RELINK: DEBUG MAP OBJECT: {{.*}}basic-relink.macho.arm64.o +CHECK-RELINK: Input compilation unit: +CHECK-RELINK-NEXT: TAG_compile_unit +CHECK-RELINK-NOT: TAG +CHECK-RELINK: AT_name {{.*}}basic-relink.macho.arm64.c + +CHECK-RELINK: DEBUG MAP OBJECT: {{.*}}foo-relink.dylib +CHECK-RELINK: Input compilation unit: +CHECK-RELINK-NEXT: TAG_compile_unit +CHECK-RELINK-NOT: TAG +CHECK-RELINK: AT_name {{.*}}foo-relink.c + +CHECK-RELINK: Input compilation unit: +CHECK-RELINK-NEXT: TAG_compile_unit +CHECK-RELINK-NOT: TAG +CHECK-RELINK: AT_name {{.*}}altfoo-relink.c + +CHECK-RELINK: DEBUG MAP OBJECT: {{.*}}bar-relink.dylib +CHECK-RELINK: Input compilation unit: +CHECK-RELINK-NEXT: TAG_compile_unit +CHECK-RELINK-NOT: TAG +CHECK-RELINK: AT_name {{.*}}bar-relink.c + +CHECK-RELINK-NOT: Found valid debug map entry +CHECK-RELINK: Found valid debug map entry: _display 0x0000000000000000 => 0x0000000000003f10 +CHECK-RELINK-NEXT: Keeping subprogram DIE: +CHECK-RELINK-NEXT: DW_TAG_subprogram +CHECK-RELINK: DW_AT_name{{.*}}"display" + +CHECK-RELINK: Found valid debug map entry: _foo 0x0000000000003f54 => 0x0000000000003f2c +CHECK-RELINK-NEXT: Keeping subprogram DIE: +CHECK-RELINK-NEXT: DW_TAG_subprogram +CHECK-RELINK: DW_AT_name {{.*}}"foo" + +CHECK-RELINK-NOT: Found valid debug map entry +CHECK-RELINK: Found valid debug map entry: _foo_unused 0x0000000000003f74 => 0x0000000000003f4c +CHECK-RELINK-NEXT: Keeping subprogram DIE: +CHECK-RELINK-NEXT: DW_TAG_subprogram +CHECK-RELINK: DW_AT_name {{.*}}"foo_unused" + +CHECK-RELINK-NOT: Found valid debug map entry +CHECK-RELINK: Found valid debug map entry: _altfoo 0x0000000000003f7c => 0x0000000000003f54 +CHECK-RELINK-NEXT: Keeping subprogram DIE: +CHECK-RELINK-NEXT: DW_TAG_subprogram +CHECK-RELINK: DW_AT_name {{.*}}"altfoo" + +CHECK-RELINK-NOT: Found valid debug map entry +CHECK-RELINK: Found valid debug map entry: _baz 0x0000000000004000 => 0x0000000000008000 +CHECK-RELINK-NEXT: Keeping variable DIE: +CHECK-RELINK-NEXT: DW_TAG_variable +CHECK-RELINK-NEXT: DW_AT_name {{.*}}"baz" + +CHECK-RELINK-NOT: Found valid debug map entry +CHECK-RELINK: Found valid debug map entry: _bar 0x0000000000003fa0 => 0x0000000000003f78 +CHECK-RELINK-NEXT: Keeping subprogram DIE: +CHECK-RELINK-NEXT: DW_TAG_subprogram +CHECK-RELINK: DW_AT_name {{.*}}"bar" + +================================= Two level 
relink ================================ +CHECK-RELINK-TWO: DEBUG MAP OBJECT: {{.*}}proxy-relink.dylib +CHECK-RELINK-TWO: Input compilation unit: +CHECK-RELINK-TWO-NEXT: TAG_compile_unit +CHECK-RELINK-TWO-NOT: TAG +CHECK-RELINK-TWO: AT_name {{.*}}two-level-relink.macho.arm64.c + +CHECK-RELINK-TWO: Input compilation unit: +CHECK-RELINK-TWO-NEXT: TAG_compile_unit +CHECK-RELINK-TWO-NOT: TAG +CHECK-RELINK-TWO: AT_name {{.*}}bar-relink.c +CHECK-RELINK-TWO: DW_AT_APPLE_origin {{.*}}/path/to/bar-relink.dylib + +CHECK-RELINK-TWO: Input compilation unit: +CHECK-RELINK-TWO-NEXT: TAG_compile_unit +CHECK-RELINK-TWO-NOT: TAG +CHECK-RELINK-TWO: AT_name {{.*}}foo-relink.c +CHECK-RELINK-TWO: DW_AT_APPLE_origin {{.*}}/path/to/foo-relink.dylib + +CHECK-RELINK-TWO: Input compilation unit: +CHECK-RELINK-TWO-NEXT: TAG_compile_unit +CHECK-RELINK-TWO-NOT: TAG +CHECK-RELINK-TWO: AT_name {{.*}}altfoo-relink.c +CHECK-RELINK-TWO: DW_AT_APPLE_origin {{.*}}/path/to/foo-relink.dylib + +CHECK-RELINK-TWO-NOT: Found valid debug map entry +CHECK-RELINK-TWO: Found valid debug map entry: _display 0x0000000000003f1c => 0x0000000000003f1c +CHECK-RELINK-TWO-NEXT: Keeping subprogram DIE: +CHECK-RELINK-TWO-NEXT: DW_TAG_subprogram +CHECK-RELINK-TWO: DW_AT_name{{.*}}"display" + +CHECK-RELINK-TWO-NOT: Found valid debug map entry +CHECK-RELINK-TWO: Found valid debug map entry: _baz 0x0000000000008000 => 0x0000000000008000 +CHECK-RELINK-TWO-NEXT: Keeping variable DIE: +CHECK-RELINK-TWO-NEXT: DW_TAG_variable +CHECK-RELINK-TWO-NEXT: DW_AT_name {{.*}}"baz" + +CHECK-RELINK-TWO-NOT: Found valid debug map entry +CHECK-RELINK-TWO: Found valid debug map entry: _bar 0x0000000000003f38 => 0x0000000000003f38 +CHECK-RELINK-TWO-NEXT: Keeping subprogram DIE: +CHECK-RELINK-TWO-NEXT: DW_TAG_subprogram +CHECK-RELINK-TWO: DW_AT_name {{.*}}"bar" + +CHECK-RELINK-TWO: Found valid debug map entry: _foo 0x0000000000003f40 => 0x0000000000003f40 +CHECK-RELINK-TWO-NEXT: Keeping subprogram DIE: +CHECK-RELINK-TWO-NEXT: DW_TAG_subprogram +CHECK-RELINK-TWO: DW_AT_name {{.*}}"foo" + +CHECK-RELINK-TWO-NOT: Found valid debug map entry +CHECK-RELINK-TWO: Found valid debug map entry: _altfoo 0x0000000000003f64 => 0x0000000000003f64 +CHECK-RELINK-TWO-NEXT: Keeping subprogram DIE: +CHECK-RELINK-TWO-NEXT: DW_TAG_subprogram +CHECK-RELINK-TWO: DW_AT_name {{.*}}"altfoo" + +================================= Build variants relink ================================ +CHECK-RELINK-VARIANT: DEBUG MAP OBJECT: {{.*}}basic-relink.macho.arm64.o +CHECK-RELINK-VARIANT: DEBUG MAP OBJECT: {{.*}}foo-relink-variant_debug.dylib +CHECK-RELINK-VARIANT: DEBUG MAP OBJECT: {{.*}}bar-relink-variant.dylib diff --git a/llvm/test/tools/dsymutil/cmdline.test b/llvm/test/tools/dsymutil/cmdline.test index 2317852f3c489..36cf3f542695c 100644 --- a/llvm/test/tools/dsymutil/cmdline.test +++ b/llvm/test/tools/dsymutil/cmdline.test @@ -7,7 +7,9 @@ HELP-NOT: -reverse-iterate HELP: Dsymutil Options: CHECK: -accelerator CHECK: -arch +CHECK: -build-variant-suffix CHECK: -dump-debug-map +CHECK: -D CHECK: -fat64 CHECK: -flat CHECK: -gen-reproducer diff --git a/llvm/tools/dsymutil/CMakeLists.txt b/llvm/tools/dsymutil/CMakeLists.txt index 3cb7594d2fd92..c612bfd9150c4 100644 --- a/llvm/tools/dsymutil/CMakeLists.txt +++ b/llvm/tools/dsymutil/CMakeLists.txt @@ -30,6 +30,7 @@ add_llvm_tool(dsymutil MachODebugMapParser.cpp MachOUtils.cpp Reproducer.cpp + RelocationMap.cpp SymbolMap.cpp DEPENDS diff --git a/llvm/tools/dsymutil/DebugMap.cpp b/llvm/tools/dsymutil/DebugMap.cpp index d4e2c2b2cfac7..dcdecdfe8210b 100644 --- 
a/llvm/tools/dsymutil/DebugMap.cpp +++ b/llvm/tools/dsymutil/DebugMap.cpp @@ -45,6 +45,11 @@ DebugMapObject::DebugMapObject(StringRef ObjectFilename, bool DebugMapObject::addSymbol(StringRef Name, std::optional ObjectAddress, uint64_t LinkedAddress, uint32_t Size) { + if (Symbols.count(Name)) { + // Symbol was previously added. + return true; + } + auto InsertResult = Symbols.insert( std::make_pair(Name, SymbolMapping(ObjectAddress, LinkedAddress, Size))); @@ -53,6 +58,12 @@ bool DebugMapObject::addSymbol(StringRef Name, return InsertResult.second; } +void DebugMapObject::setRelocationMap(dsymutil::RelocationMap &RM) { + RelocMap.emplace(RM); +} + +void DebugMapObject::setInstallName(StringRef IN) { InstallName.emplace(IN); } + void DebugMapObject::print(raw_ostream &OS) const { OS << getObjectFilename() << ":\n"; // Sort the symbols in alphabetical order, like llvm-nm (and to get @@ -158,8 +169,8 @@ struct MappingTraits::YamlDMO { std::vector Entries; }; -void MappingTraits>:: - mapping(IO &io, std::pair &s) { +void MappingTraits>::mapping( + IO &io, std::pair &s) { io.mapRequired("sym", s.first); io.mapOptional("objAddr", s.second.ObjectAddress); io.mapRequired("binAddr", s.second.BinaryAddress); @@ -275,7 +286,13 @@ MappingTraits::YamlDMO::denormalize(IO &IO) { } } - dsymutil::DebugMapObject Res(Path, sys::toTimePoint(Timestamp), MachO::N_OSO); + uint8_t Type = MachO::N_OSO; + if (Path.endswith(".dylib")) { + // FIXME: find a more resilient way + Type = MachO::N_LIB; + } + dsymutil::DebugMapObject Res(Path, sys::toTimePoint(Timestamp), Type); + for (auto &Entry : Entries) { auto &Mapping = Entry.second; std::optional ObjAddress; diff --git a/llvm/tools/dsymutil/DebugMap.h b/llvm/tools/dsymutil/DebugMap.h index 86cb88d32492d..9c3a698fa1191 100644 --- a/llvm/tools/dsymutil/DebugMap.h +++ b/llvm/tools/dsymutil/DebugMap.h @@ -21,6 +21,7 @@ #ifndef LLVM_TOOLS_DSYMUTIL_DEBUGMAP_H #define LLVM_TOOLS_DSYMUTIL_DEBUGMAP_H +#include "RelocationMap.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" @@ -134,22 +135,6 @@ class DebugMap { /// linked binary for all the linked atoms in this object file. class DebugMapObject { public: - struct SymbolMapping { - std::optional ObjectAddress; - yaml::Hex64 BinaryAddress; - yaml::Hex32 Size; - - SymbolMapping(std::optional ObjectAddr, uint64_t BinaryAddress, - uint32_t Size) - : BinaryAddress(BinaryAddress), Size(Size) { - if (ObjectAddr) - ObjectAddress = *ObjectAddr; - } - - /// For YAML IO support - SymbolMapping() = default; - }; - using YAMLSymbolMapping = std::pair; using DebugMapEntry = StringMapEntry; @@ -182,6 +167,16 @@ class DebugMapObject { } const std::vector &getWarnings() const { return Warnings; } + const std::optional &getRelocationMap() const { + return RelocMap; + } + void setRelocationMap(dsymutil::RelocationMap &RM); + + const std::optional &getInstallName() const { + return InstallName; + } + void setInstallName(StringRef IN); + void print(raw_ostream &OS) const; #ifndef NDEBUG void dump() const; @@ -196,10 +191,13 @@ class DebugMapObject { std::string Filename; sys::TimePoint Timestamp; - StringMap Symbols; + StringMap Symbols; DenseMap AddressToMapping; uint8_t Type; + std::optional RelocMap; + std::optional InstallName; + std::vector Warnings; /// For YAMLIO support. 
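Before the YAML traits changes below, note how the new optional state on DebugMapObject is meant to flow. The helper function here is hypothetical -- only the accessor names come from the header above -- but it matches the steps MachODebugMapParser performs further down in this patch when it resolves an N_LIB stab:

#include "DebugMap.h"
#include "RelocationMap.h"

// Hypothetical glue, assuming a parsed RelocationMap and the library's
// install name are already in hand; this is a sketch, not code from the patch.
static void attachLibraryInfo(llvm::dsymutil::DebugMapObject &DMO,
                              llvm::dsymutil::RelocationMap &RM,
                              llvm::StringRef InstallName) {
  DMO.setInstallName(InstallName); // taken from the N_LIB stab's name
  DMO.setRelocationMap(RM);        // parsed from Contents/Resources/Relocations
  // Both getters return std::optional, so consumers test before use:
  if (DMO.getRelocationMap())
    (void)DMO.getRelocationMap()->getNumberOfEntries();
}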
@@ -225,10 +223,8 @@ namespace yaml { using namespace llvm::dsymutil; -template <> -struct MappingTraits> { - static void mapping(IO &io, - std::pair &s); +template <> struct MappingTraits> { + static void mapping(IO &io, std::pair &s); static const bool flow = true; }; @@ -237,12 +233,6 @@ template <> struct MappingTraits { static void mapping(IO &io, dsymutil::DebugMapObject &DMO); }; -template <> struct ScalarTraits { - static void output(const Triple &val, void *, raw_ostream &out); - static StringRef input(StringRef scalar, void *, Triple &value); - static QuotingType mustQuote(StringRef) { return QuotingType::Single; } -}; - template <> struct SequenceTraits>> { static size_t diff --git a/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp b/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp index 39776ae5a9200..a8fea1e271227 100644 --- a/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp +++ b/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp @@ -189,6 +189,44 @@ static Error remarksErrorHandler(const DebugMapObject &DMO, return createFileError(FE->getFileName(), std::move(NewE)); } +template +Error DwarfLinkerForBinary::emitRelocations( + const DebugMap &DM, + std::vector> &ObjectsForLinking) { + // Return early if the "Resources" directory is not being written to. + if (!Options.ResourceDir) + return Error::success(); + + RelocationMap RM(DM.getTriple(), DM.getBinaryPath()); + for (auto &Obj : ObjectsForLinking) { + if (!Obj.OutRelocs->isInitialized()) + continue; + Obj.OutRelocs->addValidRelocs(RM); + } + + SmallString<128> InputPath; + SmallString<128> Path; + // Create the "Relocations" directory in the "Resources" directory, and + // create an architecture-specific directory in the "Relocations" directory. + StringRef ArchName = Triple::getArchName(RM.getTriple().getArch(), + RM.getTriple().getSubArch()); + sys::path::append(Path, *Options.ResourceDir, "Relocations", ArchName); + if (std::error_code EC = sys::fs::create_directories(Path.str(), true, + sys::fs::perms::all_all)) + return errorCodeToError(EC); + + // Append the file name. 
+ sys::path::append(Path, sys::path::filename(DM.getBinaryPath())); + Path.append(".yml"); + + std::error_code EC; + raw_fd_ostream OS(Path.str(), EC, sys::fs::OF_Text); + if (EC) + return errorCodeToError(EC); + + RM.print(OS); + return Error::success(); +} static Error emitRemarks(const LinkOptions &Options, StringRef BinaryPath, StringRef ArchName, const remarks::RemarkLinker &RL) { @@ -229,30 +267,31 @@ static Error emitRemarks(const LinkOptions &Options, StringRef BinaryPath, } template -ErrorOr> -DwarfLinkerForBinary::loadObject(const DebugMapObject &Obj, - const DebugMap &DebugMap, - remarks::RemarkLinker &RL) { +ErrorOr> DwarfLinkerForBinary::loadObject( + const DebugMapObject &Obj, const DebugMap &DebugMap, + remarks::RemarkLinker &RL, + std::shared_ptr DLBRM) { auto ErrorOrObj = loadObject(Obj, DebugMap.getTriple()); std::unique_ptr Res; if (ErrorOrObj) { + auto Context = DWARFContext::create( + *ErrorOrObj, DWARFContext::ProcessDebugRelocations::Process, nullptr, + "", + [&](Error Err) { + handleAllErrors(std::move(Err), [&](ErrorInfoBase &Info) { + reportError(Info.message()); + }); + }, + [&](Error Warning) { + handleAllErrors(std::move(Warning), [&](ErrorInfoBase &Info) { + reportWarning(Info.message()); + }); + }); + DLBRM->init(*Context); Res = std::make_unique( - Obj.getObjectFilename(), - DWARFContext::create( - *ErrorOrObj, DWARFContext::ProcessDebugRelocations::Process, - nullptr, "", - [&](Error Err) { - handleAllErrors(std::move(Err), [&](ErrorInfoBase &Info) { - reportError(Info.message()); - }); - }, - [&](Error Warning) { - handleAllErrors(std::move(Warning), [&](ErrorInfoBase &Info) { - reportWarning(Info.message()); - }); - }), - std::make_unique(*this, *ErrorOrObj, Obj), + Obj.getObjectFilename(), std::move(Context), + std::make_unique(*this, *ErrorOrObj, Obj, DLBRM), [&](StringRef FileName) { BinHolder.eraseObjectEntry(FileName); }); Error E = RL.link(*ErrorOrObj); @@ -614,7 +653,7 @@ template bool DwarfLinkerForBinary::linkImpl( const DebugMap &Map, typename Linker::OutputFileType ObjectType) { - std::vector> ObjectsForLinking; + std::vector> ObjectsForLinking; DebugMap DebugMap(Map.getTriple(), Map.getBinaryPath()); @@ -668,10 +707,12 @@ bool DwarfLinkerForBinary::linkImpl( auto &Obj = DebugMap.addDebugMapObject( Path, sys::TimePoint(), MachO::N_OSO); + auto DLBRelocMap = std::make_shared(); if (ErrorOr> ErrorOrObj = - loadObject(Obj, DebugMap, RL)) { - ObjectsForLinking.emplace_back(std::move(*ErrorOrObj)); - return *ObjectsForLinking.back(); + loadObject(Obj, DebugMap, RL, + DLBRelocMap)) { + ObjectsForLinking.emplace_back(std::move(*ErrorOrObj), DLBRelocMap); + return *ObjectsForLinking.back().Object; } else { // Try and emit more helpful warnings by applying some heuristics. 
StringRef ObjFile = ContainerName; @@ -782,15 +823,18 @@ bool DwarfLinkerForBinary::linkImpl( continue; } + auto DLBRelocMap = std::make_shared(); if (ErrorOr> ErrorOrObj = - loadObject(*Obj, Map, RL)) { - ObjectsForLinking.emplace_back(std::move(*ErrorOrObj)); - GeneralLinker->addObjectFile(*ObjectsForLinking.back(), Loader, + loadObject(*Obj, Map, RL, DLBRelocMap)) { + ObjectsForLinking.emplace_back(std::move(*ErrorOrObj), DLBRelocMap); + GeneralLinker->addObjectFile(*ObjectsForLinking.back().Object, Loader, OnCUDieLoaded); } else { - ObjectsForLinking.push_back(std::make_unique( - Obj->getObjectFilename(), nullptr, nullptr)); - GeneralLinker->addObjectFile(*ObjectsForLinking.back()); + ObjectsForLinking.push_back( + {std::make_unique(Obj->getObjectFilename(), nullptr, + nullptr), + DLBRelocMap}); + GeneralLinker->addObjectFile(*ObjectsForLinking.back().Object); } } @@ -815,6 +859,10 @@ bool DwarfLinkerForBinary::linkImpl( if (Options.NoOutput) return true; + if (Error E = + emitRelocations(Map, ObjectsForLinking)) + return error(toString(std::move(E))); + if (Options.ResourceDir && !ParseableSwiftInterfaces.empty()) { StringRef ArchName = Triple::getArchTypeName(Map.getTriple().getArch()); if (auto E = copySwiftInterfaces(ArchName)) @@ -903,12 +951,14 @@ void DwarfLinkerForBinary::AddressManager:: continue; } if (const auto *Mapping = DMO.lookupSymbol(*SymbolName)) - ValidRelocs.emplace_back(Offset64, RelocSize, Addend, Mapping); + ValidRelocs.emplace_back(Offset64, RelocSize, Addend, Mapping->getKey(), + Mapping->getValue()); } else if (const auto *Mapping = DMO.lookupObjectAddress(SymAddress)) { // Do not store the addend. The addend was the address of the symbol in // the object file, the address in the binary that is stored in the debug // map doesn't need to be offset. - ValidRelocs.emplace_back(Offset64, RelocSize, SymOffset, Mapping); + ValidRelocs.emplace_back(Offset64, RelocSize, SymOffset, + Mapping->getKey(), Mapping->getValue()); } } } @@ -966,20 +1016,17 @@ bool DwarfLinkerForBinary::AddressManager:: } template -std::vector< - typename DwarfLinkerForBinary::AddressManager::ValidReloc> +std::vector DwarfLinkerForBinary::AddressManager::getRelocations( const std::vector &Relocs, uint64_t StartPos, uint64_t EndPos) { - std::vector< - DwarfLinkerForBinary::AddressManager::ValidReloc> - Res; + std::vector Res; auto CurReloc = partition_point(Relocs, [StartPos](const ValidReloc &Reloc) { - return Reloc.Offset < StartPos; + return (uint64_t)Reloc.Offset < StartPos; }); while (CurReloc != Relocs.end() && CurReloc->Offset >= StartPos && - CurReloc->Offset < EndPos) { + (uint64_t)CurReloc->Offset < EndPos) { Res.push_back(*CurReloc); CurReloc++; } @@ -990,12 +1037,12 @@ DwarfLinkerForBinary::AddressManager::getRelocations( template void DwarfLinkerForBinary::AddressManager::printReloc( const ValidReloc &Reloc) { - const auto &Mapping = Reloc.Mapping->getValue(); + const auto &Mapping = Reloc.SymbolMapping; const uint64_t ObjectAddress = Mapping.ObjectAddress ? 
uint64_t(*Mapping.ObjectAddress) : std::numeric_limits::max(); - outs() << "Found valid debug map entry: " << Reloc.Mapping->getKey() << "\t" + outs() << "Found valid debug map entry: " << Reloc.SymbolName << "\t" << format("0x%016" PRIx64 " => 0x%016" PRIx64 "\n", ObjectAddress, uint64_t(Mapping.BinaryAddress)); } @@ -1004,8 +1051,8 @@ template int64_t DwarfLinkerForBinary::AddressManager::getRelocValue( const ValidReloc &Reloc) { int64_t AddrAdjust = relocate(Reloc); - if (Reloc.Mapping->getValue().ObjectAddress) - AddrAdjust -= uint64_t(*Reloc.Mapping->getValue().ObjectAddress); + if (Reloc.SymbolMapping.ObjectAddress) + AddrAdjust -= uint64_t(*Reloc.SymbolMapping.ObjectAddress); return AddrAdjust; } @@ -1116,12 +1163,40 @@ std::optional DwarfLinkerForBinary::AddressManager< } } +template +std::optional DwarfLinkerForBinary::AddressManager< + AddressesMapBase>::getLibraryInstallName() { + return LibInstallName; +} + template uint64_t DwarfLinkerForBinary::AddressManager::relocate( const ValidReloc &Reloc) const { - return Reloc.Mapping->getValue().BinaryAddress + Reloc.Addend; + return Reloc.SymbolMapping.BinaryAddress + Reloc.Addend; +} + +template +void DwarfLinkerForBinary::AddressManager< + AddressesMapBase>::updateAndSaveValidRelocs(bool IsDWARF5, + uint64_t OriginalUnitOffset, + int64_t LinkedOffset, + uint64_t StartOffset, + uint64_t EndOffset) { + std::vector InRelocs = + getRelocations(ValidDebugInfoRelocs, StartOffset, EndOffset); + if (IsDWARF5) + InRelocs = getRelocations(ValidDebugAddrRelocs, StartOffset, EndOffset); + DwarfLinkerRelocMap->updateAndSaveValidRelocs( + IsDWARF5, InRelocs, OriginalUnitOffset, LinkedOffset); } +template +void DwarfLinkerForBinary::AddressManager:: + updateRelocationsWithUnitOffset(uint64_t OriginalUnitOffset, + uint64_t OutputUnitOffset) { + DwarfLinkerRelocMap->updateRelocationsWithUnitOffset(OriginalUnitOffset, + OutputUnitOffset); +} /// Apply the valid relocations found by findValidRelocs() to /// the buffer \p Data, taking into account that Data is at \p BaseOffset /// in the debug_info section. @@ -1133,6 +1208,7 @@ uint64_t DwarfLinkerForBinary::AddressManager::relocate( template bool DwarfLinkerForBinary::AddressManager::applyValidRelocs( MutableArrayRef Data, uint64_t BaseOffset, bool IsLittleEndian) { + std::vector Relocs = getRelocations( ValidDebugInfoRelocs, BaseOffset, BaseOffset + Data.size()); @@ -1148,9 +1224,47 @@ bool DwarfLinkerForBinary::AddressManager::applyValidRelocs( assert(CurReloc.Size <= sizeof(Buf)); memcpy(&Data[CurReloc.Offset - BaseOffset], Buf, CurReloc.Size); } - return Relocs.size() > 0; } +void DwarfLinkerForBinaryRelocationMap::init(DWARFContext &Context) { + for (const std::unique_ptr &CU : Context.compile_units()) + StoredValidDebugInfoRelocsMap.insert( + std::make_pair(CU->getOffset(), std::vector())); + // FIXME: Support relocations debug_addr (DWARF5). +} + +void DwarfLinkerForBinaryRelocationMap::addValidRelocs(RelocationMap &RM) { + for (const auto &DebugInfoRelocs : StoredValidDebugInfoRelocsMap) { + for (const auto &InfoReloc : DebugInfoRelocs.second) + RM.addRelocationMapEntry(InfoReloc); + } + // FIXME: Support relocations debug_addr (DWARF5). 
+} + +void DwarfLinkerForBinaryRelocationMap::updateRelocationsWithUnitOffset( + uint64_t OriginalUnitOffset, uint64_t OutputUnitOffset) { + std::vector &StoredValidDebugInfoRelocs = + StoredValidDebugInfoRelocsMap[OriginalUnitOffset]; + for (ValidReloc &R : StoredValidDebugInfoRelocs) { + R.Offset = (uint64_t)R.Offset + OutputUnitOffset; + } + // FIXME: Support relocations debug_addr (DWARF5). +} + +void DwarfLinkerForBinaryRelocationMap::updateAndSaveValidRelocs( + bool IsDWARF5, std::vector &InRelocs, uint64_t UnitOffset, + int64_t LinkedOffset) { + std::vector &OutRelocs = + StoredValidDebugInfoRelocsMap[UnitOffset]; + if (IsDWARF5) + OutRelocs = StoredValidDebugAddrRelocsMap[UnitOffset]; + + for (ValidReloc &R : InRelocs) { + OutRelocs.emplace_back(R.Offset + LinkedOffset, R.Size, R.Addend, + R.SymbolName, R.SymbolMapping); + } +} + } // namespace dsymutil } // namespace llvm diff --git a/llvm/tools/dsymutil/DwarfLinkerForBinary.h b/llvm/tools/dsymutil/DwarfLinkerForBinary.h index 230f569a6988c..328cd9197d0d1 100644 --- a/llvm/tools/dsymutil/DwarfLinkerForBinary.h +++ b/llvm/tools/dsymutil/DwarfLinkerForBinary.h @@ -13,6 +13,7 @@ #include "DebugMap.h" #include "LinkUtils.h" #include "MachOUtils.h" +#include "RelocationMap.h" #include "llvm/DWARFLinker/DWARFLinker.h" #include "llvm/DWARFLinker/DWARFLinkerCompileUnit.h" #include "llvm/DWARFLinker/DWARFLinkerDeclContext.h" @@ -21,10 +22,48 @@ #include "llvm/Remarks/RemarkFormat.h" #include "llvm/Remarks/RemarkLinker.h" #include +#include namespace llvm { namespace dsymutil { +/// DwarfLinkerForBinaryRelocationMap contains the logic to handle the +/// relocations and to store them inside an associated RelocationMap. +class DwarfLinkerForBinaryRelocationMap { +public: + void init(DWARFContext &Context); + + bool isInitialized() { + return StoredValidDebugInfoRelocsMap.getMemorySize() != 0; + } + + void addValidRelocs(RelocationMap &RM); + + void updateAndSaveValidRelocs(bool IsDWARF5, + std::vector &InRelocs, + uint64_t UnitOffset, int64_t LinkedOffset); + + void updateRelocationsWithUnitOffset(uint64_t OriginalUnitOffset, + uint64_t OutputUnitOffset); + + /// Map compilation unit offset to the valid relocations to store + /// @{ + DenseMap> StoredValidDebugInfoRelocsMap; + DenseMap> StoredValidDebugAddrRelocsMap; + /// @} + + DwarfLinkerForBinaryRelocationMap() = default; +}; + +template struct ObjectWithRelocMap { + ObjectWithRelocMap( + std::unique_ptr Object, + std::shared_ptr OutRelocs) + : Object(std::move(Object)), OutRelocs(OutRelocs) {} + std::unique_ptr Object; + std::shared_ptr OutRelocs; +}; + /// The core of the Dsymutil Dwarf linking logic. /// /// The link of the dwarf information from the object files will be @@ -67,26 +106,11 @@ class DwarfLinkerForBinary { /// Keeps track of relocations. template class AddressManager : public AddressesMapBase { - struct ValidReloc { - uint64_t Offset; - uint32_t Size; - uint64_t Addend; - const DebugMapObject::DebugMapEntry *Mapping; - - ValidReloc(uint64_t Offset, uint32_t Size, uint64_t Addend, - const DebugMapObject::DebugMapEntry *Mapping) - : Offset(Offset), Size(Size), Addend(Addend), Mapping(Mapping) {} - - bool operator<(const ValidReloc &RHS) const { - return Offset < RHS.Offset; - } - bool operator<(uint64_t RHS) const { return Offset < RHS; } - }; const DwarfLinkerForBinary &Linker; /// The valid relocations for the current DebugMapObject. - /// This vector is sorted by relocation offset. + /// These vectors are sorted by relocation offset. 
/// { std::vector ValidDebugInfoRelocs; std::vector ValidDebugAddrRelocs; @@ -94,6 +118,12 @@ class DwarfLinkerForBinary { StringRef SrcFileName; + uint8_t DebugMapObjectType; + + std::shared_ptr DwarfLinkerRelocMap; + + std::optional LibInstallName; + /// Returns list of valid relocations from \p Relocs, /// between \p StartOffset and \p NextOffset. /// @@ -115,9 +145,29 @@ class DwarfLinkerForBinary { public: AddressManager(DwarfLinkerForBinary &Linker, const object::ObjectFile &Obj, - const DebugMapObject &DMO) - : Linker(Linker), SrcFileName(DMO.getObjectFilename()) { - findValidRelocsInDebugSections(Obj, DMO); + const DebugMapObject &DMO, + std::shared_ptr DLBRM) + : Linker(Linker), SrcFileName(DMO.getObjectFilename()), + DebugMapObjectType(MachO::N_OSO), DwarfLinkerRelocMap(DLBRM) { + if (DMO.getRelocationMap().has_value()) { + DebugMapObjectType = MachO::N_LIB; + LibInstallName.emplace(DMO.getInstallName().value()); + const RelocationMap &RM = DMO.getRelocationMap().value(); + for (const auto &Reloc : RM.relocations()) { + const auto *DebugMapEntry = DMO.lookupSymbol(Reloc.SymbolName); + if (!DebugMapEntry) + continue; + std::optional ObjAddress; + ObjAddress.emplace(DebugMapEntry->getValue().ObjectAddress.value()); + ValidDebugInfoRelocs.emplace_back( + Reloc.Offset, Reloc.Size, Reloc.Addend, Reloc.SymbolName, + SymbolMapping(ObjAddress, DebugMapEntry->getValue().BinaryAddress, + DebugMapEntry->getValue().Size)); + // FIXME: Support relocations debug_addr. + } + } else { + findValidRelocsInDebugSections(Obj, DMO); + } } ~AddressManager() override { clear(); } @@ -158,9 +208,20 @@ class DwarfLinkerForBinary { std::optional getSubprogramRelocAdjustment(const DWARFDie &DIE) override; + std::optional getLibraryInstallName() override; + bool applyValidRelocs(MutableArrayRef Data, uint64_t BaseOffset, bool IsLittleEndian) override; + bool needToSaveValidRelocs() override { return true; } + + void updateAndSaveValidRelocs(bool IsDWARF5, uint64_t OriginalUnitOffset, + int64_t LinkedOffset, uint64_t StartOffset, + uint64_t EndOffset) override; + + void updateRelocationsWithUnitOffset(uint64_t OriginalUnitOffset, + uint64_t OutputUnitOffset) override; + void clear() override { ValidDebugInfoRelocs.clear(); ValidDebugAddrRelocs.clear(); @@ -180,11 +241,11 @@ class DwarfLinkerForBinary { /// Attempt to load a debug object from disk. ErrorOr loadObject(const DebugMapObject &Obj, const Triple &triple); - template - ErrorOr> loadObject(const DebugMapObject &Obj, - const DebugMap &DebugMap, - remarks::RemarkLinker &RL); + ErrorOr> + loadObject(const DebugMapObject &Obj, const DebugMap &DebugMap, + remarks::RemarkLinker &RL, + std::shared_ptr DLBRM); void collectRelocationsToApplyToSwiftReflectionSections( const object::SectionRef &Section, StringRef &Contents, @@ -207,6 +268,11 @@ class DwarfLinkerForBinary { bool linkImpl(const DebugMap &Map, typename Linker::OutputFileType ObjectType); + template + Error emitRelocations( + const DebugMap &DM, + std::vector> &ObjectsForLinking); + raw_fd_ostream &OutFile; BinaryHolder &BinHolder; LinkOptions Options; diff --git a/llvm/tools/dsymutil/LinkUtils.h b/llvm/tools/dsymutil/LinkUtils.h index 88c17d5036899..0bf6d9aac1a3f 100644 --- a/llvm/tools/dsymutil/LinkUtils.h +++ b/llvm/tools/dsymutil/LinkUtils.h @@ -93,6 +93,12 @@ struct LinkOptions { llvm::IntrusiveRefCntPtr VFS = vfs::getRealFileSystem(); + /// -build-variant-suffix. + std::string BuildVariantSuffix; + + /// Paths where to search for the .dSYM files of merged libraries. 
+ std::vector DSYMSearchPaths; + /// Fields used for linking and placing remarks into the .dSYM bundle. /// @{ diff --git a/llvm/tools/dsymutil/MachODebugMapParser.cpp b/llvm/tools/dsymutil/MachODebugMapParser.cpp index d9bf2301e21e5..9623b71714582 100644 --- a/llvm/tools/dsymutil/MachODebugMapParser.cpp +++ b/llvm/tools/dsymutil/MachODebugMapParser.cpp @@ -9,6 +9,7 @@ #include "BinaryHolder.h" #include "DebugMap.h" #include "MachOUtils.h" +#include "RelocationMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/Object/MachO.h" @@ -28,9 +29,13 @@ class MachODebugMapParser { public: MachODebugMapParser(llvm::IntrusiveRefCntPtr VFS, StringRef BinaryPath, ArrayRef Archs, - StringRef PathPrefix = "", bool Verbose = false) + ArrayRef DSYMSearchPaths, + StringRef PathPrefix = "", StringRef VariantSuffix = "", + bool Verbose = false) : BinaryPath(std::string(BinaryPath)), Archs(Archs.begin(), Archs.end()), - PathPrefix(std::string(PathPrefix)), BinHolder(VFS, Verbose), + DSYMSearchPaths(DSYMSearchPaths.begin(), DSYMSearchPaths.end()), + PathPrefix(std::string(PathPrefix)), + VariantSuffix(std::string(VariantSuffix)), BinHolder(VFS, Verbose), CurrentDebugMapObject(nullptr), SkipDebugMapObject(false) {} /// Parses and returns the DebugMaps of the input binary. The binary contains @@ -47,7 +52,9 @@ class MachODebugMapParser { private: std::string BinaryPath; SmallVector Archs; + SmallVector DSYMSearchPaths; std::string PathPrefix; + std::string VariantSuffix; /// Owns the MemoryBuffer for the main binary. BinaryHolder BinHolder; @@ -87,6 +94,9 @@ class MachODebugMapParser { void switchToNewDebugMapObject(StringRef Filename, sys::TimePoint Timestamp); + void + switchToNewLibDebugMapObject(StringRef Filename, + sys::TimePoint Timestamp); void resetParserState(); uint64_t getMainBinarySymbolAddress(StringRef Name); std::vector getMainBinarySymbolNames(uint64_t Value); @@ -176,8 +186,6 @@ void MachODebugMapParser::addCommonSymbols() { /// everything up to add symbols to the new one. void MachODebugMapParser::switchToNewDebugMapObject( StringRef Filename, sys::TimePoint Timestamp) { - addCommonSymbols(); - resetParserState(); SmallString<80> Path(PathPrefix); sys::path::append(Path, Filename); @@ -198,11 +206,138 @@ void MachODebugMapParser::switchToNewDebugMapObject( return; } + addCommonSymbols(); + resetParserState(); + CurrentDebugMapObject = &Result->addDebugMapObject(Path, Timestamp, MachO::N_OSO); + loadCurrentObjectFileSymbols(*Object); } +/// Create a new DebugMapObject of type MachO::N_LIB. +/// This function resets the state of the parser that was +/// referring to the last object file and sets everything +/// up to add symbols to the new one. 
+void MachODebugMapParser::switchToNewLibDebugMapObject( + StringRef Filename, sys::TimePoint Timestamp) { + + if (DSYMSearchPaths.empty()) { + Warning("no dSYM search path was specified"); + return; + } + + StringRef LeafName = sys::path::filename(Filename); + SmallString<128> VariantLeafName; + SmallString<128> ProductName(LeafName); + + // For Framework.framework/Framework and -build-variant-suffix=_debug, + // look in the following order: + // 1) Framework.framework.dSYM/Contents/Resources/DWARF/Framework_debug + // 2) Framework.framework.dSYM/Contents/Resources/DWARF/Framework + // + // For libName.dylib and -build-variant-suffix=_debug, + // look in the following order: + // 1) libName.dylib.dSYM/Contents/Resources/DWARF/libName_debug.dylib + // 2) libName.dylib.dSYM/Contents/Resources/DWARF/libName.dylib + + size_t libExt = LeafName.rfind(".dylib"); + if (libExt != StringRef::npos) { + if (!VariantSuffix.empty()) { + VariantLeafName.append(LeafName.substr(0, libExt)); + VariantLeafName.append(VariantSuffix); + VariantLeafName.append(".dylib"); + } + } else { + // Expected to be a framework + ProductName.append(".framework"); + if (!VariantSuffix.empty()) { + VariantLeafName.append(LeafName); + VariantLeafName.append(VariantSuffix); + } + } + + for (auto DSYMSearchPath : DSYMSearchPaths) { + SmallString<256> Path(DSYMSearchPath); + SmallString<256> FallbackPath(Path); + + SmallString<256> DSYMPath(ProductName); + DSYMPath.append(".dSYM"); + sys::path::append(DSYMPath, "Contents", "Resources", "DWARF"); + + if (!VariantSuffix.empty()) { + sys::path::append(Path, DSYMPath, VariantLeafName); + sys::path::append(FallbackPath, DSYMPath, LeafName); + } else { + sys::path::append(Path, DSYMPath, LeafName); + } + + auto ObjectEntry = BinHolder.getObjectEntry(Path, Timestamp); + if (!ObjectEntry) { + auto Err = ObjectEntry.takeError(); + Warning("unable to open object file: " + toString(std::move(Err)), + Path.str()); + if (!VariantSuffix.empty()) { + ObjectEntry = BinHolder.getObjectEntry(FallbackPath, Timestamp); + if (!ObjectEntry) { + auto Err = ObjectEntry.takeError(); + Warning("unable to open object file: " + toString(std::move(Err)), + FallbackPath.str()); + continue; + } + Path.assign(FallbackPath); + } else { + continue; + } + } + + auto Object = + ObjectEntry->getObjectAs(Result->getTriple()); + if (!Object) { + auto Err = Object.takeError(); + Warning("unable to open object file: " + toString(std::move(Err)), + Path.str()); + continue; + } + + if (CurrentDebugMapObject && + CurrentDebugMapObject->getType() == MachO::N_LIB && + CurrentDebugMapObject->getObjectFilename().compare(Path.str()) == 0) { + return; + } + + addCommonSymbols(); + resetParserState(); + + CurrentDebugMapObject = + &Result->addDebugMapObject(Path, Timestamp, MachO::N_LIB); + + CurrentDebugMapObject->setInstallName(Filename); + + SmallString<256> RMPath(DSYMSearchPath); + sys::path::append(RMPath, ProductName); + RMPath.append(".dSYM"); + StringRef ArchName = Triple::getArchName(Result->getTriple().getArch(), + Result->getTriple().getSubArch()); + sys::path::append(RMPath, "Contents", "Resources", "Relocations", ArchName); + sys::path::append(RMPath, LeafName); + RMPath.append(".yml"); + const auto &RelocMapPtrOrErr = + RelocationMap::parseYAMLRelocationMap(RMPath, PathPrefix); + if (auto EC = RelocMapPtrOrErr.getError()) { + Warning("cannot parse relocation map file: " + EC.message(), + RMPath.str()); + return; + } + CurrentDebugMapObject->setRelocationMap(*RelocMapPtrOrErr->get()); + + 
loadCurrentObjectFileSymbols(*Object); + + // Found and loaded new dSYM file + return; + } +} + static std::string getArchName(const object::MachOObjectFile &Obj) { Triple T = Obj.getArchTriple(); return std::string(T.getArchName()); @@ -275,23 +410,39 @@ struct DarwinStabName { const char *Name; }; -const struct DarwinStabName DarwinStabNames[] = { - {MachO::N_GSYM, "N_GSYM"}, {MachO::N_FNAME, "N_FNAME"}, - {MachO::N_FUN, "N_FUN"}, {MachO::N_STSYM, "N_STSYM"}, - {MachO::N_LCSYM, "N_LCSYM"}, {MachO::N_BNSYM, "N_BNSYM"}, - {MachO::N_PC, "N_PC"}, {MachO::N_AST, "N_AST"}, - {MachO::N_OPT, "N_OPT"}, {MachO::N_RSYM, "N_RSYM"}, - {MachO::N_SLINE, "N_SLINE"}, {MachO::N_ENSYM, "N_ENSYM"}, - {MachO::N_SSYM, "N_SSYM"}, {MachO::N_SO, "N_SO"}, - {MachO::N_OSO, "N_OSO"}, {MachO::N_LSYM, "N_LSYM"}, - {MachO::N_BINCL, "N_BINCL"}, {MachO::N_SOL, "N_SOL"}, - {MachO::N_PARAMS, "N_PARAM"}, {MachO::N_VERSION, "N_VERS"}, - {MachO::N_OLEVEL, "N_OLEV"}, {MachO::N_PSYM, "N_PSYM"}, - {MachO::N_EINCL, "N_EINCL"}, {MachO::N_ENTRY, "N_ENTRY"}, - {MachO::N_LBRAC, "N_LBRAC"}, {MachO::N_EXCL, "N_EXCL"}, - {MachO::N_RBRAC, "N_RBRAC"}, {MachO::N_BCOMM, "N_BCOMM"}, - {MachO::N_ECOMM, "N_ECOMM"}, {MachO::N_ECOML, "N_ECOML"}, - {MachO::N_LENG, "N_LENG"}, {0, nullptr}}; +const struct DarwinStabName DarwinStabNames[] = {{MachO::N_GSYM, "N_GSYM"}, + {MachO::N_FNAME, "N_FNAME"}, + {MachO::N_FUN, "N_FUN"}, + {MachO::N_STSYM, "N_STSYM"}, + {MachO::N_LCSYM, "N_LCSYM"}, + {MachO::N_BNSYM, "N_BNSYM"}, + {MachO::N_PC, "N_PC"}, + {MachO::N_AST, "N_AST"}, + {MachO::N_OPT, "N_OPT"}, + {MachO::N_RSYM, "N_RSYM"}, + {MachO::N_SLINE, "N_SLINE"}, + {MachO::N_ENSYM, "N_ENSYM"}, + {MachO::N_SSYM, "N_SSYM"}, + {MachO::N_SO, "N_SO"}, + {MachO::N_OSO, "N_OSO"}, + {MachO::N_LIB, "N_LIB"}, + {MachO::N_LSYM, "N_LSYM"}, + {MachO::N_BINCL, "N_BINCL"}, + {MachO::N_SOL, "N_SOL"}, + {MachO::N_PARAMS, "N_PARAM"}, + {MachO::N_VERSION, "N_VERS"}, + {MachO::N_OLEVEL, "N_OLEV"}, + {MachO::N_PSYM, "N_PSYM"}, + {MachO::N_EINCL, "N_EINCL"}, + {MachO::N_ENTRY, "N_ENTRY"}, + {MachO::N_LBRAC, "N_LBRAC"}, + {MachO::N_EXCL, "N_EXCL"}, + {MachO::N_RBRAC, "N_RBRAC"}, + {MachO::N_BCOMM, "N_BCOMM"}, + {MachO::N_ECOMM, "N_ECOMM"}, + {MachO::N_ECOML, "N_ECOML"}, + {MachO::N_LENG, "N_LENG"}, + {0, nullptr}}; static const char *getDarwinStabString(uint8_t NType) { for (unsigned i = 0; DarwinStabNames[i].Name; i++) { @@ -477,13 +628,25 @@ void MachODebugMapParser::handleStabSymbolTableEntry( const char *Name = &MainBinaryStrings.data()[StringIndex]; + // An N_LIB entry represents the start of a new library file description. + if (Type == MachO::N_LIB) { + switchToNewLibDebugMapObject(Name, sys::toTimePoint(Value)); + return; + } + // An N_OSO entry represents the start of a new object file description. + // If an N_LIB entry was present, this is parsed only if the library + // dSYM file could not be found. 
if (Type == MachO::N_OSO) { - if (Duplicates.count(OSO(Name, Value))) { - SkipDebugMapObject = true; - return; + if (!CurrentDebugMapObject || + CurrentDebugMapObject->getType() != MachO::N_LIB) { + if (Duplicates.count(OSO(Name, Value))) { + SkipDebugMapObject = true; + return; + } + switchToNewDebugMapObject(Name, sys::toTimePoint(Value)); } - return switchToNewDebugMapObject(Name, sys::toTimePoint(Value)); + return; } if (SkipDebugMapObject) @@ -694,18 +857,23 @@ namespace dsymutil { llvm::ErrorOr>> parseDebugMap(llvm::IntrusiveRefCntPtr VFS, StringRef InputFile, ArrayRef Archs, - StringRef PrependPath, bool Verbose, bool InputIsYAML) { + ArrayRef DSYMSearchPaths, StringRef PrependPath, + StringRef VariantSuffix, bool Verbose, bool InputIsYAML) { if (InputIsYAML) return DebugMap::parseYAMLDebugMap(InputFile, PrependPath, Verbose); - MachODebugMapParser Parser(VFS, InputFile, Archs, PrependPath, Verbose); + MachODebugMapParser Parser(VFS, InputFile, Archs, DSYMSearchPaths, + PrependPath, VariantSuffix, Verbose); + return Parser.parse(); } bool dumpStab(llvm::IntrusiveRefCntPtr VFS, StringRef InputFile, ArrayRef Archs, - StringRef PrependPath) { - MachODebugMapParser Parser(VFS, InputFile, Archs, PrependPath, false); + ArrayRef DSYMSearchPaths, StringRef PrependPath, + StringRef VariantSuffix) { + MachODebugMapParser Parser(VFS, InputFile, Archs, DSYMSearchPaths, + PrependPath, VariantSuffix, false); return Parser.dumpStab(); } } // namespace dsymutil diff --git a/llvm/tools/dsymutil/Options.td b/llvm/tools/dsymutil/Options.td index 79f04fdfb0360..da071341cc01f 100644 --- a/llvm/tools/dsymutil/Options.td +++ b/llvm/tools/dsymutil/Options.td @@ -201,3 +201,14 @@ def linker: Separate<["--", "-"], "linker">, HelpText<"Specify the desired type of DWARF linker. Defaults to 'apple'">, Group; def: Joined<["--", "-"], "linker=">, Alias; + +def build_variant_suffix: Separate<["--", "-"], "build-variant-suffix">, + MetaVarName<"">, + HelpText<"Specify the build variant suffix used to build the executabe file.">, + Group; +def: Joined<["--", "-"], "build-variant-suffix=">, Alias; + +def dsym_search_path: Separate<["-", "--"], "D">, + MetaVarName<"">, + HelpText<"Specify a directory that contain dSYM files to search for.">, + Group; diff --git a/llvm/tools/dsymutil/RelocationMap.cpp b/llvm/tools/dsymutil/RelocationMap.cpp new file mode 100644 index 0000000000000..5921e7c9c2495 --- /dev/null +++ b/llvm/tools/dsymutil/RelocationMap.cpp @@ -0,0 +1,92 @@ +//===- tools/dsymutil/RelocationMap.cpp - Relocation map representation---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "RelocationMap.h" + +namespace llvm { + +namespace dsymutil { + +void RelocationMap::print(raw_ostream &OS) const { + yaml::Output yout(OS, /* Ctxt = */ nullptr, /* WrapColumn = */ 0); + yout << const_cast(*this); +} + +#ifndef NDEBUG +void RelocationMap::dump() const { print(errs()); } +#endif + +void RelocationMap::addRelocationMapEntry(const ValidReloc &Relocation) { + Relocations.push_back(Relocation); +} + +namespace { + +struct YAMLContext { + StringRef PrependPath; + Triple BinaryTriple; +}; + +} // end anonymous namespace + +ErrorOr> +RelocationMap::parseYAMLRelocationMap(StringRef InputFile, + StringRef PrependPath) { + auto ErrOrFile = MemoryBuffer::getFileOrSTDIN(InputFile); + if (auto Err = ErrOrFile.getError()) + return Err; + + YAMLContext Ctxt; + + Ctxt.PrependPath = PrependPath; + + std::unique_ptr Result; + yaml::Input yin((*ErrOrFile)->getBuffer(), &Ctxt); + yin >> Result; + + if (auto EC = yin.error()) + return EC; + return std::move(Result); +} + +} // end namespace dsymutil + +namespace yaml { + +void MappingTraits::mapping(IO &io, + dsymutil::ValidReloc &VR) { + io.mapRequired("offset", VR.Offset); + io.mapRequired("size", VR.Size); + io.mapRequired("addend", VR.Addend); + io.mapRequired("symName", VR.SymbolName); + io.mapOptional("symObjAddr", VR.SymbolMapping.ObjectAddress); + io.mapRequired("symBinAddr", VR.SymbolMapping.BinaryAddress); + io.mapRequired("symSize", VR.SymbolMapping.Size); +} + +void MappingTraits::mapping( + IO &io, dsymutil::RelocationMap &RM) { + io.mapRequired("triple", RM.BinaryTriple); + io.mapRequired("binary-path", RM.BinaryPath); + if (void *Ctxt = io.getContext()) + reinterpret_cast(Ctxt)->BinaryTriple = RM.BinaryTriple; + io.mapRequired("relocations", RM.Relocations); +} + +void MappingTraits>::mapping( + IO &io, std::unique_ptr &RM) { + if (!RM) + RM.reset(new RelocationMap()); + io.mapRequired("triple", RM->BinaryTriple); + io.mapRequired("binary-path", RM->BinaryPath); + if (void *Ctxt = io.getContext()) + reinterpret_cast(Ctxt)->BinaryTriple = RM->BinaryTriple; + io.mapRequired("relocations", RM->Relocations); +} +} // end namespace yaml +} // end namespace llvm diff --git a/llvm/tools/dsymutil/RelocationMap.h b/llvm/tools/dsymutil/RelocationMap.h new file mode 100644 index 0000000000000..3d851acf2b892 --- /dev/null +++ b/llvm/tools/dsymutil/RelocationMap.h @@ -0,0 +1,160 @@ +//===- tools/dsymutil/RelocationMap.h -------------------------- *- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// +/// This file contains the class declaration of the RelocationMap +/// entity. RelocationMap lists all the relocations of all the +/// atoms used in the object files linked together to +/// produce an executable. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_DSYMUTIL_RELOCATIONMAP_H +#define LLVM_TOOLS_DSYMUTIL_RELOCATIONMAP_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Support/YAMLTraits.h" +#include "llvm/TargetParser/Triple.h" + +#include +#include +#include + +namespace llvm { + +class raw_ostream; + +namespace dsymutil { + +struct SymbolMapping { + std::optional ObjectAddress; + yaml::Hex64 BinaryAddress; + yaml::Hex32 Size; + + SymbolMapping(std::optional ObjectAddr, uint64_t BinaryAddress, + uint32_t Size) + : BinaryAddress(BinaryAddress), Size(Size) { + if (ObjectAddr) + ObjectAddress = *ObjectAddr; + } + + /// For YAML IO support + SymbolMapping() = default; +}; + +/// ValidReloc represents one relocation entry described by the RelocationMap. +/// It contains a list of DWARF relocations to apply to a linked binary. +class ValidReloc { +public: + yaml::Hex64 Offset; + yaml::Hex32 Size; + yaml::Hex64 Addend; + std::string SymbolName; + struct SymbolMapping SymbolMapping; + + struct SymbolMapping getSymbolMapping() const { return SymbolMapping; } + + ValidReloc(uint64_t Offset, uint32_t Size, uint64_t Addend, + StringRef SymbolName, struct SymbolMapping SymbolMapping) + : Offset(Offset), Size(Size), Addend(Addend), SymbolName(SymbolName), + SymbolMapping(SymbolMapping) {} + + bool operator<(const ValidReloc &RHS) const { return Offset < RHS.Offset; } + + /// For YAMLIO support. + ValidReloc() = default; +}; + +/// The RelocationMap object stores the list of relocation entries for a binary +class RelocationMap { + Triple BinaryTriple; + std::string BinaryPath; + using RelocContainer = std::vector; + + RelocContainer Relocations; + + /// For YAML IO support. + ///@{ + friend yaml::MappingTraits>; + friend yaml::MappingTraits; + + RelocationMap() = default; + ///@} + +public: + RelocationMap(const Triple &BinaryTriple, StringRef BinaryPath) + : BinaryTriple(BinaryTriple), BinaryPath(std::string(BinaryPath)) {} + + using const_iterator = RelocContainer::const_iterator; + + iterator_range relocations() const { + return make_range(begin(), end()); + } + + const_iterator begin() const { return Relocations.begin(); } + + const_iterator end() const { return Relocations.end(); } + + size_t getNumberOfEntries() const { return Relocations.size(); } + + /// This function adds a ValidReloc to the list owned by this + /// relocation map. + void addRelocationMapEntry(const ValidReloc &Relocation); + + const Triple &getTriple() const { return BinaryTriple; } + + StringRef getBinaryPath() const { return BinaryPath; } + + void print(raw_ostream &OS) const; + +#ifndef NDEBUG + void dump() const; +#endif + + /// Read a relocation map from \a InputFile. 
+ static ErrorOr> + parseYAMLRelocationMap(StringRef InputFile, StringRef PrependPath); +}; + +} // end namespace dsymutil +} // end namespace llvm + +LLVM_YAML_IS_SEQUENCE_VECTOR(dsymutil::ValidReloc) + +namespace llvm { +namespace yaml { + +using namespace llvm::dsymutil; + +template <> struct MappingTraits { + static void mapping(IO &io, dsymutil::ValidReloc &VR); + static const bool flow = true; +}; + +template <> struct MappingTraits { + struct YamlRM; + static void mapping(IO &io, dsymutil::RelocationMap &RM); +}; + +template <> struct MappingTraits> { + struct YamlRM; + static void mapping(IO &io, std::unique_ptr &RM); +}; + +template <> struct ScalarTraits { + static void output(const Triple &val, void *, raw_ostream &out); + static StringRef input(StringRef scalar, void *, Triple &value); + static QuotingType mustQuote(StringRef) { return QuotingType::Single; } +}; + +} // end namespace yaml +} // end namespace llvm + +#endif // LLVM_TOOLS_DSYMUTIL_RELOCATIONMAP_H diff --git a/llvm/tools/dsymutil/dsymutil.cpp b/llvm/tools/dsymutil/dsymutil.cpp index 104895b1a90bd..2dd123318e00b 100644 --- a/llvm/tools/dsymutil/dsymutil.cpp +++ b/llvm/tools/dsymutil/dsymutil.cpp @@ -398,6 +398,12 @@ static Expected getOptions(opt::InputArgList &Args) { Options.LinkOpts.RemarksKeepAll = !Args.hasArg(OPT_remarks_drop_without_debug); + if (opt::Arg *BuildVariantSuffix = Args.getLastArg(OPT_build_variant_suffix)) + Options.LinkOpts.BuildVariantSuffix = BuildVariantSuffix->getValue(); + + for (auto *SearchPath : Args.filtered(OPT_dsym_search_path)) + Options.LinkOpts.DSYMSearchPaths.push_back(SearchPath->getValue()); + if (Error E = verifyOptions(Options)) return std::move(E); return Options; @@ -670,15 +676,18 @@ int dsymutil_main(int argc, char **argv, const llvm::ToolContext &) { // Dump the symbol table for each input file and requested arch if (Options.DumpStab) { if (!dumpStab(Options.LinkOpts.VFS, InputFile, Options.Archs, - Options.LinkOpts.PrependPath)) + Options.LinkOpts.DSYMSearchPaths, + Options.LinkOpts.PrependPath, + Options.LinkOpts.BuildVariantSuffix)) return EXIT_FAILURE; continue; } - auto DebugMapPtrsOrErr = - parseDebugMap(Options.LinkOpts.VFS, InputFile, Options.Archs, - Options.LinkOpts.PrependPath, Options.LinkOpts.Verbose, - Options.InputIsYAMLDebugMap); + auto DebugMapPtrsOrErr = parseDebugMap( + Options.LinkOpts.VFS, InputFile, Options.Archs, + Options.LinkOpts.DSYMSearchPaths, Options.LinkOpts.PrependPath, + Options.LinkOpts.BuildVariantSuffix, Options.LinkOpts.Verbose, + Options.InputIsYAMLDebugMap); if (auto EC = DebugMapPtrsOrErr.getError()) { WithColor::error() << "cannot parse the debug map for '" << InputFile diff --git a/llvm/tools/dsymutil/dsymutil.h b/llvm/tools/dsymutil/dsymutil.h index ddecd8a76c7fc..5504dd57c7e55 100644 --- a/llvm/tools/dsymutil/dsymutil.h +++ b/llvm/tools/dsymutil/dsymutil.h @@ -35,12 +35,14 @@ namespace dsymutil { ErrorOr>> parseDebugMap(llvm::IntrusiveRefCntPtr VFS, StringRef InputFile, ArrayRef Archs, - StringRef PrependPath, bool Verbose, bool InputIsYAML); + ArrayRef DSYMSearchPaths, StringRef PrependPath, + StringRef VariantSuffix, bool Verbose, bool InputIsYAML); /// Dump the symbol table. 
 bool dumpStab(llvm::IntrusiveRefCntPtr<vfs::FileSystem> VFS,
               StringRef InputFile, ArrayRef<std::string> Archs,
-              StringRef PrependPath = "");
+              ArrayRef<std::string> DSYMSearchPaths, StringRef PrependPath = "",
+              StringRef VariantSuffix = "");
 
 } // end namespace dsymutil
 } // end namespace llvm
diff --git a/llvm/tools/llvm-dwarfutil/DebugInfoLinker.cpp b/llvm/tools/llvm-dwarfutil/DebugInfoLinker.cpp
index d97dd7392b0df..02a94596ec764 100644
--- a/llvm/tools/llvm-dwarfutil/DebugInfoLinker.cpp
+++ b/llvm/tools/llvm-dwarfutil/DebugInfoLinker.cpp
@@ -132,11 +132,23 @@ class ObjFileAddressMap : public AddressMapBase {
     return std::nullopt;
   }
 
+  std::optional<StringRef> getLibraryInstallName() override {
+    return std::nullopt;
+  }
+
   bool applyValidRelocs(MutableArrayRef<char>, uint64_t, bool) override {
     // no need to apply relocations to the linked binary.
     return false;
   }
 
+  bool needToSaveValidRelocs() override { return false; }
+
+  void updateAndSaveValidRelocs(bool, uint64_t, int64_t, uint64_t,
+                                uint64_t) override {}
+
+  void updateRelocationsWithUnitOffset(uint64_t OriginalUnitOffset,
+                                       uint64_t OutputUnitOffset) override {}
+
   void clear() override {}
 
 protected:
diff --git a/llvm/tools/llvm-nm/llvm-nm.cpp b/llvm/tools/llvm-nm/llvm-nm.cpp
index e32aa8ab8f5bf..fede89e9c1167 100644
--- a/llvm/tools/llvm-nm/llvm-nm.cpp
+++ b/llvm/tools/llvm-nm/llvm-nm.cpp
@@ -561,37 +561,22 @@ struct DarwinStabName {
   const char *Name;
 };
 const struct DarwinStabName DarwinStabNames[] = {
-    {MachO::N_GSYM, "GSYM"},
-    {MachO::N_FNAME, "FNAME"},
-    {MachO::N_FUN, "FUN"},
-    {MachO::N_STSYM, "STSYM"},
-    {MachO::N_LCSYM, "LCSYM"},
-    {MachO::N_BNSYM, "BNSYM"},
-    {MachO::N_PC, "PC"},
-    {MachO::N_AST, "AST"},
-    {MachO::N_OPT, "OPT"},
-    {MachO::N_RSYM, "RSYM"},
-    {MachO::N_SLINE, "SLINE"},
-    {MachO::N_ENSYM, "ENSYM"},
-    {MachO::N_SSYM, "SSYM"},
-    {MachO::N_SO, "SO"},
-    {MachO::N_OSO, "OSO"},
-    {MachO::N_LSYM, "LSYM"},
-    {MachO::N_BINCL, "BINCL"},
-    {MachO::N_SOL, "SOL"},
-    {MachO::N_PARAMS, "PARAM"},
-    {MachO::N_VERSION, "VERS"},
-    {MachO::N_OLEVEL, "OLEV"},
-    {MachO::N_PSYM, "PSYM"},
-    {MachO::N_EINCL, "EINCL"},
-    {MachO::N_ENTRY, "ENTRY"},
-    {MachO::N_LBRAC, "LBRAC"},
-    {MachO::N_EXCL, "EXCL"},
-    {MachO::N_RBRAC, "RBRAC"},
-    {MachO::N_BCOMM, "BCOMM"},
-    {MachO::N_ECOMM, "ECOMM"},
-    {MachO::N_ECOML, "ECOML"},
-    {MachO::N_LENG, "LENG"},
+    {MachO::N_GSYM, "GSYM"},    {MachO::N_FNAME, "FNAME"},
+    {MachO::N_FUN, "FUN"},      {MachO::N_STSYM, "STSYM"},
+    {MachO::N_LCSYM, "LCSYM"},  {MachO::N_BNSYM, "BNSYM"},
+    {MachO::N_PC, "PC"},        {MachO::N_AST, "AST"},
+    {MachO::N_OPT, "OPT"},      {MachO::N_RSYM, "RSYM"},
+    {MachO::N_SLINE, "SLINE"},  {MachO::N_ENSYM, "ENSYM"},
+    {MachO::N_SSYM, "SSYM"},    {MachO::N_SO, "SO"},
+    {MachO::N_OSO, "OSO"},      {MachO::N_LIB, "LIB"},
+    {MachO::N_LSYM, "LSYM"},    {MachO::N_BINCL, "BINCL"},
+    {MachO::N_SOL, "SOL"},      {MachO::N_PARAMS, "PARAM"},
+    {MachO::N_VERSION, "VERS"}, {MachO::N_OLEVEL, "OLEV"},
+    {MachO::N_PSYM, "PSYM"},    {MachO::N_EINCL, "EINCL"},
+    {MachO::N_ENTRY, "ENTRY"},  {MachO::N_LBRAC, "LBRAC"},
+    {MachO::N_EXCL, "EXCL"},    {MachO::N_RBRAC, "RBRAC"},
+    {MachO::N_BCOMM, "BCOMM"},  {MachO::N_ECOMM, "ECOMM"},
+    {MachO::N_ECOML, "ECOML"},  {MachO::N_LENG, "LENG"},
 };
 
 static const char *getDarwinStabString(uint8_t NType) {

From 0cbaff815cf2083b956af037c2efbdce722ed560 Mon Sep 17 00:00:00 2001
From: Aart Bik <39774503+aartbik@users.noreply.github.com>
Date: Thu, 26 Oct 2023 10:48:29 -0700
Subject: [PATCH 105/877] [mlir][sparse] cleanup conversion test (#70356)

Various TODOs had been added that effectively removed the actual tests.
This puts the CHECK tests back and removes the TODOs for which there are
no immediate plans.
---
 .../SparseTensor/convert_sparse2sparse.mlir   | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/mlir/test/Dialect/SparseTensor/convert_sparse2sparse.mlir b/mlir/test/Dialect/SparseTensor/convert_sparse2sparse.mlir
index 658e8aa40022e..0280e27b4e312 100644
--- a/mlir/test/Dialect/SparseTensor/convert_sparse2sparse.mlir
+++ b/mlir/test/Dialect/SparseTensor/convert_sparse2sparse.mlir
@@ -41,25 +41,24 @@ func.func @sparse_nop_convert(%arg0: tensor<64xf32, #SparseVector>) -> tensor<64
 }
 
 // CHECK-LABEL: func.func @sparse_hidden_nop_cast
-// TODO: The following convert should be a cast instead.
-// CHECK: sparse_tensor.convert
-// CHECK: return
+// CHECK-NEXT: sparse_tensor.convert
+// CHECK-NEXT: return
 func.func @sparse_hidden_nop_cast(%arg0: tensor<32xf32, #SparseVector>) -> tensor {
   %0 = sparse_tensor.convert %arg0 : tensor<32xf32, #SparseVector> to tensor
   return %0 : tensor
 }
 
 // CHECK-LABEL: func.func @sparse_convert_1d_ss(
-// TODO: libgen path need to support efficient format conversion (e.g., 32 bit pos -> 64 bit pos).
-// Maybe we should use a different operator as well to be clear.
+// CHECK-NEXT: sparse_tensor.convert
+// CHECK-NEXT: return
 func.func @sparse_convert_1d_ss(%arg0: tensor) -> tensor {
   %0 = sparse_tensor.convert %arg0 : tensor to tensor
   return %0 : tensor
 }
 
 // CHECK-LABEL: func.func @sparse_convert(
-// TODO: libgen path need to support efficient format conversion (e.g., 32 bit pos -> 64 bit pos).
-// Maybe we should use a different operator as well to be clear.
+// CHECK-NEXT: sparse_tensor.convert
+// CHECK-NEXT: return
 func.func @sparse_convert(%arg0: tensor) -> tensor {
   %0 = sparse_tensor.convert %arg0 : tensor to tensor
   return %0 : tensor
@@ -73,6 +72,7 @@ func.func @sparse_convert(%arg0: tensor) -> tensor) -> tensor {
   %0 = sparse_tensor.convert %arg0 : tensor to tensor
   return %0 : tensor
 }
@@ -83,6 +83,7 @@ func.func @sparse_convert_permuted(%arg0: tensor) -> te
 // CHECK: sparse_tensor.insert
 // CHECK: sparse_tensor.load
 // CHECK-NOT: sparse_tensor.reorder_coo
+// CHECK: return
 func.func @sparse_convert_slice(%arg0: tensor<2x13xi32, #COOSlice>) -> (tensor<2x13xi32, #SortedCOO2D>) {
   %0 = sparse_tensor.convert %arg0 : tensor<2x13xi32, #COOSlice> to tensor<2x13xi32, #SortedCOO2D>
   return %0 : tensor<2x13xi32, #SortedCOO2D>

From 02fcae844c30e086600018e20c2e512269da1126 Mon Sep 17 00:00:00 2001
From: LLVM GN Syncbot
Date: Thu, 26 Oct 2023 17:54:25 +0000
Subject: [PATCH 106/877] [gn build] Port 88d00a6897d7

---
 llvm/utils/gn/secondary/llvm/tools/dsymutil/BUILD.gn | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/gn/secondary/llvm/tools/dsymutil/BUILD.gn b/llvm/utils/gn/secondary/llvm/tools/dsymutil/BUILD.gn
index db8f5d1da3097..e962d100f217a 100644
--- a/llvm/utils/gn/secondary/llvm/tools/dsymutil/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/tools/dsymutil/BUILD.gn
@@ -28,6 +28,7 @@ driver_executable("dsymutil") {
     "DwarfLinkerForBinary.cpp",
     "MachODebugMapParser.cpp",
     "MachOUtils.cpp",
+    "RelocationMap.cpp",
    "Reproducer.cpp",
     "SymbolMap.cpp",
     "dsymutil.cpp",

From d307dc5b512753efa4db45576fa9aeed1de97e62 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Thu, 26 Oct 2023 10:38:59 -0700
Subject: [PATCH 107/877] [RISCV][GISel] Allow G_AND/G_OR/G_XOR to have s32 types on RV64.

Even though we don't have W instructions for them, this treats them the
same as other binary operators.
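As an illustration (example IR only, not part of this patch): with these
rules a 32-bit bitwise operation on RV64 keeps its s32 type through
legalization, so input such as

    define i32 @and32(i32 %a, i32 %b) {
      %r = and i32 %a, %b
      ret i32 %r
    }

should now select to a single AND, with the upper 32 bits of the destination
register left as anyext garbage, just like G_ADD and G_SUB.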
--- .../Target/RISCV/GISel/RISCVLegalizerInfo.cpp | 7 +--- llvm/lib/Target/RISCV/RISCVGISel.td | 10 +++++ .../legalizer/rv64/legalize-abs.mir | 22 ++++------ .../legalizer/rv64/legalize-add.mir | 10 +++-- .../legalizer/rv64/legalize-addo-subo.mir | 14 +++++-- .../legalizer/rv64/legalize-and.mir | 28 +++++++++---- .../legalizer/rv64/legalize-ashr.mir | 24 +++++------ .../legalizer/rv64/legalize-div.mir | 42 +++++++++---------- .../legalizer/rv64/legalize-load.mir | 31 ++++++-------- .../legalizer/rv64/legalize-lshr.mir | 42 +++++++++---------- .../GlobalISel/legalizer/rv64/legalize-or.mir | 28 +++++++++---- .../legalizer/rv64/legalize-rem.mir | 42 +++++++++---------- .../legalizer/rv64/legalize-shl.mir | 24 +++++------ .../legalizer/rv64/legalize-store.mir | 22 ++++------ .../legalizer/rv64/legalize-sub.mir | 10 +++-- .../legalizer/rv64/legalize-xor.mir | 28 +++++++++---- 16 files changed, 212 insertions(+), 172 deletions(-) diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp index 736422c4af131..1d4e279d524be 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp @@ -33,12 +33,7 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) { using namespace TargetOpcode; - getActionDefinitionsBuilder({G_AND, G_OR, G_XOR}) - .legalFor({XLenLLT}) - .widenScalarToNextPow2(0) - .clampScalar(0, XLenLLT, XLenLLT); - - getActionDefinitionsBuilder({G_ADD, G_SUB}) + getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR}) .legalFor({s32, XLenLLT}) .widenScalarToNextPow2(0) .clampScalar(0, s32, XLenLLT); diff --git a/llvm/lib/Target/RISCV/RISCVGISel.td b/llvm/lib/Target/RISCV/RISCVGISel.td index 60896106bc0b5..fcac2f3655962 100644 --- a/llvm/lib/Target/RISCV/RISCVGISel.td +++ b/llvm/lib/Target/RISCV/RISCVGISel.td @@ -90,12 +90,22 @@ def : Pat<(XLenVT (sub GPR:$rs1, simm12Plus1:$imm)), let Predicates = [IsRV64] in { def : Pat<(i32 (add GPR:$rs1, GPR:$rs2)), (ADDW GPR:$rs1, GPR:$rs2)>; def : Pat<(i32 (sub GPR:$rs1, GPR:$rs2)), (SUBW GPR:$rs1, GPR:$rs2)>; +def : Pat<(i32 (and GPR:$rs1, GPR:$rs2)), (AND GPR:$rs1, GPR:$rs2)>; +def : Pat<(i32 (or GPR:$rs1, GPR:$rs2)), (OR GPR:$rs1, GPR:$rs2)>; +def : Pat<(i32 (xor GPR:$rs1, GPR:$rs2)), (XOR GPR:$rs1, GPR:$rs2)>; def : Pat<(i32 (add GPR:$rs1, simm12i32:$imm)), (ADDIW GPR:$rs1, (i64 (as_i64imm $imm)))>; def : Pat<(i32 (sub GPR:$rs1, simm12Plus1i32:$imm)), (ADDIW GPR:$rs1, (i64 (NegImm $imm)))>; +def : Pat<(i32 (and GPR:$rs1, simm12i32:$imm)), + (ANDI GPR:$rs1, (i64 (as_i64imm $imm)))>; +def : Pat<(i32 (or GPR:$rs1, simm12i32:$imm)), + (ORI GPR:$rs1, (i64 (as_i64imm $imm)))>; +def : Pat<(i32 (xor GPR:$rs1, simm12i32:$imm)), + (XORI GPR:$rs1, (i64 (as_i64imm $imm)))>; + def : Pat<(i32 (shl GPR:$rs1, (i32 GPR:$rs2))), (SLLW GPR:$rs1, GPR:$rs2)>; def : Pat<(i32 (sra GPR:$rs1, (i32 GPR:$rs2))), (SRAW GPR:$rs1, GPR:$rs2)>; def : Pat<(i32 (srl GPR:$rs1, (i32 GPR:$rs2))), (SRLW GPR:$rs1, GPR:$rs2)>; diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-abs.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-abs.mir index ae86ede4724ab..d7b694b9dfdb8 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-abs.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-abs.mir @@ -15,11 +15,10 @@ body: | ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[C]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[ASSERT_ZEXT]](s64) ; CHECK-NEXT: 
[[ADD:%[0-9]+]]:_(s32) = G_ADD [[TRUNC1]], [[ASHR1]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ADD]](s32) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[ASHR1]](s32) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[ANYEXT]], [[ANYEXT1]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR1]] ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[XOR]], [[C2]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[XOR]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C2]] ; CHECK-NEXT: $x10 = COPY [[AND]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %1:_(s64) = COPY $x10 @@ -45,11 +44,10 @@ body: | ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[C]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[ASSERT_SEXT]](s64) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[TRUNC1]], [[ASHR1]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ADD]](s32) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[ASHR1]](s32) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[ANYEXT]], [[ANYEXT1]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[XOR]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[XOR]], [[C2]](s64) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C2]](s64) ; CHECK-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[SHL1]], [[C2]](s64) ; CHECK-NEXT: $x10 = COPY [[ASHR2]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 @@ -72,11 +70,9 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[TRUNC]], [[C]](s32) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[TRUNC]], [[ASHR]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ADD]](s32) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[ASHR]](s32) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[ANYEXT]], [[ANYEXT1]] - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[XOR]], 32 - ; CHECK-NEXT: $x10 = COPY [[SEXT_INREG]](s64) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[XOR]](s32) + ; CHECK-NEXT: $x10 = COPY [[SEXT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %1:_(s64) = COPY $x10 %2:_(s64) = G_ASSERT_SEXT %1, 32 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-add.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-add.mir index 4a406fbe76cd3..11ac3c5991376 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-add.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-add.mir @@ -195,11 +195,15 @@ body: | ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s64) = G_ADD [[ADD1]], [[AND]] ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[ADD2]](s64), [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ICMP2]], [[ICMP]] - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[ICMP1]], [[AND1]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[ICMP2]](s64) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[ICMP]](s64) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[ICMP1]](s64) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[TRUNC2]], [[AND1]] ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s64) = G_ADD %hi1, %hi2 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; 
CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[OR]], [[C2]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C2]] ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s64) = G_ADD [[ADD3]], [[AND2]] ; CHECK-NEXT: $x10 = COPY [[ADD]](s64) ; CHECK-NEXT: $x11 = COPY [[ADD2]](s64) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-addo-subo.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-addo-subo.mir index 5659f89f55ede..6600f7f30d729 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-addo-subo.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-addo-subo.mir @@ -122,9 +122,12 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(slt), [[ADD]](s64), [[COPY]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(slt), [[COPY1]](s64), [[C]] - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[ICMP1]], [[ICMP]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[ICMP1]](s64) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[ICMP]](s64) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[XOR]](s32) ; CHECK-NEXT: $x10 = COPY [[ADD]](s64) - ; CHECK-NEXT: $x11 = COPY [[XOR]](s64) + ; CHECK-NEXT: $x11 = COPY [[ANYEXT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 %0:_(s64) = COPY $x10 %1:_(s64) = COPY $x11 @@ -255,9 +258,12 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(slt), [[SUB]](s64), [[COPY]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(sgt), [[COPY1]](s64), [[C]] - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[ICMP1]], [[ICMP]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[ICMP1]](s64) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[ICMP]](s64) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[XOR]](s32) ; CHECK-NEXT: $x10 = COPY [[SUB]](s64) - ; CHECK-NEXT: $x11 = COPY [[XOR]](s64) + ; CHECK-NEXT: $x11 = COPY [[ANYEXT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 %0:_(s64) = COPY $x10 %1:_(s64) = COPY $x11 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-and.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-and.mir index f7802eb3b6b2f..89541575cf1c8 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-and.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-and.mir @@ -8,8 +8,11 @@ body: | ; CHECK-LABEL: name: and_i8 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[COPY1]] - ; CHECK-NEXT: $x10 = COPY [[AND]](s64) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[AND]](s32) + ; CHECK-NEXT: $x10 = COPY [[ANYEXT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %0:_(s64) = COPY $x10 %1:_(s64) = COPY $x11 @@ -28,8 +31,11 @@ body: | ; CHECK-LABEL: name: and_i15 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[COPY1]] - ; CHECK-NEXT: $x10 = COPY [[AND]](s64) 
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[AND]](s32) + ; CHECK-NEXT: $x10 = COPY [[ANYEXT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %0:_(s64) = COPY $x10 %1:_(s64) = COPY $x11 @@ -48,8 +54,11 @@ body: | ; CHECK-LABEL: name: and_i16 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[COPY1]] - ; CHECK-NEXT: $x10 = COPY [[AND]](s64) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[AND]](s32) + ; CHECK-NEXT: $x10 = COPY [[ANYEXT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %0:_(s64) = COPY $x10 %1:_(s64) = COPY $x11 @@ -68,8 +77,11 @@ body: | ; CHECK-LABEL: name: and_i32 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[COPY1]] - ; CHECK-NEXT: $x10 = COPY [[AND]](s64) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[AND]](s32) + ; CHECK-NEXT: $x10 = COPY [[ANYEXT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %0:_(s64) = COPY $x10 %1:_(s64) = COPY $x11 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-ashr.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-ashr.mir index 02c6b630acbda..a422c42e77684 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-ashr.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-ashr.mir @@ -8,14 +8,14 @@ body: | ; CHECK-LABEL: name: ashr_i8 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC1]], [[C1]](s32) ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C1]](s32) - ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[TRUNC]](s32) + ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[AND]](s32) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ASHR1]](s32) ; CHECK-NEXT: $x10 = COPY [[ANYEXT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 @@ -36,14 +36,14 @@ body: | ; CHECK-LABEL: name: ashr_i15 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 32767 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 + ; CHECK-NEXT: 
[[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC1]], [[C1]](s32) ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C1]](s32) - ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[TRUNC]](s32) + ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[AND]](s32) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ASHR1]](s32) ; CHECK-NEXT: $x10 = COPY [[ANYEXT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 @@ -64,14 +64,14 @@ body: | ; CHECK-LABEL: name: ashr_i16 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC1]], [[C1]](s32) ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C1]](s32) - ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[TRUNC]](s32) + ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[AND]](s32) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ASHR1]](s32) ; CHECK-NEXT: $x10 = COPY [[ANYEXT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-div.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-div.mir index 5909a062844a6..d3895cb54092e 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-div.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-div.mir @@ -332,13 +332,13 @@ body: | ; CHECK-M-LABEL: name: udiv_i8 ; CHECK-M: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; CHECK-M-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 - ; CHECK-M-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 - ; CHECK-M-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] - ; CHECK-M-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) - ; CHECK-M-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 - ; CHECK-M-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]] - ; CHECK-M-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) - ; CHECK-M-NEXT: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[TRUNC]], [[TRUNC1]] + ; CHECK-M-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-M-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-M-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] + ; CHECK-M-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-M-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-M-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C1]] + ; CHECK-M-NEXT: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[AND]], [[AND1]] ; CHECK-M-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UDIV]](s32) ; CHECK-M-NEXT: $x10 = COPY [[ANYEXT]](s64) ; CHECK-M-NEXT: PseudoRET implicit $x10 @@ -373,13 +373,13 @@ body: | ; CHECK-M-LABEL: name: udiv_i15 ; CHECK-M: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; CHECK-M-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 - ; CHECK-M-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 32767 - ; 
CHECK-M-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] - ; CHECK-M-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) - ; CHECK-M-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32767 - ; CHECK-M-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]] - ; CHECK-M-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) - ; CHECK-M-NEXT: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[TRUNC]], [[TRUNC1]] + ; CHECK-M-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 + ; CHECK-M-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-M-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] + ; CHECK-M-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 + ; CHECK-M-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-M-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C1]] + ; CHECK-M-NEXT: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[AND]], [[AND1]] ; CHECK-M-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UDIV]](s32) ; CHECK-M-NEXT: $x10 = COPY [[ANYEXT]](s64) ; CHECK-M-NEXT: PseudoRET implicit $x10 @@ -414,13 +414,13 @@ body: | ; CHECK-M-LABEL: name: udiv_i16 ; CHECK-M: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; CHECK-M-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 - ; CHECK-M-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 - ; CHECK-M-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] - ; CHECK-M-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) - ; CHECK-M-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 - ; CHECK-M-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]] - ; CHECK-M-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) - ; CHECK-M-NEXT: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[TRUNC]], [[TRUNC1]] + ; CHECK-M-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-M-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-M-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] + ; CHECK-M-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-M-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-M-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C1]] + ; CHECK-M-NEXT: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[AND]], [[AND1]] ; CHECK-M-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UDIV]](s32) ; CHECK-M-NEXT: $x10 = COPY [[ANYEXT]](s64) ; CHECK-M-NEXT: PseudoRET implicit $x10 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-load.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-load.mir index 43cb3445ac4cc..895e10a8b5fae 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-load.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-load.mir @@ -220,10 +220,9 @@ body: | ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[SHL]](s32) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[ZEXTLOAD]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[ANYEXT]], [[ANYEXT1]] - ; CHECK-NEXT: $x10 = COPY [[OR]](s64) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR]](s32) + ; CHECK-NEXT: $x10 = COPY [[ANYEXT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %0:_(p0) = COPY $x10 %1:_(s16) = G_LOAD %0(p0) :: (load (s16), align 1) @@ -259,9 +258,7 @@ body: | ; CHECK-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; CHECK-NEXT: 
[[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[SHL]](s32) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[ZEXTLOAD]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[ANYEXT]], [[ANYEXT1]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) ; CHECK-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) @@ -270,15 +267,12 @@ body: | ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C4]](s32) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[SHL1]](s32) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[ZEXTLOAD2]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[ANYEXT2]], [[ANYEXT3]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[OR1]](s64) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[TRUNC]], [[C5]](s32) - ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[SHL2]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[ANYEXT4]], [[OR]] - ; CHECK-NEXT: $x10 = COPY [[OR2]](s64) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C5]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR2]](s32) + ; CHECK-NEXT: $x10 = COPY [[ANYEXT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %0:_(p0) = COPY $x10 %1:_(s32) = G_LOAD %0(p0) :: (load (s32), align 1) @@ -314,10 +308,9 @@ body: | ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[SHL]](s32) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[ZEXTLOAD]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[ANYEXT]], [[ANYEXT1]] - ; CHECK-NEXT: $x10 = COPY [[OR]](s64) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR]](s32) + ; CHECK-NEXT: $x10 = COPY [[ANYEXT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %0:_(p0) = COPY $x10 %1:_(s32) = G_LOAD %0(p0) :: (load (s32), align 2) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-lshr.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-lshr.mir index 78cd648706de9..aa34e7d2cdcae 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-lshr.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-lshr.mir @@ -8,13 +8,13 @@ body: | ; CHECK-LABEL: name: lshr_i8 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C1]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) - ; 
CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[TRUNC1]], [[TRUNC]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C1]] + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[AND]](s32) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LSHR]](s32) ; CHECK-NEXT: $x10 = COPY [[ANYEXT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 @@ -35,13 +35,13 @@ body: | ; CHECK-LABEL: name: lshr_i15 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 32767 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32767 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C1]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[TRUNC1]], [[TRUNC]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C1]] + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[AND]](s32) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LSHR]](s32) ; CHECK-NEXT: $x10 = COPY [[ANYEXT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 @@ -62,13 +62,13 @@ body: | ; CHECK-LABEL: name: lshr_i16 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C1]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[TRUNC1]], [[TRUNC]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C1]] + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[AND]](s32) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LSHR]](s32) ; CHECK-NEXT: $x10 = COPY [[ANYEXT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-or.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-or.mir index d5b26a42b203e..3c56929ef67bd 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-or.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-or.mir @@ -8,8 +8,11 @@ body: | ; CHECK-LABEL: name: or_i8 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; CHECK-NEXT: 
[[COPY1:%[0-9]+]]:_(s64) = COPY $x11 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $x10 = COPY [[OR]](s64) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR]](s32) + ; CHECK-NEXT: $x10 = COPY [[ANYEXT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %0:_(s64) = COPY $x10 %1:_(s64) = COPY $x11 @@ -28,8 +31,11 @@ body: | ; CHECK-LABEL: name: or_i15 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $x10 = COPY [[OR]](s64) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR]](s32) + ; CHECK-NEXT: $x10 = COPY [[ANYEXT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %0:_(s64) = COPY $x10 %1:_(s64) = COPY $x11 @@ -48,8 +54,11 @@ body: | ; CHECK-LABEL: name: or_i16 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $x10 = COPY [[OR]](s64) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR]](s32) + ; CHECK-NEXT: $x10 = COPY [[ANYEXT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %0:_(s64) = COPY $x10 %1:_(s64) = COPY $x11 @@ -68,8 +77,11 @@ body: | ; CHECK-LABEL: name: or_i32 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $x10 = COPY [[OR]](s64) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR]](s32) + ; CHECK-NEXT: $x10 = COPY [[ANYEXT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %0:_(s64) = COPY $x10 %1:_(s64) = COPY $x11 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-rem.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-rem.mir index ec165ce551330..e43516ee4acbb 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-rem.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-rem.mir @@ -332,13 +332,13 @@ body: | ; CHECK-M-LABEL: name: urem_i8 ; CHECK-M: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; CHECK-M-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 - ; CHECK-M-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 - ; CHECK-M-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] - ; CHECK-M-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) - ; CHECK-M-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 - ; CHECK-M-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]] - ; CHECK-M-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) - ; CHECK-M-NEXT: [[UREM:%[0-9]+]]:_(s32) = G_UREM [[TRUNC]], [[TRUNC1]] + ; CHECK-M-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-M-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC 
[[COPY]](s64) + ; CHECK-M-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] + ; CHECK-M-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-M-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-M-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C1]] + ; CHECK-M-NEXT: [[UREM:%[0-9]+]]:_(s32) = G_UREM [[AND]], [[AND1]] ; CHECK-M-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UREM]](s32) ; CHECK-M-NEXT: $x10 = COPY [[ANYEXT]](s64) ; CHECK-M-NEXT: PseudoRET implicit $x10 @@ -373,13 +373,13 @@ body: | ; CHECK-M-LABEL: name: urem_i15 ; CHECK-M: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; CHECK-M-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 - ; CHECK-M-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 32767 - ; CHECK-M-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] - ; CHECK-M-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) - ; CHECK-M-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32767 - ; CHECK-M-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]] - ; CHECK-M-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) - ; CHECK-M-NEXT: [[UREM:%[0-9]+]]:_(s32) = G_UREM [[TRUNC]], [[TRUNC1]] + ; CHECK-M-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 + ; CHECK-M-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-M-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] + ; CHECK-M-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 + ; CHECK-M-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-M-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C1]] + ; CHECK-M-NEXT: [[UREM:%[0-9]+]]:_(s32) = G_UREM [[AND]], [[AND1]] ; CHECK-M-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UREM]](s32) ; CHECK-M-NEXT: $x10 = COPY [[ANYEXT]](s64) ; CHECK-M-NEXT: PseudoRET implicit $x10 @@ -414,13 +414,13 @@ body: | ; CHECK-M-LABEL: name: urem_i16 ; CHECK-M: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; CHECK-M-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 - ; CHECK-M-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 - ; CHECK-M-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] - ; CHECK-M-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) - ; CHECK-M-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 - ; CHECK-M-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]] - ; CHECK-M-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) - ; CHECK-M-NEXT: [[UREM:%[0-9]+]]:_(s32) = G_UREM [[TRUNC]], [[TRUNC1]] + ; CHECK-M-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-M-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-M-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] + ; CHECK-M-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-M-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-M-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C1]] + ; CHECK-M-NEXT: [[UREM:%[0-9]+]]:_(s32) = G_UREM [[AND]], [[AND1]] ; CHECK-M-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UREM]](s32) ; CHECK-M-NEXT: $x10 = COPY [[ANYEXT]](s64) ; CHECK-M-NEXT: PseudoRET implicit $x10 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-shl.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-shl.mir index 3d81a140e3be2..0f9587b4c8b8d 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-shl.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-shl.mir @@ -8,11 +8,11 @@ body: | ; CHECK-LABEL: name: shl_i8 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 - ; CHECK-NEXT: 
[[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC1]], [[TRUNC]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC1]], [[AND]](s32) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[SHL]](s32) ; CHECK-NEXT: $x10 = COPY [[ANYEXT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 @@ -33,11 +33,11 @@ body: | ; CHECK-LABEL: name: shl_i15 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 32767 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC1]], [[TRUNC]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC1]], [[AND]](s32) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[SHL]](s32) ; CHECK-NEXT: $x10 = COPY [[ANYEXT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 @@ -58,11 +58,11 @@ body: | ; CHECK-LABEL: name: shl_i16 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC1]], [[TRUNC]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC1]], [[AND]](s32) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[SHL]](s32) ; CHECK-NEXT: $x10 = COPY [[ANYEXT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-store.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-store.mir index 0cba84fad1e2a..fb4ead8f8d711 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-store.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-store.mir @@ -209,10 +209,9 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C1]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[TRUNC1]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C1]] + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64) ; 
CHECK-NEXT: G_STORE [[TRUNC]](s32), [[COPY1]](p0) :: (store (s8)) @@ -254,20 +253,17 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C3]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[TRUNC1]], [[C2]](s32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C4]](s64) ; CHECK-NEXT: G_STORE [[COPY2]](s32), [[COPY1]](p0) :: (store (s8)) ; CHECK-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p0) :: (store (s8) into unknown-address + 1) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LSHR]](s32) - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C6]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[TRUNC2]], [[C5]](s32) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C6]] + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[C5]](s32) ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C7]](s64) ; CHECK-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p0) :: (store (s8) into unknown-address + 2) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-sub.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-sub.mir index 81c4edf2add7c..20bf8929c5552 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-sub.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-sub.mir @@ -195,11 +195,15 @@ body: | ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s64) = G_SUB [[SUB1]], [[AND]] ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[SUB1]](s64), [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ICMP2]], [[ICMP]] - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[ICMP1]], [[AND1]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[ICMP2]](s64) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[ICMP]](s64) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[ICMP1]](s64) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[TRUNC2]], [[AND1]] ; CHECK-NEXT: [[SUB3:%[0-9]+]]:_(s64) = G_SUB %hi1, %hi2 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[OR]], [[C2]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C2]] ; CHECK-NEXT: [[SUB4:%[0-9]+]]:_(s64) = G_SUB [[SUB3]], [[AND2]] ; CHECK-NEXT: $x10 = COPY [[SUB]](s64) ; CHECK-NEXT: $x11 = COPY [[SUB2]](s64) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-xor.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-xor.mir index 80081bd2498a1..469f8b25f7ec1 100644 --- 
a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-xor.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-xor.mir @@ -8,8 +8,11 @@ body: | ; CHECK-LABEL: name: xor_i8 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $x10 = COPY [[XOR]](s64) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[XOR]](s32) + ; CHECK-NEXT: $x10 = COPY [[ANYEXT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %0:_(s64) = COPY $x10 %1:_(s64) = COPY $x11 @@ -28,8 +31,11 @@ body: | ; CHECK-LABEL: name: xor_i15 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $x10 = COPY [[XOR]](s64) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[XOR]](s32) + ; CHECK-NEXT: $x10 = COPY [[ANYEXT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %0:_(s64) = COPY $x10 %1:_(s64) = COPY $x11 @@ -48,8 +54,11 @@ body: | ; CHECK-LABEL: name: xor_i16 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $x10 = COPY [[XOR]](s64) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[XOR]](s32) + ; CHECK-NEXT: $x10 = COPY [[ANYEXT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %0:_(s64) = COPY $x10 %1:_(s64) = COPY $x11 @@ -68,8 +77,11 @@ body: | ; CHECK-LABEL: name: xor_i32 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $x10 = COPY [[XOR]](s64) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[XOR]](s32) + ; CHECK-NEXT: $x10 = COPY [[ANYEXT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %0:_(s64) = COPY $x10 %1:_(s64) = COPY $x11 From 645b7795d49774ea055fc4e803bf99f742d2739a Mon Sep 17 00:00:00 2001 From: Youngsuk Kim Date: Thu, 26 Oct 2023 12:55:26 -0500 Subject: [PATCH 108/877] [mlir] Remove no-op ptr-to-ptr bitcasts (NFC) Opaque pointer cleanup effort. NFC. 
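For context, a minimal sketch of why these casts were dead (illustrative IR,
not taken from this patch): with opaque pointers the source and destination
of such a cast are both simply ptr, e.g.

    define void @example(ptr %g, i64 %size) {
      ; the removed code effectively emitted
      ;   %data = bitcast ptr %g to ptr   ; an identity, folded away
      call void @use(ptr %g, i64 %size)
      ret void
    }
    declare void @use(ptr, i64)

IRBuilder::CreateBitCast already returns its input unchanged when the source
and destination types are identical, so removing these calls does not change
the emitted IR.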
--- mlir/lib/ExecutionEngine/ExecutionEngine.cpp | 1 - .../LLVMIR/Dialect/OpenACC/OpenACCToLLVMIRTranslation.cpp | 8 ++------ .../LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp | 8 ++------ 3 files changed, 4 insertions(+), 13 deletions(-) diff --git a/mlir/lib/ExecutionEngine/ExecutionEngine.cpp b/mlir/lib/ExecutionEngine/ExecutionEngine.cpp index 9acbc9adf8140..dbcc0ba6fc99c 100644 --- a/mlir/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/mlir/lib/ExecutionEngine/ExecutionEngine.cpp @@ -194,7 +194,6 @@ static void packFunctionArguments(Module *module) { builder.CreateGEP(builder.getInt8PtrTy(), argList, retIndex); llvm::Value *retPtr = builder.CreateLoad(builder.getInt8PtrTy(), retPtrPtr); - retPtr = builder.CreateBitCast(retPtr, result->getType()->getPointerTo()); builder.CreateStore(result, retPtr); } diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenACC/OpenACCToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenACC/OpenACCToLLVMIRTranslation.cpp index 37fec190d6f40..9ce20d798ab6b 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenACC/OpenACCToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenACC/OpenACCToLLVMIRTranslation.cpp @@ -115,17 +115,13 @@ processOperands(llvm::IRBuilderBase &builder, llvm::Value *ptrBaseGEP = builder.CreateInBoundsGEP( arrI8PtrTy, mapperAllocas.ArgsBase, {builder.getInt32(0), builder.getInt32(index)}); - llvm::Value *ptrBaseCast = builder.CreateBitCast( - ptrBaseGEP, dataPtrBase->getType()->getPointerTo()); - builder.CreateStore(dataPtrBase, ptrBaseCast); + builder.CreateStore(dataPtrBase, ptrBaseGEP); // Store pointer extracted from operand into the i-th position of args. llvm::Value *ptrGEP = builder.CreateInBoundsGEP( arrI8PtrTy, mapperAllocas.Args, {builder.getInt32(0), builder.getInt32(index)}); - llvm::Value *ptrCast = - builder.CreateBitCast(ptrGEP, dataPtr->getType()->getPointerTo()); - builder.CreateStore(dataPtr, ptrCast); + builder.CreateStore(dataPtr, ptrGEP); // Store size extracted from operand into the i-th position of argSizes. llvm::Value *sizeGEP = builder.CreateInBoundsGEP( diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index eb8f6cf277b11..875ce11391587 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -1523,8 +1523,6 @@ convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::GlobalOp global = addressOfOp.getGlobal(moduleTranslation.symbolTable()); llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global); - llvm::Value *data = - builder.CreateBitCast(globalValue, builder.getInt8PtrTy()); llvm::Type *type = globalValue->getValueType(); llvm::TypeSize typeSize = builder.GetInsertBlock()->getModule()->getDataLayout().getTypeStoreSize( @@ -1532,12 +1530,10 @@ convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder, llvm::ConstantInt *size = builder.getInt64(typeSize.getFixedValue()); llvm::StringRef suffix = llvm::StringRef(".cache", 6); std::string cacheName = (Twine(global.getSymName()).concat(suffix)).str(); - // Emit runtime function and bitcast its type (i8*) to real data type. 
   llvm::Value *callInst =
       moduleTranslation.getOpenMPBuilder()->createCachedThreadPrivate(
-          ompLoc, data, size, cacheName);
-  llvm::Value *result = builder.CreateBitCast(callInst, globalValue->getType());
-  moduleTranslation.mapValue(opInst.getResult(0), result);
+          ompLoc, globalValue, size, cacheName);
+  moduleTranslation.mapValue(opInst.getResult(0), callInst);
   return success();
 }
 

From fceb7193ea538827ce7cfdf43bb972d88df53878 Mon Sep 17 00:00:00 2001
From: Ralf Jung
Date: Thu, 26 Oct 2023 20:09:38 +0200
Subject: [PATCH 109/877] clarify NaN propagation in fptrunc (#68554)

Follow-up to #66579: while implementing those semantics in Miri I realized
there's a special case to be considered in truncating float casts.
---
 llvm/docs/LangRef.rst | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index cabb5cd1bed62..c97a7ae372bc6 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -11332,7 +11332,10 @@ environment <floatenv>`.
 NaN values follow the usual :ref:`NaN behaviors <floatnan>`, except that _if_ a
 NaN payload is propagated from the input ("Quieting NaN propagation" or
 "Unchanged NaN propagation" cases), then the low order bits of the NaN payload
-which cannot fit in the resulting type are discarded.
+which cannot fit in the resulting type are discarded. Note that if discarding
+the low order bits leads to an all-0 payload, this cannot be represented as a
+signaling NaN (it would represent an infinity instead), so in that case
+"Unchanged NaN propagation" is not possible.
 
 Example:
 """"""""

From ff94061a9f15c121fa1068caed680c6916a215e0 Mon Sep 17 00:00:00 2001
From: Aart Bik <39774503+aartbik@users.noreply.github.com>
Date: Thu, 26 Oct 2023 11:14:44 -0700
Subject: [PATCH 110/877] [mlir][sparse] remove reshape dot test (#70359)

This no longer tests a required feature.
---
 .../SparseTensor/sparse_reshape_dot.mlir      | 89 ------------------
 1 file changed, 89 deletions(-)
 delete mode 100644 mlir/test/Dialect/SparseTensor/sparse_reshape_dot.mlir

diff --git a/mlir/test/Dialect/SparseTensor/sparse_reshape_dot.mlir b/mlir/test/Dialect/SparseTensor/sparse_reshape_dot.mlir
deleted file mode 100644
index c562d6845e84f..0000000000000
--- a/mlir/test/Dialect/SparseTensor/sparse_reshape_dot.mlir
+++ /dev/null
@@ -1,89 +0,0 @@
-//
-// TODO: this test case is temporarily disabled as we are improving zero-cost sparse tensor reshaping.
-// XFAIL: * -// -// RUN: mlir-opt %s --linalg-generalize-named-ops --sparsification --cse --canonicalize | FileCheck %s - -#COO_2D = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : compressed(nonunique), d1 : singleton), posWidth = 32, crdWidth = 32 }> -#COO_3D = #sparse_tensor.encoding<{ map = (d0, d1, d2) -> (d0 : compressed(nonunique), d1 : singleton(nonunique), d2 : singleton), posWidth = 32, crdWidth = 32 }> - - -// CHECK-LABEL: func.func @sparse_reshape_fused( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<5x6xf32>, -// CHECK-SAME: %[[VAL_1:.*]]: tensor<6x2x3xf32, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor { -// CHECK-DAG: %[[VAL_2:.*]] = arith.constant false -// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 5 : index -// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 3 : index -// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_7:.*]] = tensor.empty() : tensor<5x6xf32> -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} -// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} -// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 2 : index} -// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_1]] -// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_7]] : memref<5x6xf32> -// CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] { -// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_5]]] : memref -// CHECK: %[[VAL_16:.*]] = arith.extui %[[VAL_15]] : i32 to i64 -// CHECK: %[[VAL_17:.*]] = arith.index_cast %[[VAL_16]] : i64 to index -// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_6]]] : memref -// CHECK: %[[VAL_19:.*]] = arith.extui %[[VAL_18]] : i32 to i64 -// CHECK: %[[VAL_20:.*]] = arith.index_cast %[[VAL_19]] : i64 to index -// CHECK: %[[VAL_21:.*]] = scf.while (%[[VAL_22:.*]] = %[[VAL_17]]) : (index) -> index { -// CHECK: %[[VAL_23:.*]] = arith.cmpi ult, %[[VAL_22]], %[[VAL_20]] : index -// CHECK: scf.condition(%[[VAL_23]]) %[[VAL_22]] : index -// CHECK: } do { -// CHECK: ^bb0(%[[VAL_24:.*]]: index): -// CHECK: %[[VAL_25:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_24]]] : memref> -// CHECK: %[[VAL_26:.*]] = arith.extui %[[VAL_25]] : i32 to i64 -// CHECK: %[[VAL_27:.*]] = arith.index_cast %[[VAL_26]] : i64 to index -// CHECK: %[[VAL_28:.*]] = scf.while (%[[VAL_29:.*]] = %[[VAL_24]]) : (index) -> index { -// CHECK: %[[VAL_30:.*]] = arith.cmpi ult, %[[VAL_29]], %[[VAL_20]] : index -// CHECK: %[[VAL_31:.*]] = scf.if %[[VAL_30]] -> (i1) { -// CHECK: %[[VAL_32:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_29]]] : memref> -// CHECK: %[[VAL_33:.*]] = arith.extui %[[VAL_32]] : i32 to i64 -// CHECK: %[[VAL_34:.*]] = arith.index_cast %[[VAL_33]] : i64 to index -// CHECK: %[[VAL_35:.*]] = arith.cmpi eq, %[[VAL_34]], %[[VAL_27]] : index -// CHECK: scf.yield %[[VAL_35]] : i1 -// CHECK: } else { -// CHECK: scf.yield %[[VAL_2]] : i1 -// CHECK: } -// CHECK: scf.condition(%[[VAL_36:.*]]) %[[VAL_29]] : index -// CHECK: } do { -// CHECK: ^bb0(%[[VAL_37:.*]]: index): -// CHECK: %[[VAL_38:.*]] = arith.addi %[[VAL_37]], %[[VAL_6]] : index -// CHECK: scf.yield %[[VAL_38]] : index -// CHECK: } -// CHECK: %[[VAL_39:.*]] = tensor.extract %[[VAL_0]]{{\[}}%[[VAL_14]], %[[VAL_27]]] : tensor<5x6xf32> -// CHECK: scf.for %[[VAL_40:.*]] = %[[VAL_24]] to %[[VAL_41:.*]] step %[[VAL_6]] { -// CHECK: %[[VAL_42:.*]] = 
memref.load %[[VAL_10]]{{\[}}%[[VAL_40]]] : memref>
-// CHECK: %[[VAL_43:.*]] = arith.extui %[[VAL_42]] : i32 to i64
-// CHECK: %[[VAL_44:.*]] = arith.index_cast %[[VAL_43]] : i64 to index
-// CHECK: %[[VAL_45:.*]] = arith.muli %[[VAL_44]], %[[VAL_4]] : index
-// CHECK: %[[VAL_46:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_40]]] : memref>
-// CHECK: %[[VAL_47:.*]] = arith.extui %[[VAL_46]] : i32 to i64
-// CHECK: %[[VAL_48:.*]] = arith.index_cast %[[VAL_47]] : i64 to index
-// CHECK: %[[VAL_49:.*]] = arith.addi %[[VAL_45]], %[[VAL_48]] : index
-// CHECK: %[[VAL_50:.*]] = tensor.extract %[[VAL_7]]{{\[}}%[[VAL_14]], %[[VAL_49]]] : tensor<5x6xf32>
-// CHECK: %[[VAL_51:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_40]]] : memref
-// CHECK: %[[VAL_52:.*]] = arith.mulf %[[VAL_39]], %[[VAL_51]] : f32
-// CHECK: %[[VAL_53:.*]] = arith.addf %[[VAL_50]], %[[VAL_52]] : f32
-// CHECK: memref.store %[[VAL_53]], %[[VAL_13]]{{\[}}%[[VAL_14]], %[[VAL_49]]] : memref<5x6xf32>
-// CHECK: } {"Emitted from" = "linalg.generic"}
-// CHECK: scf.yield %[[VAL_54:.*]] : index
-// CHECK: } attributes {"Emitted from" = "linalg.generic"}
-// CHECK: } {"Emitted from" = "linalg.generic"}
-// CHECK: %[[VAL_55:.*]] = bufferization.to_tensor %[[VAL_13]] : memref<5x6xf32>
-// CHECK: %[[VAL_56:.*]] = tensor.expand_shape %[[VAL_55]] {{\[\[}}0], [1, 2]] : tensor<5x6xf32> into tensor<5x2x3xf32>
-// CHECK: %[[VAL_57:.*]] = tensor.cast %[[VAL_56]] : tensor<5x2x3xf32> to tensor
-// CHECK: return %[[VAL_57]] : tensor
-// CHECK: }
-func.func @sparse_reshape_fused(%arg0: tensor<5x6xf32>, %arg1: tensor<6x2x3xf32, #COO_3D>) -> tensor {
-  %collapsed = tensor.collapse_shape %arg1 [[0], [1, 2]] : tensor<6x2x3xf32, #COO_3D> into tensor<6x6xf32, #COO_2D>
-  %0 = tensor.empty() : tensor<5x6xf32>
-  %2 = linalg.matmul ins(%arg0, %collapsed : tensor<5x6xf32>, tensor<6x6xf32, #COO_2D>) outs(%0 : tensor<5x6xf32>) -> tensor<5x6xf32>
-  %expanded = tensor.expand_shape %2 [[0], [1, 2]] : tensor<5x6xf32> into tensor<5x2x3xf32>
-  %ret1 = tensor.cast %expanded : tensor<5x2x3xf32> to tensor
-  return %ret1 : tensor
-}

From cf07904ee4ef5445342404f28193cb8583c0a61c Mon Sep 17 00:00:00 2001
From: Jinsong Ji
Date: Thu, 26 Oct 2023 14:26:47 -0400
Subject: [PATCH 111/877] [Github] Fix libc docs build (#70363)

https://github.com/llvm/llvm-project/pull/69824 added the libc docs
build, but omitted the build directory from the ninja command, which
caused failures:

ninja: fatal: chdir to 'docs-libc-html' - No such file or directory
ninja: Entering directory `docs-libc-html'

---
 .github/workflows/docs.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index cbb3706cc1bcf..e6af2f41167e0 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -119,7 +119,7 @@ jobs:
     if: steps.docs-changed-subprojects.outputs.libc_any_changed == 'true'
     run: |
       cmake -B libc-build -GNinja -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_RUNTIMES="libc" -DLLVM_ENABLE_SPHINX=ON ./runtimes
-      TZ=UTC ninja -C docs-libc-html
+      TZ=UTC ninja -C libc-build docs-libc-html
     - name: Build LLD docs
       if: steps.docs-changed-subprojects.outputs.lld_any_changed == 'true'
       run: |

From f9ead46931aef2978ddf350ba6523638175d7861 Mon Sep 17 00:00:00 2001
From: Jessica Clarke
Date: Thu, 26 Oct 2023 19:28:28 +0100
Subject: [PATCH 112/877] [AST] Only dump desugared type when visibly
 different (#65214)

These are an artifact of how types are structured but serve little
purpose, merely showing that the type is sugared in some way.
For example, ElaboratedType's existence means struct S gets printed as 'struct S':'struct S' in the AST, which is unnecessary visual clutter. Note that skipping the second print when the types have the same string matches what we do for diagnostics, where the aka will be skipped. --- clang/docs/HowToSetupToolingForLLVM.rst | 2 +- clang/docs/ReleaseNotes.rst | 22 +++++++ clang/lib/AST/JSONNodeDumper.cpp | 10 +++- clang/lib/AST/TextNodeDumper.cpp | 13 ++-- .../AST/HLSL/this-reference-template.hlsl | 8 +-- .../test/AST/ast-dump-APValue-anon-union.cpp | 10 ++-- clang/test/AST/ast-dump-APValue-struct.cpp | 12 ++-- clang/test/AST/ast-dump-APValue-union.cpp | 10 ++-- clang/test/AST/ast-dump-attr.cpp | 12 ++-- clang/test/AST/ast-dump-decl-json.c | 4 -- clang/test/AST/ast-dump-decl-json.m | 1 - clang/test/AST/ast-dump-decl.cpp | 26 ++++---- clang/test/AST/ast-dump-decl.m | 2 +- clang/test/AST/ast-dump-expr-json.c | 6 -- clang/test/AST/ast-dump-expr-json.cpp | 40 ------------- clang/test/AST/ast-dump-expr-json.m | 24 -------- clang/test/AST/ast-dump-expr.c | 6 +- clang/test/AST/ast-dump-expr.cpp | 22 +++---- clang/test/AST/ast-dump-fpfeatures.cpp | 2 +- clang/test/AST/ast-dump-funcs.cpp | 2 +- clang/test/AST/ast-dump-functionprototype.cpp | 6 +- clang/test/AST/ast-dump-lambda.cpp | 2 +- clang/test/AST/ast-dump-objc-arc-json.m | 1 - ...openmp-begin-declare-variant_reference.cpp | 4 +- ...penmp-begin-declare-variant_template_1.cpp | 8 +-- ...penmp-begin-declare-variant_template_2.cpp | 14 ++--- ...penmp-begin-declare-variant_template_3.cpp | 44 +++++++------- .../AST/ast-dump-overloaded-operators.cpp | 16 ++--- clang/test/AST/ast-dump-records-json.cpp | 7 --- clang/test/AST/ast-dump-records.cpp | 8 +-- clang/test/AST/ast-dump-recovery.cpp | 12 ++-- clang/test/AST/ast-dump-stmt-json.cpp | 60 ------------------- clang/test/AST/ast-dump-stmt.cpp | 52 ++++++++-------- clang/test/AST/ast-dump-stmt.m | 2 +- .../test/AST/ast-dump-template-decls-json.cpp | 1 - clang/test/AST/ast-dump-template-decls.cpp | 2 +- ...dump-template-json-win32-mangler-crash.cpp | 2 - clang/test/AST/ast-dump-temporaries-json.cpp | 5 -- clang/test/AST/ast-dump-types-json.cpp | 1 - clang/test/AST/coroutine-locals-cleanup.cpp | 4 +- clang/test/AST/float16.cpp | 32 +++++----- clang/test/AST/nrvo.c | 12 ++-- clang/test/AST/sourceranges.cpp | 4 +- clang/test/C/drs/dr253.c | 2 +- .../dcl.decl/dcl.init/dcl.init.ref/p4-ast.cpp | 6 +- clang/test/Import/objc-param-decl/test.m | 2 +- clang/test/OpenMP/align_clause_ast_print.cpp | 2 +- clang/test/OpenMP/generic_loop_ast_print.cpp | 6 +- clang/test/OpenMP/scope_ast_print.cpp | 4 +- clang/test/SemaCXX/co_await-ast.cpp | 14 ++--- clang/test/SemaCXX/consteval-cleanup.cpp | 14 ++--- .../address-space-deduction.clcpp | 2 +- clang/test/SemaOpenCLCXX/addrspace-auto.clcpp | 8 +-- .../aggregate-deduction-candidate.cpp | 34 +++++------ clang/test/SemaTemplate/deduction-guide.cpp | 12 ++-- .../SemaTemplate/default-expr-arguments-3.cpp | 6 +- clang/test/SemaTemplate/make_integer_seq.cpp | 8 +-- clang/test/SemaTemplate/pr47676.cpp | 2 +- clang/test/SemaTemplate/type_pack_element.cpp | 2 +- clang/unittests/AST/ASTImporterTest.cpp | 2 +- 60 files changed, 269 insertions(+), 390 deletions(-) diff --git a/clang/docs/HowToSetupToolingForLLVM.rst b/clang/docs/HowToSetupToolingForLLVM.rst index 62189511aeb2a..dc1c17b0ae68d 100644 --- a/clang/docs/HowToSetupToolingForLLVM.rst +++ b/clang/docs/HowToSetupToolingForLLVM.rst @@ -172,7 +172,7 @@ Examples: clang::ASTConsumer *newASTConsumer() (CompoundStmt 0x44da290 (IfStmt 
0x44d97c8 <<>> - (ImplicitCastExpr 0x44d96d0 '_Bool':'_Bool' + (ImplicitCastExpr 0x44d96d0 '_Bool' ... $ clang-check tools/clang/tools/clang-check/ClangCheck.cpp -ast-print -ast-dump-filter ActionFactory::newASTConsumer Processing: tools/clang/tools/clang-check/ClangCheck.cpp. diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 7238386231e1a..82550232947f7 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -113,6 +113,28 @@ ABI Changes in This Version - Following the SystemV ABI for x86-64, ``__int128`` arguments will no longer be split between a register and a stack slot. +AST Dumping Potentially Breaking Changes +---------------------------------------- +- When dumping a sugared type, Clang will no longer print the desugared type if + its textual representation is the same as the sugared one. This applies to + both text dumps of the form ``'foo':'foo'`` which will now be dumped as just + ``'foo'``, and JSON dumps of the form: + + .. code-block:: json + + "type": { + "qualType": "foo", + "desugaredQualType": "foo" + } + + which will now be dumped as just: + + .. code-block:: json + + "type": { + "qualType": "foo" + } + What's New in Clang |release|? ============================== Some of the major new features and improvements to Clang are listed diff --git a/clang/lib/AST/JSONNodeDumper.cpp b/clang/lib/AST/JSONNodeDumper.cpp index beb07015f0bcb..25b94ec5616b1 100644 --- a/clang/lib/AST/JSONNodeDumper.cpp +++ b/clang/lib/AST/JSONNodeDumper.cpp @@ -315,12 +315,16 @@ std::string JSONNodeDumper::createPointerRepresentation(const void *Ptr) { llvm::json::Object JSONNodeDumper::createQualType(QualType QT, bool Desugar) { SplitQualType SQT = QT.split(); - llvm::json::Object Ret{{"qualType", QualType::getAsString(SQT, PrintPolicy)}}; + std::string SQTS = QualType::getAsString(SQT, PrintPolicy); + llvm::json::Object Ret{{"qualType", SQTS}}; if (Desugar && !QT.isNull()) { SplitQualType DSQT = QT.getSplitDesugaredType(); - if (DSQT != SQT) - Ret["desugaredQualType"] = QualType::getAsString(DSQT, PrintPolicy); + if (DSQT != SQT) { + std::string DSQTS = QualType::getAsString(DSQT, PrintPolicy); + if (DSQTS != SQTS) + Ret["desugaredQualType"] = DSQTS; + } if (const auto *TT = QT->getAs()) Ret["typeAliasDeclId"] = createPointerRepresentation(TT->getDecl()); } diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp index 15eb6c6191edf..ca609cf2d2060 100644 --- a/clang/lib/AST/TextNodeDumper.cpp +++ b/clang/lib/AST/TextNodeDumper.cpp @@ -692,13 +692,18 @@ void TextNodeDumper::dumpBareType(QualType T, bool Desugar) { ColorScope Color(OS, ShowColors, TypeColor); SplitQualType T_split = T.split(); - OS << "'" << QualType::getAsString(T_split, PrintPolicy) << "'"; + std::string T_str = QualType::getAsString(T_split, PrintPolicy); + OS << "'" << T_str << "'"; if (Desugar && !T.isNull()) { - // If the type is sugared, also dump a (shallow) desugared type. + // If the type is sugared, also dump a (shallow) desugared type when + // it is visibly different. 
SplitQualType D_split = T.getSplitDesugaredType(); - if (T_split != D_split) - OS << ":'" << QualType::getAsString(D_split, PrintPolicy) << "'"; + if (T_split != D_split) { + std::string D_str = QualType::getAsString(D_split, PrintPolicy); + if (T_str != D_str) + OS << ":'" << QualType::getAsString(D_split, PrintPolicy) << "'"; + } } } diff --git a/clang/test/AST/HLSL/this-reference-template.hlsl b/clang/test/AST/HLSL/this-reference-template.hlsl index 3b7fba3efdc74..60e057986ebf8 100644 --- a/clang/test/AST/HLSL/this-reference-template.hlsl +++ b/clang/test/AST/HLSL/this-reference-template.hlsl @@ -35,12 +35,12 @@ void main() { // CHECK: -CXXMethodDecl 0x{{[0-9A-Fa-f]+}} line:8:5 used getFirst 'int ()' implicit_instantiation implicit-inline // CHECK-NEXT:-CompoundStmt 0x{{[0-9A-Fa-f]+}} // CHECK-NEXT:-ReturnStmt 0x{{[0-9A-Fa-f]+}} -// CHECK-NEXT:-ImplicitCastExpr 0x{{[0-9A-Fa-f]+}} 'int':'int' -// CHECK-NEXT:-MemberExpr 0x{{[0-9A-Fa-f]+}} 'int':'int' lvalue .First 0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT:-ImplicitCastExpr 0x{{[0-9A-Fa-f]+}} 'int' +// CHECK-NEXT:-MemberExpr 0x{{[0-9A-Fa-f]+}} 'int' lvalue .First 0x{{[0-9A-Fa-f]+}} // CHECK-NEXT:-CXXThisExpr 0x{{[0-9A-Fa-f]+}} 'Pair' lvalue this // CHECK-NEXT:-CXXMethodDecl 0x{{[0-9A-Fa-f]+}} line:12:5 used getSecond 'float ()' implicit_instantiation implicit-inline // CHECK-NEXT:-CompoundStmt 0x{{[0-9A-Fa-f]+}} // CHECK-NEXT:-ReturnStmt 0x{{[0-9A-Fa-f]+}} -// CHECK-NEXT:-ImplicitCastExpr 0x{{[0-9A-Fa-f]+}} 'float':'float' -// CHECK-NEXT:-MemberExpr 0x{{[0-9A-Fa-f]+}} 'float':'float' lvalue .Second 0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT:-ImplicitCastExpr 0x{{[0-9A-Fa-f]+}} 'float' +// CHECK-NEXT:-MemberExpr 0x{{[0-9A-Fa-f]+}} 'float' lvalue .Second 0x{{[0-9A-Fa-f]+}} // CHECK-NEXT:-CXXThisExpr 0x{{[0-9A-Fa-f]+}} 'Pair' lvalue implicit this diff --git a/clang/test/AST/ast-dump-APValue-anon-union.cpp b/clang/test/AST/ast-dump-APValue-anon-union.cpp index 906bfe4857ed0..0e6466ee1fd73 100644 --- a/clang/test/AST/ast-dump-APValue-anon-union.cpp +++ b/clang/test/AST/ast-dump-APValue-anon-union.cpp @@ -30,23 +30,23 @@ union U1 { void Test() { constexpr S0 s0{}; - // CHECK: | `-VarDecl {{.*}} col:{{.*}} s0 'const S0':'const S0' constexpr listinit + // CHECK: | `-VarDecl {{.*}} col:{{.*}} s0 'const S0' constexpr listinit // CHECK-NEXT: | |-value: Struct // CHECK-NEXT: | | `-field: Union .i Int 42 constexpr U0 u0a{}; - // CHECK: | `-VarDecl {{.*}} col:{{.*}} u0a 'const U0':'const U0' constexpr listinit + // CHECK: | `-VarDecl {{.*}} col:{{.*}} u0a 'const U0' constexpr listinit // CHECK-NEXT: | |-value: Union None constexpr U0 u0b{3.1415f}; - // CHECK: | `-VarDecl {{.*}} col:{{.*}} u0b 'const U0':'const U0' constexpr listinit + // CHECK: | `-VarDecl {{.*}} col:{{.*}} u0b 'const U0' constexpr listinit // CHECK-NEXT: | |-value: Union .U0::(anonymous union at {{.*}}) Union .f Float 3.141500e+00 constexpr U1 u1a{}; - // CHECK: | `-VarDecl {{.*}} col:{{.*}} u1a 'const U1':'const U1' constexpr listinit + // CHECK: | `-VarDecl {{.*}} col:{{.*}} u1a 'const U1' constexpr listinit // CHECK-NEXT: | |-value: Union .U1::(anonymous union at {{.*}}) Union .f Float 0.000000e+00 constexpr U1 u1b{3.1415f}; - // CHECK: `-VarDecl {{.*}} col:{{.*}} u1b 'const U1':'const U1' constexpr listinit + // CHECK: `-VarDecl {{.*}} col:{{.*}} u1b 'const U1' constexpr listinit // CHECK-NEXT: |-value: Union .U1::(anonymous union at {{.*}}) Union .f Float 3.141500e+00 } diff --git a/clang/test/AST/ast-dump-APValue-struct.cpp b/clang/test/AST/ast-dump-APValue-struct.cpp index 
04d1877c293d1..4730404abc287 100644 --- a/clang/test/AST/ast-dump-APValue-struct.cpp +++ b/clang/test/AST/ast-dump-APValue-struct.cpp @@ -60,12 +60,12 @@ struct S5 : S4 { void Test() { constexpr S0 s0{}; - // CHECK: | `-VarDecl {{.*}} col:{{.*}} s0 'const S0':'const S0' constexpr listinit + // CHECK: | `-VarDecl {{.*}} col:{{.*}} s0 'const S0' constexpr listinit // CHECK-NEXT: | |-value: Struct // CHECK-NEXT: | | `-fields: Int 0, Union .j Int 0 constexpr S1 s1{}; - // CHECK: | `-VarDecl {{.*}} col:{{.*}} s1 'const S1':'const S1' constexpr listinit + // CHECK: | `-VarDecl {{.*}} col:{{.*}} s1 'const S1' constexpr listinit // CHECK-NEXT: | |-value: Struct // CHECK-NEXT: | | |-field: Int 0 // CHECK-NEXT: | | `-field: Union .s @@ -73,12 +73,12 @@ void Test() { // CHECK-NEXT: | | `-field: Int 0 constexpr S2 s2{}; - // CHECK: | `-VarDecl {{.*}} col:{{.*}} s2 'const S2':'const S2' constexpr listinit + // CHECK: | `-VarDecl {{.*}} col:{{.*}} s2 'const S2' constexpr listinit // CHECK-NEXT: | |-value: Struct // CHECK-NEXT: | | `-fields: Int 0, Union .u Union .j Int 0 constexpr S3 s3{}; - // CHECK: | `-VarDecl {{.*}} col:{{.*}} s3 'const S3':'const S3' constexpr listinit + // CHECK: | `-VarDecl {{.*}} col:{{.*}} s3 'const S3' constexpr listinit // CHECK-NEXT: | |-value: Struct // CHECK-NEXT: | | |-field: Int 0 // CHECK-NEXT: | | `-field: Union .u @@ -87,7 +87,7 @@ void Test() { // CHECK-NEXT: | | `-field: Int 0 constexpr S4 s4{}; - // CHECK: | `-VarDecl {{.*}} col:{{.*}} s4 'const S4':'const S4' constexpr listinit + // CHECK: | `-VarDecl {{.*}} col:{{.*}} s4 'const S4' constexpr listinit // CHECK-NEXT: | |-value: Struct // CHECK-NEXT: | | |-base: Struct // CHECK-NEXT: | | | `-fields: Int 0, Union .j Int 0 @@ -96,7 +96,7 @@ void Test() { // CHECK-NEXT: | | `-fields: Int 4, Int 5, Int 6 constexpr S5 s5{}; - // CHECK: `-VarDecl {{.*}} col:{{.*}} s5 'const S5':'const S5' constexpr listinit + // CHECK: `-VarDecl {{.*}} col:{{.*}} s5 'const S5' constexpr listinit // CHECK-NEXT: |-value: Struct // CHECK-NEXT: | |-base: Struct // CHECK-NEXT: | | |-base: Struct diff --git a/clang/test/AST/ast-dump-APValue-union.cpp b/clang/test/AST/ast-dump-APValue-union.cpp index b70b5ea484a6e..c717b6ece7382 100644 --- a/clang/test/AST/ast-dump-APValue-union.cpp +++ b/clang/test/AST/ast-dump-APValue-union.cpp @@ -39,25 +39,25 @@ union U3 { void Test() { constexpr U0 u0{}; - // CHECK: | `-VarDecl {{.*}} col:{{.*}} u0 'const U0':'const U0' constexpr listinit + // CHECK: | `-VarDecl {{.*}} col:{{.*}} u0 'const U0' constexpr listinit // CHECK-NEXT: | |-value: Union .i Int 42 constexpr U1 u1{}; - // CHECK: | `-VarDecl {{.*}} col:{{.*}} u1 'const U1':'const U1' constexpr listinit + // CHECK: | `-VarDecl {{.*}} col:{{.*}} u1 'const U1' constexpr listinit // CHECK-NEXT: | |-value: Union .uinner Union .f Float 3.141500e+00 constexpr U2 u2{}; - // CHECK: | `-VarDecl {{.*}} col:{{.*}} u2 'const U2':'const U2' constexpr listinit + // CHECK: | `-VarDecl {{.*}} col:{{.*}} u2 'const U2' constexpr listinit // CHECK-NEXT: | |-value: Union .uinner // CHECK-NEXT: | | `-Union .arr // CHECK-NEXT: | | `-Array size=2 // CHECK-NEXT: | | `-elements: Int 1, Int 2 constexpr U3 u3a = {.f = 3.1415}; - // CHECK: | `-VarDecl {{.*}} col:{{.*}} u3a 'const U3':'const U3' constexpr cinit + // CHECK: | `-VarDecl {{.*}} col:{{.*}} u3a 'const U3' constexpr cinit // CHECK-NEXT: | |-value: Union .f Float 3.141500e+00 constexpr U3 u3b = {.uinner = {}}; - // CHECK: `-VarDecl {{.*}} col:{{.*}} u3b 'const U3':'const U3' constexpr cinit + // CHECK: `-VarDecl {{.*}} 
col:{{.*}} u3b 'const U3' constexpr cinit // CHECK-NEXT: |-value: Union .uinner Union .d Float 3.141500e+00 } diff --git a/clang/test/AST/ast-dump-attr.cpp b/clang/test/AST/ast-dump-attr.cpp index 8fd4a8f3a54c6..f5a7481571421 100644 --- a/clang/test/AST/ast-dump-attr.cpp +++ b/clang/test/AST/ast-dump-attr.cpp @@ -146,17 +146,17 @@ struct C { char a[16]; }; // CHECK: ClassTemplateSpecializationDecl {{.*}} struct my_union // CHECK: CXXRecordDecl {{.*}} implicit struct my_union // CHECK: FieldDecl {{.*}} buffer 'char[1024]' -// CHECK-NEXT: AlignedAttr {{.*}} alignas 'TestAligns::A':'TestAligns::A' -// CHECK-NEXT: AlignedAttr {{.*}} alignas 'TestAligns::B':'TestAligns::B' -// CHECK-NEXT: AlignedAttr {{.*}} alignas 'TestAligns::C':'TestAligns::C' +// CHECK-NEXT: AlignedAttr {{.*}} alignas 'TestAligns::A' +// CHECK-NEXT: AlignedAttr {{.*}} alignas 'TestAligns::B' +// CHECK-NEXT: AlignedAttr {{.*}} alignas 'TestAligns::C' my_union my_union_val; // CHECK: ClassTemplateSpecializationDecl {{.*}} struct my_union2 // CHECK: CXXRecordDecl {{.*}} implicit struct my_union2 // CHECK: FieldDecl {{.*}} buffer 'char[1024]' -// CHECK-NEXT: AlignedAttr {{.*}} _Alignas 'TestAligns::A':'TestAligns::A' -// CHECK-NEXT: AlignedAttr {{.*}} _Alignas 'TestAligns::B':'TestAligns::B' -// CHECK-NEXT: AlignedAttr {{.*}} _Alignas 'TestAligns::C':'TestAligns::C' +// CHECK-NEXT: AlignedAttr {{.*}} _Alignas 'TestAligns::A' +// CHECK-NEXT: AlignedAttr {{.*}} _Alignas 'TestAligns::B' +// CHECK-NEXT: AlignedAttr {{.*}} _Alignas 'TestAligns::C' my_union2 my_union2_val; } // namespace TestAligns diff --git a/clang/test/AST/ast-dump-decl-json.c b/clang/test/AST/ast-dump-decl-json.c index 55b918afaab5d..7f53cda5020b5 100644 --- a/clang/test/AST/ast-dump-decl-json.c +++ b/clang/test/AST/ast-dump-decl-json.c @@ -1351,7 +1351,6 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "x", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "enum Enum", // CHECK-NEXT: "qualType": "enum Enum" // CHECK-NEXT: } // CHECK-NEXT: }, @@ -1424,7 +1423,6 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "enum Enum", // CHECK-NEXT: "qualType": "enum Enum" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "prvalue", @@ -1446,7 +1444,6 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "enum Enum", // CHECK-NEXT: "qualType": "enum Enum" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "lvalue", @@ -1455,7 +1452,6 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: "kind": "ParmVarDecl", // CHECK-NEXT: "name": "x", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "enum Enum", // CHECK-NEXT: "qualType": "enum Enum" // CHECK-NEXT: } // CHECK-NEXT: } diff --git a/clang/test/AST/ast-dump-decl-json.m b/clang/test/AST/ast-dump-decl-json.m index 9d82c6696cb52..f7067ac0d3b77 100644 --- a/clang/test/AST/ast-dump-decl-json.m +++ b/clang/test/AST/ast-dump-decl-json.m @@ -911,7 +911,6 @@ void f(void) { // CHECK-NEXT: }, // CHECK-NEXT: "name": "T", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "id", // CHECK-NEXT: "qualType": "id", // CHECK-NEXT: "typeAliasDeclId": "0x{{.*}}" // CHECK-NEXT: } diff --git a/clang/test/AST/ast-dump-decl.cpp b/clang/test/AST/ast-dump-decl.cpp index 017e640aeaea6..d74aa9045532e 100644 --- a/clang/test/AST/ast-dump-decl.cpp +++ b/clang/test/AST/ast-dump-decl.cpp 
@@ -248,19 +248,19 @@ namespace testFunctionTemplateDecl { // CHECK-NEXT: | |-TemplateArgument type 'testFunctionTemplateDecl::A' // CHECK-NEXT: | | `-RecordType 0{{.+}} 'testFunctionTemplateDecl::A' // CHECK-NEXT: | | `-CXXRecord 0x{{.+}} 'A' - // CHECK-NEXT: | |-ParmVarDecl 0x{{.+}} col:51 'testFunctionTemplateDecl::A':'testFunctionTemplateDecl::A' + // CHECK-NEXT: | |-ParmVarDecl 0x{{.+}} col:51 'testFunctionTemplateDecl::A' // CHECK-NEXT: | `-CompoundStmt 0x{{.+}} // CHECK-NEXT: |-Function 0x{{.+}} 'TestFunctionTemplate' 'void (B)' // CHECK-NEXT: |-FunctionDecl 0x{{.+}} col:29 TestFunctionTemplate 'void (testFunctionTemplateDecl::C)' // CHECK-NEXT: | |-TemplateArgument type 'testFunctionTemplateDecl::C' // CHECK-NEXT: | | `-RecordType 0{{.+}} 'testFunctionTemplateDecl::C' // CHECK-NEXT: | | `-CXXRecord 0x{{.+}} 'C' - // CHECK-NEXT: | `-ParmVarDecl 0x{{.+}} col:51 'testFunctionTemplateDecl::C':'testFunctionTemplateDecl::C' + // CHECK-NEXT: | `-ParmVarDecl 0x{{.+}} col:51 'testFunctionTemplateDecl::C' // CHECK-NEXT: `-FunctionDecl 0x{{.+}} col:29 TestFunctionTemplate 'void (testFunctionTemplateDecl::D)' // CHECK-NEXT: |-TemplateArgument type 'testFunctionTemplateDecl::D' // CHECK-NEXT: | `-RecordType 0{{.+}} 'testFunctionTemplateDecl::D' // CHECK-NEXT: | `-CXXRecord 0x{{.+}} 'D' - // CHECK-NEXT: |-ParmVarDecl 0x{{.+}} col:51 'testFunctionTemplateDecl::D':'testFunctionTemplateDecl::D' + // CHECK-NEXT: |-ParmVarDecl 0x{{.+}} col:51 'testFunctionTemplateDecl::D' // CHECK-NEXT: `-CompoundStmt 0x{{.+}} // CHECK: FunctionDecl 0x{{.+}} prev 0x{{.+}} <{{.+}}:[[@LINE-32]]:3, col:41> col:19 TestFunctionTemplate 'void (B)' @@ -500,7 +500,7 @@ namespace testCanonicalTemplate { // CHECK-NEXT: |-TemplateArgument type 'testCanonicalTemplate::A'{{$}} // CHECK-NEXT: | `-RecordType 0x{{.+}} 'testCanonicalTemplate::A'{{$}} // CHECK-NEXT: | `-CXXRecord 0x{{.+}} 'A'{{$}} - // CHECK-NEXT: `-ParmVarDecl 0x{{.*}} col:51 'testCanonicalTemplate::A':'testCanonicalTemplate::A'{{$}} + // CHECK-NEXT: `-ParmVarDecl 0x{{.*}} col:51 'testCanonicalTemplate::A'{{$}} // CHECK: FunctionTemplateDecl 0x{{.+}} prev 0x{{.+}} <{{.+}}:[[@LINE-12]]:3, col:51> col:29 TestFunctionTemplate{{$}} // CHECK-NEXT: |-TemplateTypeParmDecl 0x{{.+}} col:21 referenced typename depth 0 index 0 T{{$}} @@ -613,15 +613,15 @@ namespace testCanonicalTemplate { // CHECK: VarTemplateDecl 0x{{.+}} <{{.+}}:[[@LINE-11]]:7, col:43> col:43 TestVarTemplate{{$}} // CHECK-NEXT: |-TemplateTypeParmDecl 0x{{.+}} col:25 referenced typename depth 0 index 0 T{{$}} // CHECK-NEXT: |-VarDecl 0x{{.+}} col:43 TestVarTemplate 'const T' static{{$}} - // CHECK-NEXT: |-VarTemplateSpecializationDecl 0x{{.+}} parent 0x{{.+}} prev 0x{{.+}} col:14 referenced TestVarTemplate 'const int':'const int' implicit_instantiation cinit{{$}} + // CHECK-NEXT: |-VarTemplateSpecializationDecl 0x{{.+}} parent 0x{{.+}} prev 0x{{.+}} col:14 referenced TestVarTemplate 'const int' implicit_instantiation cinit{{$}} // CHECK-NEXT: | |-NestedNameSpecifier TypeSpec 'testCanonicalTemplate::S'{{$}} // CHECK-NEXT: | |-TemplateArgument type 'int'{{$}} // CHECK-NEXT: | | `-BuiltinType 0x{{.+}} 'int'{{$}} - // CHECK-NEXT: | `-InitListExpr 0x{{.+}} 'int':'int'{{$}} - // CHECK-NEXT: `-VarTemplateSpecializationDecl 0x{{.+}} col:43 referenced TestVarTemplate 'const int':'const int' implicit_instantiation static{{$}} + // CHECK-NEXT: | `-InitListExpr 0x{{.+}} 'int'{{$}} + // CHECK-NEXT: `-VarTemplateSpecializationDecl 0x{{.+}} col:43 referenced TestVarTemplate 'const int' implicit_instantiation static{{$}} // 
CHECK-NEXT: `-TemplateArgument type 'int'{{$}} - // CHECK: VarTemplateSpecializationDecl 0x{{.+}} <{{.+}}:[[@LINE-22]]:28, col:43> col:43 referenced TestVarTemplate 'const int':'const int' implicit_instantiation static{{$}} + // CHECK: VarTemplateSpecializationDecl 0x{{.+}} <{{.+}}:[[@LINE-22]]:28, col:43> col:43 referenced TestVarTemplate 'const int' implicit_instantiation static{{$}} // CHECK-NEXT:`-TemplateArgument type 'int'{{$}} // CHECK-NEXT: `-BuiltinType 0x{{.+}} 'int'{{$}} @@ -630,14 +630,14 @@ namespace testCanonicalTemplate { // CHECK-NEXT: |-VarDecl 0x{{.+}} parent 0x{{.+}} prev 0x{{.+}} col:14 TestVarTemplate 'const T' cinit{{$}} // CHECK-NEXT: | |-NestedNameSpecifier TypeSpec 'testCanonicalTemplate::S'{{$}} // CHECK-NEXT: | `-InitListExpr 0x{{.+}} 'void'{{$}} - // CHECK-NEXT: |-VarTemplateSpecialization 0x{{.+}} 'TestVarTemplate' 'const int':'const int'{{$}} - // CHECK-NEXT: `-VarTemplateSpecialization 0x{{.+}} 'TestVarTemplate' 'const int':'const int'{{$}} + // CHECK-NEXT: |-VarTemplateSpecialization 0x{{.+}} 'TestVarTemplate' 'const int'{{$}} + // CHECK-NEXT: `-VarTemplateSpecialization 0x{{.+}} 'TestVarTemplate' 'const int'{{$}} - // CHECK: VarTemplateSpecializationDecl 0x{{.+}} parent 0x{{.+}} prev 0x{{.+}} <{{.+}}:[[@LINE-31]]:3, col:34> col:14 referenced TestVarTemplate 'const int':'const int' implicit_instantiation cinit{{$}} + // CHECK: VarTemplateSpecializationDecl 0x{{.+}} parent 0x{{.+}} prev 0x{{.+}} <{{.+}}:[[@LINE-31]]:3, col:34> col:14 referenced TestVarTemplate 'const int' implicit_instantiation cinit{{$}} // CHECK-NEXT: |-NestedNameSpecifier TypeSpec 'testCanonicalTemplate::S'{{$}} // CHECK-NEXT: |-TemplateArgument type 'int'{{$}} // CHECK-NEXT: | `-BuiltinType 0x{{.+}} 'int'{{$}} - // CHECK-NEXT: `-InitListExpr 0x{{.+}} 'int':'int'{{$}} + // CHECK-NEXT: `-InitListExpr 0x{{.+}} 'int'{{$}} } template @@ -856,7 +856,7 @@ namespace TestConstexprVariableTemplateWithInitializer { inline constexpr in_place_type_t<_Tp> in_place_type{}; // CHECK: -VarTemplateDecl 0x{{.+}} col:41 in_place_type{{$}} // CHECK-NEXT: |-TemplateTypeParmDecl 0x{{.+}} col:22 referenced typename depth 0 index 0 _Tp{{$}} - // CHECK-NEXT: `-VarDecl 0x{{.+}} col:41 in_place_type 'const in_place_type_t<_Tp>':'const in_place_type_t<_Tp>' inline constexpr listinit{{$}} + // CHECK-NEXT: `-VarDecl 0x{{.+}} col:41 in_place_type 'const in_place_type_t<_Tp>' inline constexpr listinit{{$}} // CHECK-NEXT: `-InitListExpr 0x{{.+}} 'void'{{$}} template constexpr T call_init(0); diff --git a/clang/test/AST/ast-dump-decl.m b/clang/test/AST/ast-dump-decl.m index 0b259d522645d..5f09b6042f409 100644 --- a/clang/test/AST/ast-dump-decl.m +++ b/clang/test/AST/ast-dump-decl.m @@ -94,7 +94,7 @@ @interface TestGenericInterface : A
<P>
{ // CHECK: ObjCInterfaceDecl{{.*}} TestGenericInterface // CHECK-NEXT: -super ObjCInterface {{.+}} 'A' // CHECK-NEXT: -ObjCProtocol {{.+}} 'P' -// CHECK-NEXT: -ObjCTypeParamDecl {{.+}} col:33 T 'id':'id' +// CHECK-NEXT: -ObjCTypeParamDecl {{.+}} col:33 T 'id' @implementation TestObjCClass (TestObjCCategoryDecl) - (void) bar { diff --git a/clang/test/AST/ast-dump-expr-json.c b/clang/test/AST/ast-dump-expr-json.c index 14238283af3f2..e910864eeed65 100644 --- a/clang/test/AST/ast-dump-expr-json.c +++ b/clang/test/AST/ast-dump-expr-json.c @@ -4080,7 +4080,6 @@ void PrimaryExpressions(int a) { // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "b", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "struct S", // CHECK-NEXT: "qualType": "struct S" // CHECK-NEXT: } // CHECK-NEXT: }, @@ -4530,7 +4529,6 @@ void PrimaryExpressions(int a) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "struct S", // CHECK-NEXT: "qualType": "struct S" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "lvalue", @@ -4539,7 +4537,6 @@ void PrimaryExpressions(int a) { // CHECK-NEXT: "kind": "ParmVarDecl", // CHECK-NEXT: "name": "b", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "struct S", // CHECK-NEXT: "qualType": "struct S" // CHECK-NEXT: } // CHECK-NEXT: } @@ -4817,7 +4814,6 @@ void PrimaryExpressions(int a) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "struct S", // CHECK-NEXT: "qualType": "struct S" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "prvalue", @@ -4839,7 +4835,6 @@ void PrimaryExpressions(int a) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "struct S", // CHECK-NEXT: "qualType": "struct S" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "lvalue", @@ -4860,7 +4855,6 @@ void PrimaryExpressions(int a) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "struct S", // CHECK-NEXT: "qualType": "struct S" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "prvalue", diff --git a/clang/test/AST/ast-dump-expr-json.cpp b/clang/test/AST/ast-dump-expr-json.cpp index eac0346d64319..0fb07b0b434cc 100644 --- a/clang/test/AST/ast-dump-expr-json.cpp +++ b/clang/test/AST/ast-dump-expr-json.cpp @@ -325,7 +325,6 @@ void TestNonADLCall3() { // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "obj1", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "S", // CHECK-NEXT: "qualType": "S" // CHECK-NEXT: } // CHECK-NEXT: }, @@ -462,7 +461,6 @@ void TestNonADLCall3() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "S", // CHECK-NEXT: "qualType": "S" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "lvalue", @@ -471,7 +469,6 @@ void TestNonADLCall3() { // CHECK-NEXT: "kind": "ParmVarDecl", // CHECK-NEXT: "name": "obj1", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "S", // CHECK-NEXT: "qualType": "S" // CHECK-NEXT: } // CHECK-NEXT: } @@ -733,7 +730,6 @@ void TestNonADLCall3() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "S", // CHECK-NEXT: "qualType": "S" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "lvalue", @@ -742,7 +738,6 @@ void TestNonADLCall3() { // CHECK-NEXT: "kind": "ParmVarDecl", // CHECK-NEXT: "name": "obj1", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "S", // CHECK-NEXT: "qualType": "S" // CHECK-NEXT: } // CHECK-NEXT: } @@ 
-2539,7 +2534,6 @@ void TestNonADLCall3() { // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "a", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "S", // CHECK-NEXT: "qualType": "S" // CHECK-NEXT: } // CHECK-NEXT: }, @@ -2672,7 +2666,6 @@ void TestNonADLCall3() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "S", // CHECK-NEXT: "qualType": "S" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "lvalue", @@ -2681,7 +2674,6 @@ void TestNonADLCall3() { // CHECK-NEXT: "kind": "ParmVarDecl", // CHECK-NEXT: "name": "a", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "S", // CHECK-NEXT: "qualType": "S" // CHECK-NEXT: } // CHECK-NEXT: } @@ -2848,7 +2840,6 @@ void TestNonADLCall3() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "int", // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "prvalue", @@ -2948,7 +2939,6 @@ void TestNonADLCall3() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "float", // CHECK-NEXT: "qualType": "float" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "prvalue", @@ -2992,7 +2982,6 @@ void TestNonADLCall3() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "S", // CHECK-NEXT: "qualType": "S" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "lvalue", @@ -3001,7 +2990,6 @@ void TestNonADLCall3() { // CHECK-NEXT: "kind": "ParmVarDecl", // CHECK-NEXT: "name": "a", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "S", // CHECK-NEXT: "qualType": "S" // CHECK-NEXT: } // CHECK-NEXT: } @@ -3169,7 +3157,6 @@ void TestNonADLCall3() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "S", // CHECK-NEXT: "qualType": "S" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "lvalue", @@ -3178,7 +3165,6 @@ void TestNonADLCall3() { // CHECK-NEXT: "kind": "ParmVarDecl", // CHECK-NEXT: "name": "a", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "S", // CHECK-NEXT: "qualType": "S" // CHECK-NEXT: } // CHECK-NEXT: } @@ -3247,7 +3233,6 @@ void TestNonADLCall3() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "S", // CHECK-NEXT: "qualType": "S" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "lvalue", @@ -3256,7 +3241,6 @@ void TestNonADLCall3() { // CHECK-NEXT: "kind": "ParmVarDecl", // CHECK-NEXT: "name": "a", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "S", // CHECK-NEXT: "qualType": "S" // CHECK-NEXT: } // CHECK-NEXT: } @@ -3500,7 +3484,6 @@ void TestNonADLCall3() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "S", // CHECK-NEXT: "qualType": "S" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "lvalue", @@ -3509,7 +3492,6 @@ void TestNonADLCall3() { // CHECK-NEXT: "kind": "ParmVarDecl", // CHECK-NEXT: "name": "a", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "S", // CHECK-NEXT: "qualType": "S" // CHECK-NEXT: } // CHECK-NEXT: } @@ -3537,7 +3519,6 @@ void TestNonADLCall3() { // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "lvalue", // CHECK-NEXT: "typeArg": { -// CHECK-NEXT: "desugaredQualType": "S", // CHECK-NEXT: "qualType": "S" // CHECK-NEXT: } // CHECK-NEXT: }, @@ -3562,11 +3543,9 @@ void TestNonADLCall3() { // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "lvalue", // CHECK-NEXT: "typeArg": { -// CHECK-NEXT: 
"desugaredQualType": "const volatile S", // CHECK-NEXT: "qualType": "const volatile S" // CHECK-NEXT: }, // CHECK-NEXT: "adjustedTypeArg": { -// CHECK-NEXT: "desugaredQualType": "S", // CHECK-NEXT: "qualType": "S" // CHECK-NEXT: } // CHECK-NEXT: } @@ -6591,7 +6570,6 @@ void TestNonADLCall3() { // CHECK-NEXT: }, // CHECK-NEXT: "isImplicit": true, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "int", // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } // CHECK-NEXT: } @@ -7858,7 +7836,6 @@ void TestNonADLCall3() { // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "x", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "NS::X", // CHECK-NEXT: "qualType": "NS::X" // CHECK-NEXT: }, // CHECK-NEXT: "init": "call", @@ -7879,7 +7856,6 @@ void TestNonADLCall3() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "NS::X", // CHECK-NEXT: "qualType": "NS::X" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "prvalue", @@ -8030,7 +8006,6 @@ void TestNonADLCall3() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "NS::X", // CHECK-NEXT: "qualType": "NS::X" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "lvalue", @@ -8039,7 +8014,6 @@ void TestNonADLCall3() { // CHECK-NEXT: "kind": "VarDecl", // CHECK-NEXT: "name": "x", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "NS::X", // CHECK-NEXT: "qualType": "NS::X" // CHECK-NEXT: } // CHECK-NEXT: } @@ -8139,7 +8113,6 @@ void TestNonADLCall3() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "NS::X", // CHECK-NEXT: "qualType": "NS::X" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "prvalue", @@ -8187,7 +8160,6 @@ void TestNonADLCall3() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "NS::X", // CHECK-NEXT: "qualType": "NS::X" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "lvalue", @@ -8196,7 +8168,6 @@ void TestNonADLCall3() { // CHECK-NEXT: "kind": "VarDecl", // CHECK-NEXT: "name": "x", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "NS::X", // CHECK-NEXT: "qualType": "NS::X" // CHECK-NEXT: } // CHECK-NEXT: } @@ -8299,7 +8270,6 @@ void TestNonADLCall3() { // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "x", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "NS::X", // CHECK-NEXT: "qualType": "NS::X" // CHECK-NEXT: }, // CHECK-NEXT: "init": "call", @@ -8320,7 +8290,6 @@ void TestNonADLCall3() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "NS::X", // CHECK-NEXT: "qualType": "NS::X" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "prvalue", @@ -8470,7 +8439,6 @@ void TestNonADLCall3() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "NS::X", // CHECK-NEXT: "qualType": "NS::X" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "lvalue", @@ -8479,7 +8447,6 @@ void TestNonADLCall3() { // CHECK-NEXT: "kind": "VarDecl", // CHECK-NEXT: "name": "x", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "NS::X", // CHECK-NEXT: "qualType": "NS::X" // CHECK-NEXT: } // CHECK-NEXT: } @@ -8582,7 +8549,6 @@ void TestNonADLCall3() { // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "x", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "NS::X", // CHECK-NEXT: "qualType": "NS::X" // CHECK-NEXT: }, // CHECK-NEXT: "init": "call", @@ -8603,7 +8569,6 @@ void TestNonADLCall3() { // 
CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "NS::X", // CHECK-NEXT: "qualType": "NS::X" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "prvalue", @@ -8799,7 +8764,6 @@ void TestNonADLCall3() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "NS::X", // CHECK-NEXT: "qualType": "NS::X" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "lvalue", @@ -8808,7 +8772,6 @@ void TestNonADLCall3() { // CHECK-NEXT: "kind": "VarDecl", // CHECK-NEXT: "name": "x", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "NS::X", // CHECK-NEXT: "qualType": "NS::X" // CHECK-NEXT: } // CHECK-NEXT: } @@ -8908,7 +8871,6 @@ void TestNonADLCall3() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "NS::X", // CHECK-NEXT: "qualType": "NS::X" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "prvalue", @@ -8956,7 +8918,6 @@ void TestNonADLCall3() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "NS::X", // CHECK-NEXT: "qualType": "NS::X" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "lvalue", @@ -8965,7 +8926,6 @@ void TestNonADLCall3() { // CHECK-NEXT: "kind": "VarDecl", // CHECK-NEXT: "name": "x", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "NS::X", // CHECK-NEXT: "qualType": "NS::X" // CHECK-NEXT: } // CHECK-NEXT: } diff --git a/clang/test/AST/ast-dump-expr-json.m b/clang/test/AST/ast-dump-expr-json.m index bbde4451f9fe3..3c502c0496308 100644 --- a/clang/test/AST/ast-dump-expr-json.m +++ b/clang/test/AST/ast-dump-expr-json.m @@ -764,7 +764,6 @@ void TestObjCBoolLiteral(void) { // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "Obj", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "id", // CHECK-NEXT: "qualType": "id", // CHECK-NEXT: "typeAliasDeclId": "0x{{.*}}" // CHECK-NEXT: } @@ -825,7 +824,6 @@ void TestObjCBoolLiteral(void) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "id", // CHECK-NEXT: "qualType": "id", // CHECK-NEXT: "typeAliasDeclId": "0x{{.*}}" // CHECK-NEXT: }, @@ -848,7 +846,6 @@ void TestObjCBoolLiteral(void) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "id", // CHECK-NEXT: "qualType": "id", // CHECK-NEXT: "typeAliasDeclId": "0x{{.*}}" // CHECK-NEXT: }, @@ -858,7 +855,6 @@ void TestObjCBoolLiteral(void) { // CHECK-NEXT: "kind": "ParmVarDecl", // CHECK-NEXT: "name": "Obj", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "id", // CHECK-NEXT: "qualType": "id", // CHECK-NEXT: "typeAliasDeclId": "0x{{.*}}" // CHECK-NEXT: } @@ -2036,7 +2032,6 @@ void TestObjCBoolLiteral(void) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "id", // CHECK-NEXT: "qualType": "id", // CHECK-NEXT: "typeAliasDeclId": "0x{{.*}}" // CHECK-NEXT: }, @@ -2468,7 +2463,6 @@ void TestObjCBoolLiteral(void) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "id", // CHECK-NEXT: "qualType": "id", // CHECK-NEXT: "typeAliasDeclId": "0x{{.*}}" // CHECK-NEXT: }, @@ -2490,7 +2484,6 @@ void TestObjCBoolLiteral(void) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "id", // CHECK-NEXT: "qualType": "id", // CHECK-NEXT: "typeAliasDeclId": "0x{{.*}}" // CHECK-NEXT: }, @@ -2678,7 +2671,6 @@ void TestObjCBoolLiteral(void) { // CHECK-NEXT: } 
// CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "id", // CHECK-NEXT: "qualType": "id", // CHECK-NEXT: "typeAliasDeclId": "0x{{.*}}" // CHECK-NEXT: }, @@ -2700,7 +2692,6 @@ void TestObjCBoolLiteral(void) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "id", // CHECK-NEXT: "qualType": "id", // CHECK-NEXT: "typeAliasDeclId": "0x{{.*}}" // CHECK-NEXT: }, @@ -2864,7 +2855,6 @@ void TestObjCBoolLiteral(void) { // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "i", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "id", // CHECK-NEXT: "qualType": "id", // CHECK-NEXT: "typeAliasDeclId": "0x{{.*}}" // CHECK-NEXT: }, @@ -2886,7 +2876,6 @@ void TestObjCBoolLiteral(void) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "id", // CHECK-NEXT: "qualType": "id", // CHECK-NEXT: "typeAliasDeclId": "0x{{.*}}" // CHECK-NEXT: }, @@ -3163,7 +3152,6 @@ void TestObjCBoolLiteral(void) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "id", // CHECK-NEXT: "qualType": "id", // CHECK-NEXT: "typeAliasDeclId": "0x{{.*}}" // CHECK-NEXT: }, @@ -3312,7 +3300,6 @@ void TestObjCBoolLiteral(void) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "id", // CHECK-NEXT: "qualType": "id", // CHECK-NEXT: "typeAliasDeclId": "0x{{.*}}" // CHECK-NEXT: }, @@ -3788,7 +3775,6 @@ void TestObjCBoolLiteral(void) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "id", // CHECK-NEXT: "qualType": "id", // CHECK-NEXT: "typeAliasDeclId": "0x{{.*}}" // CHECK-NEXT: }, @@ -3810,7 +3796,6 @@ void TestObjCBoolLiteral(void) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "id", // CHECK-NEXT: "qualType": "id", // CHECK-NEXT: "typeAliasDeclId": "0x{{.*}}" // CHECK-NEXT: }, @@ -3998,7 +3983,6 @@ void TestObjCBoolLiteral(void) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "id", // CHECK-NEXT: "qualType": "id", // CHECK-NEXT: "typeAliasDeclId": "0x{{.*}}" // CHECK-NEXT: }, @@ -4020,7 +4004,6 @@ void TestObjCBoolLiteral(void) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "id", // CHECK-NEXT: "qualType": "id", // CHECK-NEXT: "typeAliasDeclId": "0x{{.*}}" // CHECK-NEXT: }, @@ -4113,7 +4096,6 @@ void TestObjCBoolLiteral(void) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "id", // CHECK-NEXT: "qualType": "id", // CHECK-NEXT: "typeAliasDeclId": "0x{{.*}}" // CHECK-NEXT: }, @@ -4208,7 +4190,6 @@ void TestObjCBoolLiteral(void) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "id", // CHECK-NEXT: "qualType": "id", // CHECK-NEXT: "typeAliasDeclId": "0x{{.*}}" // CHECK-NEXT: }, @@ -4231,7 +4212,6 @@ void TestObjCBoolLiteral(void) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "id", // CHECK-NEXT: "qualType": "id", // CHECK-NEXT: "typeAliasDeclId": "0x{{.*}}" // CHECK-NEXT: }, @@ -4241,7 +4221,6 @@ void TestObjCBoolLiteral(void) { // CHECK-NEXT: "kind": "VarDecl", // CHECK-NEXT: "name": "i", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "id", // CHECK-NEXT: "qualType": "id", // CHECK-NEXT: "typeAliasDeclId": "0x{{.*}}" // CHECK-NEXT: } @@ -4263,7 +4242,6 
@@ void TestObjCBoolLiteral(void) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "id", // CHECK-NEXT: "qualType": "id", // CHECK-NEXT: "typeAliasDeclId": "0x{{.*}}" // CHECK-NEXT: }, @@ -4584,7 +4562,6 @@ void TestObjCBoolLiteral(void) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "id", // CHECK-NEXT: "qualType": "id", // CHECK-NEXT: "typeAliasDeclId": "0x{{.*}}" // CHECK-NEXT: }, @@ -4681,7 +4658,6 @@ void TestObjCBoolLiteral(void) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "id", // CHECK-NEXT: "qualType": "id", // CHECK-NEXT: "typeAliasDeclId": "0x{{.*}}" // CHECK-NEXT: }, diff --git a/clang/test/AST/ast-dump-expr.c b/clang/test/AST/ast-dump-expr.c index 8605cd4bbaa16..959d61ec9794b 100644 --- a/clang/test/AST/ast-dump-expr.c +++ b/clang/test/AST/ast-dump-expr.c @@ -259,7 +259,7 @@ void PostfixOperators(int *a, struct S b, struct S *c) { b.a; // CHECK: ImplicitCastExpr // CHECK-NEXT: MemberExpr 0x{{[^ ]*}} 'int' lvalue .a 0x{{[^ ]*}} - // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'struct S':'struct S' lvalue ParmVar 0x{{[^ ]*}} 'b' 'struct S':'struct S' + // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'struct S' lvalue ParmVar 0x{{[^ ]*}} 'b' 'struct S' c->a; // CHECK: ImplicitCastExpr @@ -280,8 +280,8 @@ void PostfixOperators(int *a, struct S b, struct S *c) { (struct S){1}; // CHECK: ImplicitCastExpr - // CHECK-NEXT: CompoundLiteralExpr 0x{{[^ ]*}} 'struct S':'struct S' lvalue - // CHECK-NEXT: InitListExpr 0x{{[^ ]*}} 'struct S':'struct S' + // CHECK-NEXT: CompoundLiteralExpr 0x{{[^ ]*}} 'struct S' lvalue + // CHECK-NEXT: InitListExpr 0x{{[^ ]*}} 'struct S' // CHECK-NEXT: IntegerLiteral 0x{{[^ ]*}} 'int' 1 } diff --git a/clang/test/AST/ast-dump-expr.cpp b/clang/test/AST/ast-dump-expr.cpp index 1c07aa066462b..69e65e22d61d0 100644 --- a/clang/test/AST/ast-dump-expr.cpp +++ b/clang/test/AST/ast-dump-expr.cpp @@ -59,7 +59,7 @@ void Throw() { void PointerToMember(S obj1, S *obj2, int S::* data, void (S::*call)(int)) { obj1.*data; // CHECK: BinaryOperator 0x{{[^ ]*}} 'int' lvalue '.*' - // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'S':'S' lvalue ParmVar 0x{{[^ ]*}} 'obj1' 'S':'S' + // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'S' lvalue ParmVar 0x{{[^ ]*}} 'obj1' 'S' // CHECK-NEXT: ImplicitCastExpr // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'int S::*' lvalue ParmVar 0x{{[^ ]*}} 'data' 'int S::*' @@ -74,7 +74,7 @@ void PointerToMember(S obj1, S *obj2, int S::* data, void (S::*call)(int)) { // CHECK: CXXMemberCallExpr 0x{{[^ ]*}} 'void' // CHECK-NEXT: ParenExpr 0x{{[^ ]*}} '' // CHECK-NEXT: BinaryOperator 0x{{[^ ]*}} '' '.*' - // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'S':'S' lvalue ParmVar 0x{{[^ ]*}} 'obj1' 'S':'S' + // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'S' lvalue ParmVar 0x{{[^ ]*}} 'obj1' 'S' // CHECK-NEXT: ImplicitCastExpr // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'void (S::*)(int)' lvalue ParmVar 0x{{[^ ]*}} 'call' 'void (S::*)(int)' // CHECK-NEXT: IntegerLiteral 0x{{[^ ]*}} 'int' 12 @@ -178,7 +178,7 @@ void PostfixExpressions(S a, S *p, U *r) { a.func(0); // CHECK: CXXMemberCallExpr 0x{{[^ ]*}} 'void' // CHECK-NEXT: MemberExpr 0x{{[^ ]*}} '' .func 0x{{[^ ]*}} - // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'S':'S' lvalue ParmVar 0x{{[^ ]*}} 'a' 'S':'S' + // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'S' lvalue ParmVar 0x{{[^ ]*}} 'a' 'S' // CHECK-NEXT: IntegerLiteral 0x{{[^ ]*}} 'int' 0 p->func(0); @@ -190,16 +190,16 @@ void PostfixExpressions(S a, S *p, U *r) { // FIXME: 
there is no mention that this used the template keyword. p->template foo(); - // CHECK: CXXMemberCallExpr 0x{{[^ ]*}} 'int':'int' + // CHECK: CXXMemberCallExpr 0x{{[^ ]*}} 'int' // CHECK-NEXT: MemberExpr 0x{{[^ ]*}} '' ->foo 0x{{[^ ]*}} // CHECK-NEXT: ImplicitCastExpr // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'S *' lvalue ParmVar 0x{{[^ ]*}} 'p' 'S *' // FIXME: there is no mention that this used the template keyword. a.template foo(); - // CHECK: CXXMemberCallExpr 0x{{[^ ]*}} 'float':'float' + // CHECK: CXXMemberCallExpr 0x{{[^ ]*}} 'float' // CHECK-NEXT: MemberExpr 0x{{[^ ]*}} '' .foo 0x{{[^ ]*}} - // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'S':'S' lvalue ParmVar 0x{{[^ ]*}} 'a' 'S':'S' + // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'S' lvalue ParmVar 0x{{[^ ]*}} 'a' 'S' p->~S(); // CHECK: CXXMemberCallExpr 0x{{[^ ]*}} 'void' @@ -210,14 +210,14 @@ void PostfixExpressions(S a, S *p, U *r) { a.~S(); // CHECK: CXXMemberCallExpr 0x{{[^ ]*}} 'void' // CHECK-NEXT: MemberExpr 0x{{[^ ]*}} '' .~S 0x{{[^ ]*}} - // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'S':'S' lvalue ParmVar 0x{{[^ ]*}} 'a' 'S':'S' + // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'S' lvalue ParmVar 0x{{[^ ]*}} 'a' 'S' // FIXME: there seems to be no way to distinguish the construct below from // the construct above. a.~decltype(a)(); // CHECK: CXXMemberCallExpr 0x{{[^ ]*}} 'void' // CHECK-NEXT: MemberExpr 0x{{[^ ]*}} '' .~S 0x{{[^ ]*}} - // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'S':'S' lvalue ParmVar 0x{{[^ ]*}} 'a' 'S':'S' + // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'S' lvalue ParmVar 0x{{[^ ]*}} 'a' 'S' // FIXME: similarly, there is no way to distinguish the construct below from // the p->~S() case. @@ -233,13 +233,13 @@ void PostfixExpressions(S a, S *p, U *r) { r->template U::~U(); // CHECK: CXXMemberCallExpr 0x{{[^ ]*}} 'void' // CHECK-NEXT: MemberExpr 0x{{[^ ]*}} '' ->~U 0x{{[^ ]*}} - // CHECK-NEXT: NestedNameSpecifier TypeSpecWithTemplate 'U':'U' + // CHECK-NEXT: NestedNameSpecifier TypeSpecWithTemplate 'U' // CHECK-NEXT: ImplicitCastExpr // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'U *' lvalue ParmVar 0x{{[^ ]*}} 'r' 'U *' typeid(a); // CHECK: CXXTypeidExpr 0x{{[^ ]*}} 'const std::type_info' lvalue - // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'S':'S' lvalue ParmVar 0x{{[^ ]*}} 'a' 'S':'S' + // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'S' lvalue ParmVar 0x{{[^ ]*}} 'a' 'S' // FIXME: no type information is printed for the argument. typeid(S); @@ -448,7 +448,7 @@ void PrimaryExpressions(Ts... a) { // CHECK-NEXT: CXXMethodDecl 0x{{[^ ]*}} col:3 operator() 'auto () const -> auto' inline // CHECK-NEXT: CompoundStmt // CHECK-NEXT: FieldDecl 0x{{[^ ]*}} col:4 implicit 'Ts...' - // CHECK-NEXT: FieldDecl 0x{{[^ ]*}} col:10 implicit 'int':'int' + // CHECK-NEXT: FieldDecl 0x{{[^ ]*}} col:10 implicit 'int' // CHECK-NEXT: ParenListExpr 0x{{[^ ]*}} 'NULL TYPE' // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'Ts' lvalue ParmVar 0x{{[^ ]*}} 'a' 'Ts...' 
// CHECK-NEXT: IntegerLiteral 0x{{[^ ]*}} 'int' 12 diff --git a/clang/test/AST/ast-dump-fpfeatures.cpp b/clang/test/AST/ast-dump-fpfeatures.cpp index 9228b6e7238be..da0011602a728 100644 --- a/clang/test/AST/ast-dump-fpfeatures.cpp +++ b/clang/test/AST/ast-dump-fpfeatures.cpp @@ -140,7 +140,7 @@ float func_15(float x, float y) { // CHECK: FunctionDecl {{.*}} func_14 'float (float, float)' // CHECK: CompoundStmt // CHECK-NEXT: ReturnStmt -// CHECK-NEXT: BinaryOperator {{.*}} 'float':'float' '+' ConstRoundingMode=towardzero +// CHECK-NEXT: BinaryOperator {{.*}} 'float' '+' ConstRoundingMode=towardzero float func_16(float x, float y) { #pragma STDC FENV_ROUND FE_TOWARDZERO diff --git a/clang/test/AST/ast-dump-funcs.cpp b/clang/test/AST/ast-dump-funcs.cpp index 7d47893d4596d..61fb5d4eb654e 100644 --- a/clang/test/AST/ast-dump-funcs.cpp +++ b/clang/test/AST/ast-dump-funcs.cpp @@ -32,7 +32,7 @@ struct S { // CHECK-NEXT: CXXCtorInitializer Field 0x{{[^ ]*}} 'j' 'int' // CHECK-NEXT: IntegerLiteral 0x{{[^ ]*}} 'int' 0 // CHECK-NEXT: CXXCtorInitializer Field 0x{{[^ ]*}} 'r' 'R' - // CHECK-NEXT: CXXConstructExpr 0x{{[^ ]*}} 'R':'R' 'void () noexcept' + // CHECK-NEXT: CXXConstructExpr 0x{{[^ ]*}} 'R' 'void () noexcept' // CHECK-NEXT: CompoundStmt 0x{{[^ ]*}} void a(); diff --git a/clang/test/AST/ast-dump-functionprototype.cpp b/clang/test/AST/ast-dump-functionprototype.cpp index 4ba57bd5ace0e..d831e5da2d320 100644 --- a/clang/test/AST/ast-dump-functionprototype.cpp +++ b/clang/test/AST/ast-dump-functionprototype.cpp @@ -5,7 +5,7 @@ struct B {}; typedef void (type1)() noexcept(10 > 5); -// CHECK: TypedefDecl {{.*}} type1 'void () noexcept(10 > 5)':'void () noexcept(10 > 5)' +// CHECK: TypedefDecl {{.*}} type1 'void () noexcept(10 > 5)' // CHECK-NEXT: `-ParenType {{.*}} // CHECK-NEXT: `-FunctionProtoType {{.*}} 'void () noexcept(10 > 5)' exceptionspec_noexcept_true cdecl // CHECK-NEXT: |-NoexceptExpr: ConstantExpr {{.*}} 'bool' @@ -14,9 +14,9 @@ typedef void (type1)() noexcept(10 > 5); typedef void (type2)() throw(A, B); -// CHECK: TypedefDecl {{.*}} type2 'void () throw(A, B)':'void () throw(A, B)' +// CHECK: TypedefDecl {{.*}} type2 'void () throw(A, B)' // CHECK-NEXT: `-ParenType {{.*}} // CHECK-NEXT: `-FunctionProtoType {{.*}} 'void () throw(A, B)' exceptionspec_dynamic cdecl -// CHECK-NEXT: |-Exceptions: 'A':'A', 'B':'B' +// CHECK-NEXT: |-Exceptions: 'A', 'B' // CHECK-NEXT: `-BuiltinType {{.*}} 'void' diff --git a/clang/test/AST/ast-dump-lambda.cpp b/clang/test/AST/ast-dump-lambda.cpp index 56d09b92df0e6..ef8789cd97d3e 100644 --- a/clang/test/AST/ast-dump-lambda.cpp +++ b/clang/test/AST/ast-dump-lambda.cpp @@ -229,7 +229,7 @@ template void test(Ts... a) { // CHECK-NEXT: | | |-CXXMethodDecl {{.*}} col:3{{( imported)?}} operator() 'auto () const -> auto' inline // CHECK-NEXT: | | | `-CompoundStmt {{.*}} // CHECK-NEXT: | | |-FieldDecl {{.*}} col:4{{( imported)?}} implicit 'Ts...' -// CHECK-NEXT: | | `-FieldDecl {{.*}} col:10{{( imported)?}} implicit 'int':'int' +// CHECK-NEXT: | | `-FieldDecl {{.*}} col:10{{( imported)?}} implicit 'int' // CHECK-NEXT: | |-ParenListExpr {{.*}} 'NULL TYPE' // CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'Ts' lvalue ParmVar {{.*}} 'a' 'Ts...' 
// CHECK-NEXT: | |-IntegerLiteral {{.*}} 'int' 12 diff --git a/clang/test/AST/ast-dump-objc-arc-json.m b/clang/test/AST/ast-dump-objc-arc-json.m index 5792c8bf58776..86ca28e283a5b 100644 --- a/clang/test/AST/ast-dump-objc-arc-json.m +++ b/clang/test/AST/ast-dump-objc-arc-json.m @@ -22,7 +22,6 @@ id TestCompoundLiteral(id a) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "id", // CHECK-NEXT: "qualType": "id", // CHECK-NEXT: "typeAliasDeclId": "0x{{.*}}" // CHECK-NEXT: }, diff --git a/clang/test/AST/ast-dump-openmp-begin-declare-variant_reference.cpp b/clang/test/AST/ast-dump-openmp-begin-declare-variant_reference.cpp index ef1d2033c5d10..1937a5d1c3eb3 100644 --- a/clang/test/AST/ast-dump-openmp-begin-declare-variant_reference.cpp +++ b/clang/test/AST/ast-dump-openmp-begin-declare-variant_reference.cpp @@ -121,7 +121,7 @@ int test(float &&f, short &&s) { // CHECK-NEXT: | | | `-LValueReferenceType [[ADDR_7:0x[a-z0-9]*]] 'float &' // CHECK-NEXT: | | | `-BuiltinType [[ADDR_8:0x[a-z0-9]*]] 'float' // CHECK-NEXT: | | |-CXXRecordDecl [[ADDR_9:0x[a-z0-9]*]] col:29 implicit struct remove_reference -// CHECK-NEXT: | | `-TypedefDecl [[ADDR_10:0x[a-z0-9]*]] col:67 referenced type 'float':'float' +// CHECK-NEXT: | | `-TypedefDecl [[ADDR_10:0x[a-z0-9]*]] col:67 referenced type 'float' // CHECK-NEXT: | | `-SubstTemplateTypeParmType [[ADDR_11:0x[a-z0-9]*]] 'float' sugar class depth 0 index 0 _Tp // CHECK-NEXT: | | |-ClassTemplateSpecialization [[ADDR_6]] 'remove_reference' // CHECK-NEXT: | | `-BuiltinType [[ADDR_8]] 'float' @@ -137,7 +137,7 @@ int test(float &&f, short &&s) { // CHECK-NEXT: | | `-LValueReferenceType [[ADDR_15:0x[a-z0-9]*]] 'short &' // CHECK-NEXT: | | `-BuiltinType [[ADDR_16:0x[a-z0-9]*]] 'short' // CHECK-NEXT: | |-CXXRecordDecl [[ADDR_17:0x[a-z0-9]*]] col:29 implicit struct remove_reference -// CHECK-NEXT: | `-TypedefDecl [[ADDR_18:0x[a-z0-9]*]] col:67 referenced type 'short':'short' +// CHECK-NEXT: | `-TypedefDecl [[ADDR_18:0x[a-z0-9]*]] col:67 referenced type 'short' // CHECK-NEXT: | `-SubstTemplateTypeParmType [[ADDR_19:0x[a-z0-9]*]] 'short' sugar class depth 0 index 0 _Tp // CHECK-NEXT: | |-ClassTemplateSpecialization [[ADDR_14]] 'remove_reference' // CHECK-NEXT: | `-BuiltinType [[ADDR_16]] 'short' diff --git a/clang/test/AST/ast-dump-openmp-begin-declare-variant_template_1.cpp b/clang/test/AST/ast-dump-openmp-begin-declare-variant_template_1.cpp index 5916958b94625..0dfed6ffa240d 100644 --- a/clang/test/AST/ast-dump-openmp-begin-declare-variant_template_1.cpp +++ b/clang/test/AST/ast-dump-openmp-begin-declare-variant_template_1.cpp @@ -124,14 +124,14 @@ int test() { // CHECK-NEXT: | |-CallExpr [[ADDR_63:0x[a-z0-9]*]] 'int' // CHECK-NEXT: | | |-ImplicitCastExpr [[ADDR_64:0x[a-z0-9]*]] 'int (*)(double)' // CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_65:0x[a-z0-9]*]] 'int (double)' {{.*}}Function [[ADDR_44]] 'also_after' 'int (double)' -// CHECK-NEXT: | | `-CXXFunctionalCastExpr [[ADDR_66:0x[a-z0-9]*]] 'double':'double' functional cast to double -// CHECK-NEXT: | | `-ImplicitCastExpr [[ADDR_67:0x[a-z0-9]*]] 'double':'double' part_of_explicit_cast +// CHECK-NEXT: | | `-CXXFunctionalCastExpr [[ADDR_66:0x[a-z0-9]*]] 'double' functional cast to double +// CHECK-NEXT: | | `-ImplicitCastExpr [[ADDR_67:0x[a-z0-9]*]] 'double' part_of_explicit_cast // CHECK-NEXT: | | `-IntegerLiteral [[ADDR_58]] 'int' 0 // CHECK-NEXT: | `-CallExpr [[ADDR_68:0x[a-z0-9]*]] 'int' // CHECK-NEXT: | |-ImplicitCastExpr [[ADDR_69:0x[a-z0-9]*]] 'int (*)(double)' // 
CHECK-NEXT: | | `-DeclRefExpr [[ADDR_25]] 'int (double)' Function [[ADDR_26]] 'also_after[implementation={vendor(llvm)}]' 'int (double)' -// CHECK-NEXT: | `-CXXFunctionalCastExpr [[ADDR_66]] 'double':'double' functional cast to double -// CHECK-NEXT: | `-ImplicitCastExpr [[ADDR_67]] 'double':'double' part_of_explicit_cast +// CHECK-NEXT: | `-CXXFunctionalCastExpr [[ADDR_66]] 'double' functional cast to double +// CHECK-NEXT: | `-ImplicitCastExpr [[ADDR_67]] 'double' part_of_explicit_cast // CHECK-NEXT: | `-IntegerLiteral [[ADDR_58]] 'int' 0 // CHECK-NEXT: |-TypedefDecl [[ADDR_70:0x[a-z0-9]*]] col:14 referenced Ty 'int (*)({{.*}})' // CHECK-NEXT: | `-PointerType [[ADDR_71:0x[a-z0-9]*]] 'int (*)({{.*}})' diff --git a/clang/test/AST/ast-dump-openmp-begin-declare-variant_template_2.cpp b/clang/test/AST/ast-dump-openmp-begin-declare-variant_template_2.cpp index 9613e86634927..da46cef7f3f1b 100644 --- a/clang/test/AST/ast-dump-openmp-begin-declare-variant_template_2.cpp +++ b/clang/test/AST/ast-dump-openmp-begin-declare-variant_template_2.cpp @@ -66,7 +66,7 @@ int test() { // CHECK-NEXT: | `-FunctionDecl [[ADDR_10:0x[a-z0-9]*]] line:5:5 used also_before 'int (double)' // CHECK-NEXT: | |-TemplateArgument type 'double' // CHECK-NEXT: | | `-BuiltinType [[ADDR_11:0x[a-z0-9]*]] 'double' -// CHECK-NEXT: | |-ParmVarDecl [[ADDR_12:0x[a-z0-9]*]] col:18 'double':'double' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_12:0x[a-z0-9]*]] col:18 'double' // CHECK-NEXT: | |-CompoundStmt [[ADDR_13:0x[a-z0-9]*]] // CHECK-NEXT: | | `-ReturnStmt [[ADDR_14:0x[a-z0-9]*]] // CHECK-NEXT: | | `-IntegerLiteral [[ADDR_6]] 'int' 1 @@ -97,7 +97,7 @@ int test() { // CHECK-NEXT: | `-FunctionDecl [[ADDR_17]] line:18:1 also_before[implementation={extension(allow_templates)}] 'int (double)' // CHECK-NEXT: | |-TemplateArgument type 'double' // CHECK-NEXT: | | `-BuiltinType [[ADDR_11]] 'double' -// CHECK-NEXT: | |-ParmVarDecl [[ADDR_37:0x[a-z0-9]*]] col:18 'double':'double' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_37:0x[a-z0-9]*]] col:18 'double' // CHECK-NEXT: | `-CompoundStmt [[ADDR_38:0x[a-z0-9]*]] // CHECK-NEXT: | `-ReturnStmt [[ADDR_39:0x[a-z0-9]*]] // CHECK-NEXT: | `-IntegerLiteral [[ADDR_36]] 'int' 0 @@ -110,7 +110,7 @@ int test() { // CHECK-NEXT: | `-FunctionDecl [[ADDR_47:0x[a-z0-9]*]] line:44:5 used also_after 'int (char)' // CHECK-NEXT: | |-TemplateArgument type 'char' // CHECK-NEXT: | | `-BuiltinType [[ADDR_48:0x[a-z0-9]*]] 'char' -// CHECK-NEXT: | |-ParmVarDecl [[ADDR_49:0x[a-z0-9]*]] col:17 'char':'char' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_49:0x[a-z0-9]*]] col:17 'char' // CHECK-NEXT: | |-CompoundStmt [[ADDR_50:0x[a-z0-9]*]] // CHECK-NEXT: | | `-ReturnStmt [[ADDR_51:0x[a-z0-9]*]] // CHECK-NEXT: | | `-IntegerLiteral [[ADDR_52:0x[a-z0-9]*]] 'int' 6 @@ -126,7 +126,7 @@ int test() { // CHECK-NEXT: | `-FunctionDecl [[ADDR_55]] line:22:1 also_after[implementation={extension(allow_templates)}] 'int (char)' // CHECK-NEXT: | |-TemplateArgument type 'char' // CHECK-NEXT: | | `-BuiltinType [[ADDR_48]] 'char' -// CHECK-NEXT: | |-ParmVarDecl [[ADDR_60:0x[a-z0-9]*]] col:17 'char':'char' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_60:0x[a-z0-9]*]] col:17 'char' // CHECK-NEXT: | `-CompoundStmt [[ADDR_61:0x[a-z0-9]*]] // CHECK-NEXT: | `-ReturnStmt [[ADDR_62:0x[a-z0-9]*]] // CHECK-NEXT: | `-IntegerLiteral [[ADDR_59]] 'int' 0 @@ -213,7 +213,7 @@ int test() { // CHECK-NEXT: | `-FunctionDecl [[ADDR_128:0x[a-z0-9]*]] line:48:5 used also_after_mismatch 'int (int)' // CHECK-NEXT: | |-TemplateArgument type 'int' // CHECK-NEXT: | | `-BuiltinType 
[[ADDR_129:0x[a-z0-9]*]] 'int' -// CHECK-NEXT: | |-ParmVarDecl [[ADDR_130:0x[a-z0-9]*]] col:26 'int':'int' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_130:0x[a-z0-9]*]] col:26 'int' // CHECK-NEXT: | `-CompoundStmt [[ADDR_131:0x[a-z0-9]*]] // CHECK-NEXT: | `-ReturnStmt [[ADDR_132:0x[a-z0-9]*]] // CHECK-NEXT: | `-IntegerLiteral [[ADDR_127]] 'int' 0 @@ -244,12 +244,12 @@ int test() { // CHECK-NEXT: | | |-CallExpr [[ADDR_155:0x[a-z0-9]*]] 'int' // CHECK-NEXT: | | | |-ImplicitCastExpr [[ADDR_156:0x[a-z0-9]*]] 'int (*)(char)' // CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_157:0x[a-z0-9]*]] 'int (char)' {{.*}}Function [[ADDR_47]] 'also_after' 'int (char)' (FunctionTemplate [[ADDR_114]] 'also_after') -// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_158:0x[a-z0-9]*]] 'char':'char' +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_158:0x[a-z0-9]*]] 'char' // CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_159:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | `-CallExpr [[ADDR_160:0x[a-z0-9]*]] 'int' // CHECK-NEXT: | | |-ImplicitCastExpr [[ADDR_161:0x[a-z0-9]*]] 'int (*)(char)' // CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_54]] 'int (char)' {{.*}}Function [[ADDR_55]] 'also_after[implementation={extension(allow_templates)}]' 'int (char)' -// CHECK-NEXT: | | `-ImplicitCastExpr [[ADDR_162:0x[a-z0-9]*]] 'char':'char' +// CHECK-NEXT: | | `-ImplicitCastExpr [[ADDR_162:0x[a-z0-9]*]] 'char' // CHECK-NEXT: | | `-IntegerLiteral [[ADDR_159]] 'int' 0 // CHECK-NEXT: | `-CallExpr [[ADDR_163:0x[a-z0-9]*]] 'int' // CHECK-NEXT: | |-ImplicitCastExpr [[ADDR_164:0x[a-z0-9]*]] 'int (*)(int)' diff --git a/clang/test/AST/ast-dump-openmp-begin-declare-variant_template_3.cpp b/clang/test/AST/ast-dump-openmp-begin-declare-variant_template_3.cpp index 60619801735f2..ad269506042c8 100644 --- a/clang/test/AST/ast-dump-openmp-begin-declare-variant_template_3.cpp +++ b/clang/test/AST/ast-dump-openmp-begin-declare-variant_template_3.cpp @@ -97,14 +97,14 @@ int test() { // CHECK-NEXT: | |-FunctionDecl [[ADDR_32:0x[a-z0-9]*]] line:10:5 used also_before 'int (int)' // CHECK-NEXT: | | |-TemplateArgument type 'int' // CHECK-NEXT: | | | `-BuiltinType [[ADDR_9]] 'int' -// CHECK-NEXT: | | |-ParmVarDecl [[ADDR_33:0x[a-z0-9]*]] col:19 s 'int':'int' +// CHECK-NEXT: | | |-ParmVarDecl [[ADDR_33:0x[a-z0-9]*]] col:19 s 'int' // CHECK-NEXT: | | `-CompoundStmt [[ADDR_34:0x[a-z0-9]*]] // CHECK-NEXT: | | `-ReturnStmt [[ADDR_35:0x[a-z0-9]*]] // CHECK-NEXT: | | `-IntegerLiteral [[ADDR_28]] 'int' 0 // CHECK-NEXT: | `-FunctionDecl [[ADDR_36:0x[a-z0-9]*]] line:10:5 used also_before 'int (double)' // CHECK-NEXT: | |-TemplateArgument type 'double' // CHECK-NEXT: | | `-BuiltinType [[ADDR_21]] 'double' -// CHECK-NEXT: | |-ParmVarDecl [[ADDR_37:0x[a-z0-9]*]] col:19 s 'double':'double' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_37:0x[a-z0-9]*]] col:19 s 'double' // CHECK-NEXT: | `-CompoundStmt [[ADDR_38:0x[a-z0-9]*]] // CHECK-NEXT: | `-ReturnStmt [[ADDR_39:0x[a-z0-9]*]] // CHECK-NEXT: | `-IntegerLiteral [[ADDR_28]] 'int' 0 @@ -116,7 +116,7 @@ int test() { // CHECK-NEXT: | | |-DeclStmt [[ADDR_44:0x[a-z0-9]*]] // CHECK-NEXT: | | | `-VarDecl [[ADDR_45:0x[a-z0-9]*]] col:10 referenced t 'double' // CHECK-NEXT: | | |-DeclStmt [[ADDR_46:0x[a-z0-9]*]] -// CHECK-NEXT: | | | `-VarDecl [[ADDR_47:0x[a-z0-9]*]] col:8 q 'S':'S' callinit +// CHECK-NEXT: | | | `-VarDecl [[ADDR_47:0x[a-z0-9]*]] col:8 q 'S' callinit // CHECK-NEXT: | | | `-ParenListExpr [[ADDR_48:0x[a-z0-9]*]] 'NULL TYPE' // CHECK-NEXT: | | | |-IntegerLiteral [[ADDR_49:0x[a-z0-9]*]] 'int' 1 // CHECK-NEXT: | | | `-UnaryOperator 
[[ADDR_50:0x[a-z0-9]*]] 'double *' prefix '&' cannot overflow @@ -126,11 +126,11 @@ int test() { // CHECK-NEXT: | |-FunctionDecl [[ADDR_54:0x[a-z0-9]*]] line:16:1 also_before[implementation={extension(allow_templates)}] 'int (S)' // CHECK-NEXT: | | |-TemplateArgument type 'int' // CHECK-NEXT: | | | `-BuiltinType [[ADDR_9]] 'int' -// CHECK-NEXT: | | `-ParmVarDecl [[ADDR_55:0x[a-z0-9]*]] col:22 s 'S':'S' +// CHECK-NEXT: | | `-ParmVarDecl [[ADDR_55:0x[a-z0-9]*]] col:22 s 'S' // CHECK-NEXT: | `-FunctionDecl [[ADDR_56:0x[a-z0-9]*]] line:16:1 also_before[implementation={extension(allow_templates)}] 'int (S)' // CHECK-NEXT: | |-TemplateArgument type 'double' // CHECK-NEXT: | | `-BuiltinType [[ADDR_21]] 'double' -// CHECK-NEXT: | `-ParmVarDecl [[ADDR_57:0x[a-z0-9]*]] col:22 s 'S':'S' +// CHECK-NEXT: | `-ParmVarDecl [[ADDR_57:0x[a-z0-9]*]] col:22 s 'S' // CHECK-NEXT: |-FunctionTemplateDecl [[ADDR_58:0x[a-z0-9]*]] col:5 implicit special // CHECK-NEXT: | |-TemplateTypeParmDecl [[ADDR_59:0x[a-z0-9]*]] col:20 referenced typename depth 0 index 0 T // CHECK-NEXT: | |-FunctionDecl [[ADDR_60:0x[a-z0-9]*]] col:5 special 'int (S)' @@ -140,7 +140,7 @@ int test() { // CHECK-NEXT: | `-FunctionDecl [[ADDR_65:0x[a-z0-9]*]] col:5 used special 'int (S)' // CHECK-NEXT: | |-TemplateArgument type 'int' // CHECK-NEXT: | | `-BuiltinType [[ADDR_9]] 'int' -// CHECK-NEXT: | |-ParmVarDecl [[ADDR_66:0x[a-z0-9]*]] col:18 s 'S':'S' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_66:0x[a-z0-9]*]] col:18 s 'S' // CHECK-NEXT: | `-OMPDeclareVariantAttr [[ADDR_67:0x[a-z0-9]*]] <> Implicit implementation={extension(allow_templates)} // CHECK-NEXT: | `-DeclRefExpr [[ADDR_68:0x[a-z0-9]*]] 'int (S)' {{.*}}Function [[ADDR_69:0x[a-z0-9]*]] 'special[implementation={extension(allow_templates)}]' 'int (S)' // CHECK-NEXT: |-FunctionTemplateDecl [[ADDR_70:0x[a-z0-9]*]] line:23:1 special[implementation={extension(allow_templates)}] @@ -151,7 +151,7 @@ int test() { // CHECK-NEXT: | | |-DeclStmt [[ADDR_72:0x[a-z0-9]*]] // CHECK-NEXT: | | | `-VarDecl [[ADDR_73:0x[a-z0-9]*]] col:5 referenced t 'T' // CHECK-NEXT: | | |-DeclStmt [[ADDR_74:0x[a-z0-9]*]] -// CHECK-NEXT: | | | `-VarDecl [[ADDR_75:0x[a-z0-9]*]] col:8 q 'S':'S' callinit +// CHECK-NEXT: | | | `-VarDecl [[ADDR_75:0x[a-z0-9]*]] col:8 q 'S' callinit // CHECK-NEXT: | | | `-ParenListExpr [[ADDR_76:0x[a-z0-9]*]] 'NULL TYPE' // CHECK-NEXT: | | | |-IntegerLiteral [[ADDR_77:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | `-UnaryOperator [[ADDR_78:0x[a-z0-9]*]] '' prefix '&' cannot overflow @@ -161,16 +161,16 @@ int test() { // CHECK-NEXT: | `-FunctionDecl [[ADDR_69]] line:23:1 special[implementation={extension(allow_templates)}] 'int (S)' // CHECK-NEXT: | |-TemplateArgument type 'int' // CHECK-NEXT: | | `-BuiltinType [[ADDR_9]] 'int' -// CHECK-NEXT: | |-ParmVarDecl [[ADDR_82:0x[a-z0-9]*]] col:18 s 'S':'S' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_82:0x[a-z0-9]*]] col:18 s 'S' // CHECK-NEXT: | `-CompoundStmt [[ADDR_83:0x[a-z0-9]*]] // CHECK-NEXT: | |-DeclStmt [[ADDR_84:0x[a-z0-9]*]] -// CHECK-NEXT: | | `-VarDecl [[ADDR_85:0x[a-z0-9]*]] col:5 used t 'int':'int' +// CHECK-NEXT: | | `-VarDecl [[ADDR_85:0x[a-z0-9]*]] col:5 used t 'int' // CHECK-NEXT: | |-DeclStmt [[ADDR_86:0x[a-z0-9]*]] -// CHECK-NEXT: | | `-VarDecl [[ADDR_87:0x[a-z0-9]*]] col:8 q 'S':'S' callinit -// CHECK-NEXT: | | `-CXXConstructExpr [[ADDR_88:0x[a-z0-9]*]] 'S':'S' 'void (int, int *)' +// CHECK-NEXT: | | `-VarDecl [[ADDR_87:0x[a-z0-9]*]] col:8 q 'S' callinit +// CHECK-NEXT: | | `-CXXConstructExpr [[ADDR_88:0x[a-z0-9]*]] 'S' 'void (int, int *)' // 
CHECK-NEXT: | | |-IntegerLiteral [[ADDR_77]] 'int' 0 // CHECK-NEXT: | | `-UnaryOperator [[ADDR_89:0x[a-z0-9]*]] 'int *' prefix '&' cannot overflow -// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_90:0x[a-z0-9]*]] 'int':'int' {{.*}}Var [[ADDR_85]] 't' 'int':'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_90:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_85]] 't' 'int' // CHECK-NEXT: | `-ReturnStmt [[ADDR_91:0x[a-z0-9]*]] // CHECK-NEXT: | `-IntegerLiteral [[ADDR_81]] 'int' 0 // CHECK-NEXT: |-FunctionTemplateDecl [[ADDR_92:0x[a-z0-9]*]] col:5 implicit also_after @@ -187,7 +187,7 @@ int test() { // CHECK-NEXT: | |-DeclStmt [[ADDR_101:0x[a-z0-9]*]] // CHECK-NEXT: | | `-VarDecl [[ADDR_102:0x[a-z0-9]*]] col:10 referenced t 'double' // CHECK-NEXT: | |-DeclStmt [[ADDR_103:0x[a-z0-9]*]] -// CHECK-NEXT: | | `-VarDecl [[ADDR_104:0x[a-z0-9]*]] col:8 q 'S':'S' callinit +// CHECK-NEXT: | | `-VarDecl [[ADDR_104:0x[a-z0-9]*]] col:8 q 'S' callinit // CHECK-NEXT: | | `-ParenListExpr [[ADDR_105:0x[a-z0-9]*]] 'NULL TYPE' // CHECK-NEXT: | | |-FloatingLiteral [[ADDR_106:0x[a-z0-9]*]] 'double' 2.000000e+00 // CHECK-NEXT: | | `-UnaryOperator [[ADDR_107:0x[a-z0-9]*]] 'double *' prefix '&' cannot overflow @@ -204,14 +204,14 @@ int test() { // CHECK-NEXT: | |-FunctionDecl [[ADDR_118:0x[a-z0-9]*]] line:38:5 used also_after 'int (int)' // CHECK-NEXT: | | |-TemplateArgument type 'int' // CHECK-NEXT: | | | `-BuiltinType [[ADDR_9]] 'int' -// CHECK-NEXT: | | |-ParmVarDecl [[ADDR_119:0x[a-z0-9]*]] col:18 s 'int':'int' +// CHECK-NEXT: | | |-ParmVarDecl [[ADDR_119:0x[a-z0-9]*]] col:18 s 'int' // CHECK-NEXT: | | `-CompoundStmt [[ADDR_120:0x[a-z0-9]*]] // CHECK-NEXT: | | `-ReturnStmt [[ADDR_121:0x[a-z0-9]*]] // CHECK-NEXT: | | `-IntegerLiteral [[ADDR_117]] 'int' 0 // CHECK-NEXT: | `-FunctionDecl [[ADDR_122:0x[a-z0-9]*]] line:38:5 used also_after 'int (double)' // CHECK-NEXT: | |-TemplateArgument type 'double' // CHECK-NEXT: | | `-BuiltinType [[ADDR_21]] 'double' -// CHECK-NEXT: | |-ParmVarDecl [[ADDR_123:0x[a-z0-9]*]] col:18 s 'double':'double' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_123:0x[a-z0-9]*]] col:18 s 'double' // CHECK-NEXT: | `-CompoundStmt [[ADDR_124:0x[a-z0-9]*]] // CHECK-NEXT: | `-ReturnStmt [[ADDR_125:0x[a-z0-9]*]] // CHECK-NEXT: | `-IntegerLiteral [[ADDR_117]] 'int' 0 @@ -243,19 +243,19 @@ int test() { // CHECK-NEXT: |-CallExpr [[ADDR_151:0x[a-z0-9]*]] 'int' // CHECK-NEXT: | |-ImplicitCastExpr [[ADDR_152:0x[a-z0-9]*]] 'int (*)(S)' // CHECK-NEXT: | | `-DeclRefExpr [[ADDR_153:0x[a-z0-9]*]] 'int (S)' {{.*}}Function [[ADDR_65]] 'special' 'int (S)' (FunctionTemplate [[ADDR_58]] 'special') -// CXX11-NEXT: | `-CXXConstructExpr [[ADDR_154:0x[a-z0-9]*]] 'S':'S' 'void (S &&) noexcept' elidable -// CXX11-NEXT: | `-MaterializeTemporaryExpr [[ADDR_155:0x[a-z0-9]*]] 'S':'S' xvalue -// CHECK-NEXT: | `-CXXTemporaryObjectExpr [[ADDR_156:0x[a-z0-9]*]] 'S':'S' 'void (int, int *)' +// CXX11-NEXT: | `-CXXConstructExpr [[ADDR_154:0x[a-z0-9]*]] 'S' 'void (S &&) noexcept' elidable +// CXX11-NEXT: | `-MaterializeTemporaryExpr [[ADDR_155:0x[a-z0-9]*]] 'S' xvalue +// CHECK-NEXT: | `-CXXTemporaryObjectExpr [[ADDR_156:0x[a-z0-9]*]] 'S' 'void (int, int *)' // CHECK-NEXT: | |-IntegerLiteral [[ADDR_157:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | `-ImplicitCastExpr [[ADDR_158:0x[a-z0-9]*]] 'int *' // CHECK-NEXT: | `-IntegerLiteral [[ADDR_159:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: `-CallExpr [[ADDR_160:0x[a-z0-9]*]] 'int' // CHECK-NEXT: |-ImplicitCastExpr [[ADDR_161:0x[a-z0-9]*]] 'int (*)(S)' // CHECK-NEXT: | `-DeclRefExpr [[ADDR_68]] 'int (S)' {{.*}}Function [[ADDR_69]] 
'special[implementation={extension(allow_templates)}]' 'int (S)' -// CXX11-NEXT: `-CXXConstructExpr [[ADDR_162:0x[a-z0-9]*]] 'S':'S' 'void (S &&) noexcept' elidable -// CXX11-NEXT: `-MaterializeTemporaryExpr [[ADDR_163:0x[a-z0-9]*]] 'S':'S' xvalue -// CXX11-NEXT: `-CXXTemporaryObjectExpr [[ADDR_156]] 'S':'S' 'void (int, int *)' -// CXX17-NEXT: `-CXXTemporaryObjectExpr [[ADDR_156]] 'S':'S' 'void (int, int *)' +// CXX11-NEXT: `-CXXConstructExpr [[ADDR_162:0x[a-z0-9]*]] 'S' 'void (S &&) noexcept' elidable +// CXX11-NEXT: `-MaterializeTemporaryExpr [[ADDR_163:0x[a-z0-9]*]] 'S' xvalue +// CXX11-NEXT: `-CXXTemporaryObjectExpr [[ADDR_156]] 'S' 'void (int, int *)' +// CXX17-NEXT: `-CXXTemporaryObjectExpr [[ADDR_156]] 'S' 'void (int, int *)' // CHECK-NEXT: |-IntegerLiteral [[ADDR_157]] 'int' 0 // CHECK-NEXT: `-ImplicitCastExpr [[ADDR_158]] 'int *' // CHECK-NEXT: `-IntegerLiteral [[ADDR_159]] 'int' 0 diff --git a/clang/test/AST/ast-dump-overloaded-operators.cpp b/clang/test/AST/ast-dump-overloaded-operators.cpp index dc6d7bdc9b085..639a0d9874eb0 100644 --- a/clang/test/AST/ast-dump-overloaded-operators.cpp +++ b/clang/test/AST/ast-dump-overloaded-operators.cpp @@ -31,14 +31,14 @@ void test() { // CHECK-NEXT: |-CXXOperatorCallExpr {{.*}} 'void' '+' // CHECK-NEXT: | |-ImplicitCastExpr {{.*}} 'void (*)(E, E)' // CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'void (E, E)' lvalue Function {{.*}} 'operator+' 'void (E, E)' -// CHECK-NEXT: | |-ImplicitCastExpr {{.*}} 'E':'E' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'E':'E' lvalue Var {{.*}} 'e' 'E' -// CHECK-NEXT: | `-ImplicitCastExpr {{.*}} 'E':'E' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'E':'E' lvalue Var {{.*}} 'e' 'E' +// CHECK-NEXT: | |-ImplicitCastExpr {{.*}} 'E' +// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'E' lvalue Var {{.*}} 'e' 'E' +// CHECK-NEXT: | `-ImplicitCastExpr {{.*}} 'E' +// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'E' lvalue Var {{.*}} 'e' 'E' // CHECK-NEXT: `-CXXOperatorCallExpr {{.*}} 'void' ',' // CHECK-NEXT: |-ImplicitCastExpr {{.*}} 'void (*)(E, E)' // CHECK-NEXT: | `-DeclRefExpr {{.*}} 'void (E, E)' lvalue Function {{.*}} 'operator,' 'void (E, E)' -// CHECK-NEXT: |-ImplicitCastExpr {{.*}} 'E':'E' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'E':'E' lvalue Var {{.*}} 'e' 'E' -// CHECK-NEXT: `-ImplicitCastExpr {{.*}} 'E':'E' -// CHECK-NEXT: `-DeclRefExpr {{.*}} 'E':'E' lvalue Var {{.*}} 'e' 'E' +// CHECK-NEXT: |-ImplicitCastExpr {{.*}} 'E' +// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'E' lvalue Var {{.*}} 'e' 'E' +// CHECK-NEXT: `-ImplicitCastExpr {{.*}} 'E' +// CHECK-NEXT: `-DeclRefExpr {{.*}} 'E' lvalue Var {{.*}} 'e' 'E' diff --git a/clang/test/AST/ast-dump-records-json.cpp b/clang/test/AST/ast-dump-records-json.cpp index bc53d03176f66..a7eb8771d3f02 100644 --- a/clang/test/AST/ast-dump-records-json.cpp +++ b/clang/test/AST/ast-dump-records-json.cpp @@ -3266,7 +3266,6 @@ struct Derived6 : virtual public Bases... { // CHECK-NEXT: { // CHECK-NEXT: "access": "public", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "Base1", // CHECK-NEXT: "qualType": "Base1" // CHECK-NEXT: }, // CHECK-NEXT: "writtenAccess": "none" @@ -3378,7 +3377,6 @@ struct Derived6 : virtual public Bases... { // CHECK-NEXT: { // CHECK-NEXT: "access": "private", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "Base1", // CHECK-NEXT: "qualType": "Base1" // CHECK-NEXT: }, // CHECK-NEXT: "writtenAccess": "private" @@ -3479,7 +3477,6 @@ struct Derived6 : virtual public Bases... 
{ // CHECK-NEXT: "access": "public", // CHECK-NEXT: "isVirtual": true, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "Base1", // CHECK-NEXT: "qualType": "Base1" // CHECK-NEXT: }, // CHECK-NEXT: "writtenAccess": "none" @@ -3718,7 +3715,6 @@ struct Derived6 : virtual public Bases... { // CHECK-NEXT: { // CHECK-NEXT: "access": "public", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "Base1", // CHECK-NEXT: "qualType": "Base1" // CHECK-NEXT: }, // CHECK-NEXT: "writtenAccess": "none" @@ -3727,7 +3723,6 @@ struct Derived6 : virtual public Bases... { // CHECK-NEXT: "access": "public", // CHECK-NEXT: "isVirtual": true, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "Base2", // CHECK-NEXT: "qualType": "Base2" // CHECK-NEXT: }, // CHECK-NEXT: "writtenAccess": "none" @@ -3735,7 +3730,6 @@ struct Derived6 : virtual public Bases... { // CHECK-NEXT: { // CHECK-NEXT: "access": "protected", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "Base3", // CHECK-NEXT: "qualType": "Base3" // CHECK-NEXT: }, // CHECK-NEXT: "writtenAccess": "protected" @@ -3975,7 +3969,6 @@ struct Derived6 : virtual public Bases... { // CHECK-NEXT: "access": "protected", // CHECK-NEXT: "isVirtual": true, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "Base1", // CHECK-NEXT: "qualType": "Base1" // CHECK-NEXT: }, // CHECK-NEXT: "writtenAccess": "protected" diff --git a/clang/test/AST/ast-dump-records.cpp b/clang/test/AST/ast-dump-records.cpp index 7a93b69d37ba6..bfd8892698d4b 100644 --- a/clang/test/AST/ast-dump-records.cpp +++ b/clang/test/AST/ast-dump-records.cpp @@ -301,10 +301,10 @@ class NonTrivial { struct CheckFullExpression { // CHECK: |-CXXRecordDecl {{.*}} struct CheckFullExpression definition NonTrivial value = NonTrivial(); -// CHECK: | |-FieldDecl {{.*}} value 'NonTrivial':'NonTrivial' -// CHECK-NEXT: | | `-ExprWithCleanups {{.*}} 'NonTrivial':'NonTrivial' -// CHECK-NEXT: | | `-CXXBindTemporaryExpr {{.*}} 'NonTrivial':'NonTrivial' (CXXTemporary{{.*}}) -// CHECK-NEXT: | | `-CXXTemporaryObjectExpr {{.*}} 'NonTrivial':'NonTrivial' 'void ()' +// CHECK: | |-FieldDecl {{.*}} value 'NonTrivial' +// CHECK-NEXT: | | `-ExprWithCleanups {{.*}} 'NonTrivial' +// CHECK-NEXT: | | `-CXXBindTemporaryExpr {{.*}} 'NonTrivial' (CXXTemporary{{.*}}) +// CHECK-NEXT: | | `-CXXTemporaryObjectExpr {{.*}} 'NonTrivial' 'void ()' }; struct CheckNoCleanup { diff --git a/clang/test/AST/ast-dump-recovery.cpp b/clang/test/AST/ast-dump-recovery.cpp index 278b9fc000b57..cfb013585ad74 100644 --- a/clang/test/AST/ast-dump-recovery.cpp +++ b/clang/test/AST/ast-dump-recovery.cpp @@ -202,27 +202,27 @@ void InvalidInitalizer(int x) { // CHECK-NEXT: `-InitListExpr Bar b2 = {1}; // CHECK: `-VarDecl {{.*}} b3 'Bar' - // CHECK-NEXT: `-RecoveryExpr {{.*}} 'Bar':'Bar' contains-errors + // CHECK-NEXT: `-RecoveryExpr {{.*}} 'Bar' contains-errors // CHECK-NEXT: `-DeclRefExpr {{.*}} 'x' 'int' Bar b3 = Bar(x); // CHECK: `-VarDecl {{.*}} b4 'Bar' - // CHECK-NEXT: `-RecoveryExpr {{.*}} 'Bar':'Bar' contains-errors + // CHECK-NEXT: `-RecoveryExpr {{.*}} 'Bar' contains-errors // CHECK-NEXT: `-InitListExpr {{.*}} 'void' // CHECK-NEXT: `-DeclRefExpr {{.*}} 'x' 'int' Bar b4 = Bar{x}; // CHECK: `-VarDecl {{.*}} b5 'Bar' - // CHECK-NEXT: `-CXXUnresolvedConstructExpr {{.*}} 'Bar':'Bar' contains-errors 'Bar' + // CHECK-NEXT: `-CXXUnresolvedConstructExpr {{.*}} 'Bar' contains-errors 'Bar' // CHECK-NEXT: `-RecoveryExpr {{.*}} contains-errors // CHECK-NEXT: `-UnresolvedLookupExpr {{.*}} 'invalid' Bar b5 = 
Bar(invalid()); // CHECK: `-VarDecl {{.*}} b6 'Bar' - // CHECK-NEXT: `-CXXUnresolvedConstructExpr {{.*}} 'Bar':'Bar' contains-errors 'Bar' + // CHECK-NEXT: `-CXXUnresolvedConstructExpr {{.*}} 'Bar' contains-errors 'Bar' // CHECK-NEXT: `-InitListExpr {{.*}} contains-errors // CHECK-NEXT: `-RecoveryExpr {{.*}} contains-errors // CHECK-NEXT: `-UnresolvedLookupExpr {{.*}} 'invalid' Bar b6 = Bar{invalid()}; - // CHECK: RecoveryExpr {{.*}} 'Bar':'Bar' contains-errors + // CHECK: RecoveryExpr {{.*}} 'Bar' contains-errors // CHECK-NEXT: `-IntegerLiteral {{.*}} 'int' 1 Bar(1); @@ -326,7 +326,7 @@ void CtorInitializer() { // CHECK-NEXT: | `-RecoveryExpr {{.*}} '' // CHECK-NEXT: | `-UnresolvedLookupExpr {{.*}} '' // CHECK-NEXT: |-CXXCtorInitializer Field {{.*}} 's' 'S' - // CHECK-NEXT: | `-RecoveryExpr {{.*}} 'S':'S' contains-errors + // CHECK-NEXT: | `-RecoveryExpr {{.*}} 'S' contains-errors // CHECK-NEXT: | |-IntegerLiteral {{.*}} 1 // CHECK-NEXT: | `-IntegerLiteral {{.*}} 2 }; diff --git a/clang/test/AST/ast-dump-stmt-json.cpp b/clang/test/AST/ast-dump-stmt-json.cpp index 8e42a2e276b46..667a12a012024 100644 --- a/clang/test/AST/ast-dump-stmt-json.cpp +++ b/clang/test/AST/ast-dump-stmt-json.cpp @@ -2265,7 +2265,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: "isReferenced": true, // CHECK-NEXT: "name": "obj", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "DependentScopeMemberExprWrapper", // CHECK-NEXT: "qualType": "DependentScopeMemberExprWrapper" // CHECK-NEXT: } // CHECK-NEXT: } @@ -2331,7 +2330,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "DependentScopeMemberExprWrapper", // CHECK-NEXT: "qualType": "DependentScopeMemberExprWrapper" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "lvalue", @@ -2340,7 +2338,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: "kind": "VarDecl", // CHECK-NEXT: "name": "obj", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "DependentScopeMemberExprWrapper", // CHECK-NEXT: "qualType": "DependentScopeMemberExprWrapper" // CHECK-NEXT: } // CHECK-NEXT: } @@ -2429,7 +2426,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "DependentScopeMemberExprWrapper", // CHECK-NEXT: "qualType": "DependentScopeMemberExprWrapper" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "lvalue", @@ -2438,7 +2434,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: "kind": "VarDecl", // CHECK-NEXT: "name": "obj", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "DependentScopeMemberExprWrapper", // CHECK-NEXT: "qualType": "DependentScopeMemberExprWrapper" // CHECK-NEXT: } // CHECK-NEXT: } @@ -2593,7 +2588,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "DependentScopeMemberExprWrapper", // CHECK-NEXT: "qualType": "DependentScopeMemberExprWrapper" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "lvalue", @@ -2602,7 +2596,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: "kind": "VarDecl", // CHECK-NEXT: "name": "obj", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "DependentScopeMemberExprWrapper", // CHECK-NEXT: "qualType": "DependentScopeMemberExprWrapper" // CHECK-NEXT: } // CHECK-NEXT: } @@ -2779,7 +2772,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // 
CHECK-NEXT: "isReferenced": true, // CHECK-NEXT: "name": "obj", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "OtherDependentScopeMemberExprWrapper", // CHECK-NEXT: "qualType": "OtherDependentScopeMemberExprWrapper" // CHECK-NEXT: } // CHECK-NEXT: } @@ -2867,7 +2859,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "OtherDependentScopeMemberExprWrapper", // CHECK-NEXT: "qualType": "OtherDependentScopeMemberExprWrapper" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "lvalue", @@ -2876,7 +2867,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: "kind": "VarDecl", // CHECK-NEXT: "name": "obj", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "OtherDependentScopeMemberExprWrapper", // CHECK-NEXT: "qualType": "OtherDependentScopeMemberExprWrapper" // CHECK-NEXT: } // CHECK-NEXT: } @@ -3037,7 +3027,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "U", // CHECK-NEXT: "qualType": "U" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "prvalue", @@ -3066,7 +3055,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "U", // CHECK-NEXT: "qualType": "U" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "prvalue", @@ -4563,7 +4551,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "__begin1", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: }, // CHECK-NEXT: "init": "c", @@ -4663,7 +4650,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "__end1", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: }, // CHECK-NEXT: "init": "c", @@ -4804,7 +4790,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "prvalue", @@ -4826,7 +4811,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "lvalue", @@ -4835,7 +4819,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: "kind": "VarDecl", // CHECK-NEXT: "name": "__begin1", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: } // CHECK-NEXT: } @@ -4858,7 +4841,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "prvalue", @@ -4880,7 +4862,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "lvalue", @@ -4889,7 +4870,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: "kind": "VarDecl", // CHECK-NEXT: "name": "__end1", // CHECK-NEXT: "type": { 
-// CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: } // CHECK-NEXT: } @@ -4914,7 +4894,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "lvalue", @@ -4937,7 +4916,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "lvalue", @@ -4946,7 +4924,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: "kind": "VarDecl", // CHECK-NEXT: "name": "__begin1", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: } // CHECK-NEXT: } @@ -5055,7 +5032,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "prvalue", @@ -5077,7 +5053,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "lvalue", @@ -5086,7 +5061,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: "kind": "VarDecl", // CHECK-NEXT: "name": "__begin1", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: } // CHECK-NEXT: } @@ -5160,7 +5134,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "C", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "Container", // CHECK-NEXT: "qualType": "Container" // CHECK-NEXT: }, // CHECK-NEXT: "init": "call", @@ -5181,7 +5154,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "Container", // CHECK-NEXT: "qualType": "Container" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "prvalue", @@ -5275,7 +5247,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "Container", // CHECK-NEXT: "qualType": "Container" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "lvalue", @@ -5284,7 +5255,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: "kind": "VarDecl", // CHECK-NEXT: "name": "C", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "Container", // CHECK-NEXT: "qualType": "Container" // CHECK-NEXT: } // CHECK-NEXT: } @@ -5333,7 +5303,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "__begin1", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: }, // CHECK-NEXT: "init": "c", @@ -5418,7 +5387,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "Container", // CHECK-NEXT: "qualType": "Container" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "lvalue", @@ -5481,7 +5449,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: "isUsed": true, // 
CHECK-NEXT: "name": "__end1", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: }, // CHECK-NEXT: "init": "c", @@ -5566,7 +5533,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "Container", // CHECK-NEXT: "qualType": "Container" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "lvalue", @@ -5626,7 +5592,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "prvalue", @@ -5648,7 +5613,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "lvalue", @@ -5657,7 +5621,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: "kind": "VarDecl", // CHECK-NEXT: "name": "__begin1", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: } // CHECK-NEXT: } @@ -5680,7 +5643,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "prvalue", @@ -5702,7 +5664,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "lvalue", @@ -5711,7 +5672,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: "kind": "VarDecl", // CHECK-NEXT: "name": "__end1", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: } // CHECK-NEXT: } @@ -5736,7 +5696,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "lvalue", @@ -5759,7 +5718,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "lvalue", @@ -5768,7 +5726,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: "kind": "VarDecl", // CHECK-NEXT: "name": "__begin1", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: } // CHECK-NEXT: } @@ -5877,7 +5834,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "prvalue", @@ -5899,7 +5855,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "lvalue", @@ -5908,7 +5863,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: "kind": 
"VarDecl", // CHECK-NEXT: "name": "__begin1", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: } // CHECK-NEXT: } @@ -6120,7 +6074,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "__begin1", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: }, // CHECK-NEXT: "init": "c", @@ -6220,7 +6173,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "__end1", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: }, // CHECK-NEXT: "init": "c", @@ -6361,7 +6313,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "prvalue", @@ -6383,7 +6334,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "lvalue", @@ -6392,7 +6342,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: "kind": "VarDecl", // CHECK-NEXT: "name": "__begin1", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: } // CHECK-NEXT: } @@ -6415,7 +6364,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "prvalue", @@ -6437,7 +6385,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "lvalue", @@ -6446,7 +6393,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: "kind": "VarDecl", // CHECK-NEXT: "name": "__end1", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: } // CHECK-NEXT: } @@ -6471,7 +6417,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "lvalue", @@ -6494,7 +6439,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "lvalue", @@ -6503,7 +6447,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: "kind": "VarDecl", // CHECK-NEXT: "name": "__begin1", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: } // CHECK-NEXT: } @@ -6612,7 +6555,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "prvalue", @@ -6634,7 +6576,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // 
CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "lvalue", @@ -6643,7 +6584,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: "kind": "VarDecl", // CHECK-NEXT: "name": "__begin1", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: } // CHECK-NEXT: } diff --git a/clang/test/AST/ast-dump-stmt.cpp b/clang/test/AST/ast-dump-stmt.cpp index 4be66756e6c1e..407584e5b82de 100644 --- a/clang/test/AST/ast-dump-stmt.cpp +++ b/clang/test/AST/ast-dump-stmt.cpp @@ -99,8 +99,8 @@ void TestUnionInitList() U us[3] = {1}; // CHECK: VarDecl {{.+}} col:5 us 'U[3]' cinit // CHECK-NEXT: `-InitListExpr {{.+}} 'U[3]' -// CHECK-NEXT: |-array_filler: InitListExpr {{.+}} 'U':'U' field Field {{.+}} 'i' 'int' -// CHECK-NEXT: `-InitListExpr {{.+}} 'U':'U' field Field {{.+}} 'i' 'int' +// CHECK-NEXT: |-array_filler: InitListExpr {{.+}} 'U' field Field {{.+}} 'i' 'int' +// CHECK-NEXT: `-InitListExpr {{.+}} 'U' field Field {{.+}} 'i' 'int' // CHECK-NEXT: `-IntegerLiteral {{.+}} 'int' 1 } @@ -198,28 +198,28 @@ void TestIteration() { // CHECK-NEXT: VarDecl 0x{{[^ ]*}} col:16 implicit used __range1 'int (&)[10]' cinit // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'int[10]' lvalue Var 0x{{[^ ]*}} 'vals' 'int[10]' // CHECK-NEXT: DeclStmt - // CHECK-NEXT: VarDecl 0x{{[^ ]*}} col:14 implicit used __begin1 'int *':'int *' cinit + // CHECK-NEXT: VarDecl 0x{{[^ ]*}} col:14 implicit used __begin1 'int *' cinit // CHECK-NEXT: ImplicitCastExpr // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'int[10]' lvalue Var 0x{{[^ ]*}} '__range1' 'int (&)[10]' // CHECK-NEXT: DeclStmt - // CHECK-NEXT: VarDecl 0x{{[^ ]*}} col:14 implicit used __end1 'int *':'int *' cinit + // CHECK-NEXT: VarDecl 0x{{[^ ]*}} col:14 implicit used __end1 'int *' cinit // CHECK-NEXT: BinaryOperator 0x{{[^ ]*}} 'int *' '+' // CHECK-NEXT: ImplicitCastExpr // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'int[10]' lvalue Var 0x{{[^ ]*}} '__range1' 'int (&)[10]' // CHECK-NEXT: IntegerLiteral 0x{{[^ ]*}} 'long' 10 // CHECK-NEXT: BinaryOperator 0x{{[^ ]*}} 'bool' '!=' // CHECK-NEXT: ImplicitCastExpr - // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'int *':'int *' lvalue Var 0x{{[^ ]*}} '__begin1' 'int *':'int *' + // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'int *' lvalue Var 0x{{[^ ]*}} '__begin1' 'int *' // CHECK-NEXT: ImplicitCastExpr - // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'int *':'int *' lvalue Var 0x{{[^ ]*}} '__end1' 'int *':'int *' - // CHECK-NEXT: UnaryOperator 0x{{[^ ]*}} 'int *':'int *' lvalue prefix '++' - // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'int *':'int *' lvalue Var 0x{{[^ ]*}} '__begin1' 'int *':'int *' + // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'int *' lvalue Var 0x{{[^ ]*}} '__end1' 'int *' + // CHECK-NEXT: UnaryOperator 0x{{[^ ]*}} 'int *' lvalue prefix '++' + // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'int *' lvalue Var 0x{{[^ ]*}} '__begin1' 'int *' // CHECK-NEXT: DeclStmt // CHECK-NEXT: VarDecl 0x{{[^ ]*}} col:12 v 'int' cinit // CHECK-NEXT: ImplicitCastExpr // CHECK-NEXT: UnaryOperator 0x{{[^ ]*}} 'int' lvalue prefix '*' cannot overflow // CHECK-NEXT: ImplicitCastExpr - // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'int *':'int *' lvalue Var 0x{{[^ ]*}} '__begin1' 'int *':'int *' + // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'int *' lvalue Var 0x{{[^ ]*}} '__begin1' 'int *' // CHECK-NEXT: NullStmt Container C; @@ -229,32 +229,32 @@ void TestIteration() { // CHECK-NEXT: <<>> // 
CHECK-NEXT: DeclStmt // CHECK-NEXT: VarDecl 0x{{[^ ]*}} col:16 implicit used __range1 'Container &' cinit - // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'Container':'Container' lvalue Var 0x{{[^ ]*}} 'C' 'Container':'Container' + // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'Container' lvalue Var 0x{{[^ ]*}} 'C' 'Container' // CHECK-NEXT: DeclStmt - // CHECK-NEXT: VarDecl 0x{{[^ ]*}} col:14 implicit used __begin1 'int *':'int *' cinit + // CHECK-NEXT: VarDecl 0x{{[^ ]*}} col:14 implicit used __begin1 'int *' cinit // CHECK-NEXT: CXXMemberCallExpr 0x{{[^ ]*}} 'int *' // CHECK-NEXT: MemberExpr 0x{{[^ ]*}} '' .begin 0x{{[^ ]*}} // CHECK-NEXT: ImplicitCastExpr - // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'Container':'Container' lvalue Var 0x{{[^ ]*}} '__range1' 'Container &' + // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'Container' lvalue Var 0x{{[^ ]*}} '__range1' 'Container &' // CHECK-NEXT: DeclStmt - // CHECK-NEXT: VarDecl 0x{{[^ ]*}} col:14 implicit used __end1 'int *':'int *' cinit + // CHECK-NEXT: VarDecl 0x{{[^ ]*}} col:14 implicit used __end1 'int *' cinit // CHECK-NEXT: CXXMemberCallExpr 0x{{[^ ]*}} 'int *' // CHECK-NEXT: MemberExpr 0x{{[^ ]*}} '' .end 0x{{[^ ]*}} // CHECK-NEXT: ImplicitCastExpr - // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'Container':'Container' lvalue Var 0x{{[^ ]*}} '__range1' 'Container &' + // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'Container' lvalue Var 0x{{[^ ]*}} '__range1' 'Container &' // CHECK-NEXT: BinaryOperator 0x{{[^ ]*}} 'bool' '!=' // CHECK-NEXT: ImplicitCastExpr - // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'int *':'int *' lvalue Var 0x{{[^ ]*}} '__begin1' 'int *':'int *' + // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'int *' lvalue Var 0x{{[^ ]*}} '__begin1' 'int *' // CHECK-NEXT: ImplicitCastExpr - // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'int *':'int *' lvalue Var 0x{{[^ ]*}} '__end1' 'int *':'int *' - // CHECK-NEXT: UnaryOperator 0x{{[^ ]*}} 'int *':'int *' lvalue prefix '++' - // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'int *':'int *' lvalue Var 0x{{[^ ]*}} '__begin1' 'int *':'int *' + // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'int *' lvalue Var 0x{{[^ ]*}} '__end1' 'int *' + // CHECK-NEXT: UnaryOperator 0x{{[^ ]*}} 'int *' lvalue prefix '++' + // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'int *' lvalue Var 0x{{[^ ]*}} '__begin1' 'int *' // CHECK-NEXT: DeclStmt // CHECK-NEXT: VarDecl 0x{{[^ ]*}} col:12 v 'int' cinit // CHECK-NEXT: ImplicitCastExpr // CHECK-NEXT: UnaryOperator 0x{{[^ ]*}} 'int' lvalue prefix '*' cannot overflow // CHECK-NEXT: ImplicitCastExpr - // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'int *':'int *' lvalue Var 0x{{[^ ]*}} '__begin1' 'int *':'int *' + // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'int *' lvalue Var 0x{{[^ ]*}} '__begin1' 'int *' // CHECK-NEXT: NullStmt for (int a; int v : vals) @@ -266,27 +266,27 @@ void TestIteration() { // CHECK-NEXT: VarDecl 0x{{[^ ]*}} col:23 implicit used __range1 'int (&)[10]' cinit // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'int[10]' lvalue Var 0x{{[^ ]*}} 'vals' 'int[10]' // CHECK-NEXT: DeclStmt - // CHECK-NEXT: VarDecl 0x{{[^ ]*}} col:21 implicit used __begin1 'int *':'int *' cinit + // CHECK-NEXT: VarDecl 0x{{[^ ]*}} col:21 implicit used __begin1 'int *' cinit // CHECK-NEXT: ImplicitCastExpr // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'int[10]' lvalue Var 0x{{[^ ]*}} '__range1' 'int (&)[10]' // CHECK-NEXT: DeclStmt - // CHECK-NEXT: VarDecl 0x{{[^ ]*}} col:21 implicit used __end1 'int *':'int *' cinit + // CHECK-NEXT: VarDecl 0x{{[^ ]*}} col:21 implicit used __end1 'int *' cinit // CHECK-NEXT: BinaryOperator 0x{{[^ ]*}} 'int *' '+' // CHECK-NEXT: 
ImplicitCastExpr // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'int[10]' lvalue Var 0x{{[^ ]*}} '__range1' 'int (&)[10]' // CHECK-NEXT: IntegerLiteral 0x{{[^ ]*}} 'long' 10 // CHECK-NEXT: BinaryOperator 0x{{[^ ]*}} 'bool' '!=' // CHECK-NEXT: ImplicitCastExpr - // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'int *':'int *' lvalue Var 0x{{[^ ]*}} '__begin1' 'int *':'int *' + // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'int *' lvalue Var 0x{{[^ ]*}} '__begin1' 'int *' // CHECK-NEXT: ImplicitCastExpr - // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'int *':'int *' lvalue Var 0x{{[^ ]*}} '__end1' 'int *':'int *' - // CHECK-NEXT: UnaryOperator 0x{{[^ ]*}} 'int *':'int *' lvalue prefix '++' - // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'int *':'int *' lvalue Var 0x{{[^ ]*}} '__begin1' 'int *':'int *' + // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'int *' lvalue Var 0x{{[^ ]*}} '__end1' 'int *' + // CHECK-NEXT: UnaryOperator 0x{{[^ ]*}} 'int *' lvalue prefix '++' + // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'int *' lvalue Var 0x{{[^ ]*}} '__begin1' 'int *' // CHECK-NEXT: DeclStmt // CHECK-NEXT: VarDecl 0x{{[^ ]*}} col:19 v 'int' cinit // CHECK-NEXT: ImplicitCastExpr // CHECK-NEXT: UnaryOperator 0x{{[^ ]*}} 'int' lvalue prefix '*' cannot overflow // CHECK-NEXT: ImplicitCastExpr - // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'int *':'int *' lvalue Var 0x{{[^ ]*}} '__begin1' 'int *':'int *' + // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'int *' lvalue Var 0x{{[^ ]*}} '__begin1' 'int *' // CHECK-NEXT: NullStmt } diff --git a/clang/test/AST/ast-dump-stmt.m b/clang/test/AST/ast-dump-stmt.m index 1d349ece966f8..e0fc16b3fa825 100644 --- a/clang/test/AST/ast-dump-stmt.m +++ b/clang/test/AST/ast-dump-stmt.m @@ -55,4 +55,4 @@ id TestCompoundLiteral(id a) { // CHECK: FunctionDecl{{.*}}TestCompoundLiteral // CHECK: ExprWithCleanups // CHECK-NEXT: cleanup CompoundLiteralExpr -// CHECK: CompoundLiteralExpr{{.*}}'S':'S' lvalue +// CHECK: CompoundLiteralExpr{{.*}}'S' lvalue diff --git a/clang/test/AST/ast-dump-template-decls-json.cpp b/clang/test/AST/ast-dump-template-decls-json.cpp index f51ef937d91db..00a656cd05917 100644 --- a/clang/test/AST/ast-dump-template-decls-json.cpp +++ b/clang/test/AST/ast-dump-template-decls-json.cpp @@ -826,7 +826,6 @@ void i(); // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "Uy", // CHECK-NEXT: "qualType": "Uy" // CHECK-NEXT: } // CHECK-NEXT: } diff --git a/clang/test/AST/ast-dump-template-decls.cpp b/clang/test/AST/ast-dump-template-decls.cpp index 847a437e0f85e..142bc9e6ad9a0 100644 --- a/clang/test/AST/ast-dump-template-decls.cpp +++ b/clang/test/AST/ast-dump-template-decls.cpp @@ -171,7 +171,7 @@ template class E {}; using test1 = D; // CHECK: TypeAliasDecl 0x{{[^ ]*}} col:7 test1 'D':'subst_default_argument::E>' // CHECK: TemplateSpecializationType 0x{{[^ ]*}} 'A' sugar A -// CHECK-NEXT: |-TemplateArgument type 'int':'int' +// CHECK-NEXT: |-TemplateArgument type 'int' // CHECK-NEXT: | `-SubstTemplateTypeParmType 0x{{[^ ]*}} 'int' sugar class depth 0 index 1 D2 // CHECK-NEXT: | |-TypeAliasTemplate 0x{{[^ ]*}} 'D' // CHECK-NEXT: | `-BuiltinType 0x{{[^ ]*}} 'int' diff --git a/clang/test/AST/ast-dump-template-json-win32-mangler-crash.cpp b/clang/test/AST/ast-dump-template-json-win32-mangler-crash.cpp index f6d1caa121f2a..8c03b58abb0ed 100644 --- a/clang/test/AST/ast-dump-template-json-win32-mangler-crash.cpp +++ b/clang/test/AST/ast-dump-template-json-win32-mangler-crash.cpp @@ -532,7 +532,6 @@ int main() // CHECK-NEXT: }, // CHECK-NEXT: "name": "type", // CHECK-NEXT: "type": { -// 
CHECK-NEXT: "desugaredQualType": "integral_constant<_Ty, _Val>", // CHECK-NEXT: "qualType": "integral_constant<_Ty, _Val>" // CHECK-NEXT: }, // CHECK-NEXT: "inner": [ @@ -872,7 +871,6 @@ int main() // CHECK-NEXT: }, // CHECK-NEXT: "name": "bool_constant", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "integral_constant", // CHECK-NEXT: "qualType": "integral_constant" // CHECK-NEXT: }, // CHECK-NEXT: "inner": [ diff --git a/clang/test/AST/ast-dump-temporaries-json.cpp b/clang/test/AST/ast-dump-temporaries-json.cpp index a8b14de29fcf9..0fd2762cee1a7 100644 --- a/clang/test/AST/ast-dump-temporaries-json.cpp +++ b/clang/test/AST/ast-dump-temporaries-json.cpp @@ -36,7 +36,6 @@ void MaterializeTemp() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "const S", // CHECK-NEXT: "qualType": "const S" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "lvalue", @@ -58,7 +57,6 @@ void MaterializeTemp() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "const S", // CHECK-NEXT: "qualType": "const S" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "lvalue", @@ -89,7 +87,6 @@ void MaterializeTemp() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "const S", // CHECK-NEXT: "qualType": "const S" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "prvalue", @@ -111,7 +108,6 @@ void MaterializeTemp() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "S", // CHECK-NEXT: "qualType": "S" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "prvalue", @@ -141,7 +137,6 @@ void MaterializeTemp() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "S", // CHECK-NEXT: "qualType": "S" // CHECK-NEXT: }, // CHECK-NEXT: "valueCategory": "prvalue", diff --git a/clang/test/AST/ast-dump-types-json.cpp b/clang/test/AST/ast-dump-types-json.cpp index 0a92483fb1c7f..c1bb9266fa869 100644 --- a/clang/test/AST/ast-dump-types-json.cpp +++ b/clang/test/AST/ast-dump-types-json.cpp @@ -101,7 +101,6 @@ using ::TestUsingShadowDeclType; // CHECK-NEXT: }, // CHECK-NEXT: "name": "TestElaboratedType2", // CHECK-NEXT: "type": { -// CHECK-NEXT: "desugaredQualType": "NS::S", // CHECK-NEXT: "qualType": "NS::S" // CHECK-NEXT: }, // CHECK-NEXT: "inner": [ diff --git a/clang/test/AST/coroutine-locals-cleanup.cpp b/clang/test/AST/coroutine-locals-cleanup.cpp index 6eb6fc0948cbb..ce106b8e230a1 100644 --- a/clang/test/AST/coroutine-locals-cleanup.cpp +++ b/clang/test/AST/coroutine-locals-cleanup.cpp @@ -85,7 +85,7 @@ Task bar() { // CHECK: CaseStmt // CHECK: ExprWithCleanups {{.*}} 'void' // CHECK-NEXT: CoawaitExpr -// CHECK-NEXT: CXXBindTemporaryExpr {{.*}} 'Task':'Task' (CXXTemporary {{.*}}) +// CHECK-NEXT: CXXBindTemporaryExpr {{.*}} 'Task' (CXXTemporary {{.*}}) // CHECK: MaterializeTemporaryExpr {{.*}} 'Awaiter':'Task::Awaiter' // CHECK: ExprWithCleanups {{.*}} 'bool' // CHECK-NEXT: CXXMemberCallExpr {{.*}} 'bool' @@ -98,7 +98,7 @@ Task bar() { // CHECK: CaseStmt // CHECK: ExprWithCleanups {{.*}} 'void' // CHECK-NEXT: CoawaitExpr -// CHECK-NEXT: CXXBindTemporaryExpr {{.*}} 'Task':'Task' (CXXTemporary {{.*}}) +// CHECK-NEXT: CXXBindTemporaryExpr {{.*}} 'Task' (CXXTemporary {{.*}}) // CHECK: MaterializeTemporaryExpr {{.*}} 'Awaiter':'Task::Awaiter' // CHECK: ExprWithCleanups {{.*}} 'bool' // CHECK-NEXT: CXXMemberCallExpr {{.*}} 'bool' diff --git a/clang/test/AST/float16.cpp 
b/clang/test/AST/float16.cpp index 9e0d70b9a1c00..a9e1144cf0958 100644 --- a/clang/test/AST/float16.cpp +++ b/clang/test/AST/float16.cpp @@ -85,7 +85,7 @@ auto C = -1.0f16 + B; //CHECK-NEXT: | `-ImplicitCastExpr {{.*}} '__fp16' //CHECK-NEXT: | `-UnaryOperator {{.*}} 'double' prefix '-' //CHECK-NEXT: | `-FloatingLiteral {{.*}} 'double' 1.000000e-01 -//CHECK-NEXT: |-VarDecl {{.*}} C 'float':'float' cinit +//CHECK-NEXT: |-VarDecl {{.*}} C 'float' cinit //CHECK-NEXT: | `-BinaryOperator {{.*}} 'float' '+' //CHECK-NEXT: | |-ImplicitCastExpr {{.*}} 'float' //CHECK-NEXT: | | `-UnaryOperator {{.*}} '_Float16' prefix '-' @@ -100,7 +100,7 @@ auto C = -1.0f16 + B; //CHECK-NATIVE: | `-ImplicitCastExpr {{.*}} '__fp16' //CHECK-NATIVE: | `-UnaryOperator {{.*}} 'double' prefix '-' //CHECK-NATIVE: | `-FloatingLiteral {{.*}} 'double' 1.000000e-01 -//CHECK-NATIVE: |-VarDecl {{.*}} C '__fp16':'__fp16' cinit +//CHECK-NATIVE: |-VarDecl {{.*}} C '__fp16' cinit //CHECK-NATIVE: | `-BinaryOperator {{.*}} '__fp16' '+' //CHECK-NATIVE: | |-ImplicitCastExpr {{.*}} '__fp16' //CHECK-NATIVE: | | `-UnaryOperator {{.*}} '_Float16' prefix '-' @@ -178,12 +178,12 @@ template C func1t(C arg) { //CHECK-NEXT: | | `-FloatingLiteral {{.*}} '_Float16' 2.000000e+00 //CHECK-NEXT: | `-FunctionDecl {{.*}} used func1t '_Float16 (_Float16)' //CHECK-NEXT: | |-TemplateArgument type '_Float16' -//CHECK: | |-ParmVarDecl {{.*}} used arg '_Float16':'_Float16' +//CHECK: | |-ParmVarDecl {{.*}} used arg '_Float16' //CHECK-NEXT: | `-CompoundStmt //CHECK-NEXT: | `-ReturnStmt //CHECK-NEXT: | `-BinaryOperator {{.*}} '_Float16' '*' -//CHECK-NEXT: | |-ImplicitCastExpr {{.*}} '_Float16':'_Float16' -//CHECK-NEXT: | | `-DeclRefExpr {{.*}} '_Float16':'_Float16' lvalue ParmVar {{.*}} 'arg' '_Float16':'_Float16' +//CHECK-NEXT: | |-ImplicitCastExpr {{.*}} '_Float16' +//CHECK-NEXT: | | `-DeclRefExpr {{.*}} '_Float16' lvalue ParmVar {{.*}} 'arg' '_Float16' //CHECK-NEXT: | `-FloatingLiteral {{.*}} '_Float16' 2.000000e+00 @@ -223,14 +223,14 @@ int main(void) { //CHECK-NEXT: | `-FloatingLiteral {{.*}} 'double' 1.000977e+00 C1 c1(f1l); -//CHECK: | `-VarDecl{{.*}} used c1 'C1':'C1' callinit -//CHECK-NEXT: | `-CXXConstructExpr {{.*}} 'C1':'C1' 'void (_Float16) +//CHECK: | `-VarDecl{{.*}} used c1 'C1' callinit +//CHECK-NEXT: | `-CXXConstructExpr {{.*}} 'C1' 'void (_Float16) //CHECK-NEXT: | `-ImplicitCastExpr {{.*}} '_Float16' //CHECK-NEXT: | `-DeclRefExpr {{.*}} '_Float16' lvalue Var 0x{{.*}} 'f1l' '_Float16' S1<_Float16> s1 = { 132.f16 }; -//CHECK: | `-VarDecl {{.*}} used s1 'S1<_Float16>':'S1<_Float16>' cinit -//CHECK-NEXT: | `-InitListExpr {{.*}} 'S1<_Float16>':'S1<_Float16>' +//CHECK: | `-VarDecl {{.*}} used s1 'S1<_Float16>' cinit +//CHECK-NEXT: | `-InitListExpr {{.*}} 'S1<_Float16>' //CHECK-NEXT: | `-FloatingLiteral {{.*}} '_Float16' 1.320000e+02 _Float16 f4l = func1n(f1l) + func1f(f2l) + c1.func1c(f3l) + c1.func2c(f1l) + @@ -255,37 +255,37 @@ int main(void) { //CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} '_Float16' lvalue Var {{.*}} 'f2l' '_Float16' //CHECK-NEXT: | | | | | | `-CXXMemberCallExpr {{.*}} '_Float16' //CHECK-NEXT: | | | | | | |-MemberExpr {{.*}} '' .func1c {{.*}} -//CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'C1':'C1' lvalue Var {{.*}} 'c1' 'C1':'C1' +//CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'C1' lvalue Var {{.*}} 'c1' 'C1' //CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} '_Float16' //CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} '_Float16' lvalue Var {{.*}} 'f3l' '_Float16' //CHECK-NEXT: | | | | | `-CallExpr {{.*}} '_Float16' 
//CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} '_Float16 (*)(_Float16)' //CHECK-NEXT: | | | | | | `-MemberExpr {{.*}} '_Float16 (_Float16)' lvalue .func2c {{.*}} -//CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'C1':'C1' lvalue Var {{.*}} 'c1' 'C1':'C1' +//CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'C1' lvalue Var {{.*}} 'c1' 'C1' //CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} '_Float16' //CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} '_Float16' lvalue Var {{.*}} 'f1l' '_Float16' -//CHECK-NEXT: | | | | `-CallExpr {{.*}} '_Float16':'_Float16' +//CHECK-NEXT: | | | | `-CallExpr {{.*}} '_Float16' //CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} '_Float16 (*)(_Float16)' //CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} '_Float16 (_Float16)' lvalue Function {{.*}} 'func1t' '_Float16 (_Float16)' (FunctionTemplate {{.*}} 'func1t') //CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} '_Float16' //CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} '_Float16' lvalue Var {{.*}} 'f1l' '_Float16' //CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} '_Float16' //CHECK-NEXT: | | | `-MemberExpr {{.*}} '_Float16' lvalue .mem2 {{.*}} -//CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'S1<_Float16>':'S1<_Float16>' lvalue Var {{.*}} 's1' 'S1<_Float16>':'S1<_Float16>' +//CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'S1<_Float16>' lvalue Var {{.*}} 's1' 'S1<_Float16>' //CHECK-NEXT: | | `-ImplicitCastExpr {{.*}} '_Float16' //CHECK-NEXT: | | `-DeclRefExpr {{.*}} '_Float16' lvalue Var {{.*}} 'f1n' '_Float16' //CHECK-NEXT: | `-ImplicitCastExpr {{.*}} '_Float16' //CHECK-NEXT: | `-DeclRefExpr {{.*}} '_Float16' lvalue Var {{.*}} 'f2n' '_Float16' auto f5l = -1.f16, *f6l = &f2l, f7l = func1t(f3l); -//CHECK: | |-VarDecl {{.*}} f5l '_Float16':'_Float16' cinit +//CHECK: | |-VarDecl {{.*}} f5l '_Float16' cinit //CHECK-NEXT: | | `-UnaryOperator {{.*}} '_Float16' prefix '-' //CHECK-NEXT: | | `-FloatingLiteral {{.*}} '_Float16' 1.000000e+00 //CHECK-NEXT: | |-VarDecl {{.*}} f6l '_Float16 *' cinit //CHECK-NEXT: | | `-UnaryOperator {{.*}} '_Float16 *' prefix '&' //CHECK-NEXT: | | `-DeclRefExpr {{.*}} '_Float16' lvalue Var {{.*}} 'f2l' '_Float16' -//CHECK-NEXT: | `-VarDecl {{.*}} f7l '_Float16':'_Float16' cinit -//CHECK-NEXT: | `-CallExpr {{.*}} '_Float16':'_Float16' +//CHECK-NEXT: | `-VarDecl {{.*}} f7l '_Float16' cinit +//CHECK-NEXT: | `-CallExpr {{.*}} '_Float16' //CHECK-NEXT: | |-ImplicitCastExpr {{.*}} '_Float16 (*)(_Float16)' //CHECK-NEXT: | | `-DeclRefExpr {{.*}} '_Float16 (_Float16)' lvalue Function {{.*}} 'func1t' '_Float16 (_Float16)' (FunctionTemplate {{.*}} 'func1t') //CHECK-NEXT: | `-ImplicitCastExpr {{.*}} '_Float16' diff --git a/clang/test/AST/nrvo.c b/clang/test/AST/nrvo.c index a3e12759ad0d9..078445cd972c4 100644 --- a/clang/test/AST/nrvo.c +++ b/clang/test/AST/nrvo.c @@ -7,11 +7,11 @@ struct A f1(void) { // CHECK-NEXT: CompoundStmt 0x{{[^ ]*}} struct A a; // CHECK-NEXT: DeclStmt 0x{{[^ ]*}} - // CHECK-NEXT: VarDecl 0x{{[^ ]*}} col:12 used a 'struct A':'struct A' nrvo + // CHECK-NEXT: VarDecl 0x{{[^ ]*}} col:12 used a 'struct A' nrvo return a; // CHECK-NEXT: ReturnStmt 0x{{[^ ]*}} - // CHECK-NEXT: ImplicitCastExpr 0x{{[^ ]*}} 'struct A':'struct A' - // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'struct A':'struct A' lvalue Var 0x{{[^ ]*}} 'a' 'struct A':'struct A' + // CHECK-NEXT: ImplicitCastExpr 0x{{[^ ]*}} 'struct A' + // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'struct A' lvalue Var 0x{{[^ ]*}} 'a' 'struct A' } void f2(void) { @@ -20,10 +20,10 @@ void f2(void) { // CHECK-NEXT: CompoundStmt 0x{{[^ ]*}} struct A a; // CHECK-NEXT: DeclStmt 0x{{[^ ]*}} - // 
CHECK-NEXT: VarDecl 0x{{[^ ]*}} col:14 used a 'struct A':'struct A' nrvo + // CHECK-NEXT: VarDecl 0x{{[^ ]*}} col:14 used a 'struct A' nrvo return a; // CHECK-NEXT: ReturnStmt 0x{{[^ ]*}} - // CHECK-NEXT: ImplicitCastExpr 0x{{[^ ]*}} 'struct A':'struct A' - // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'struct A':'struct A' lvalue Var 0x{{[^ ]*}} 'a' 'struct A':'struct A' + // CHECK-NEXT: ImplicitCastExpr 0x{{[^ ]*}} 'struct A' + // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} 'struct A' lvalue Var 0x{{[^ ]*}} 'a' 'struct A' }(); } diff --git a/clang/test/AST/sourceranges.cpp b/clang/test/AST/sourceranges.cpp index 40135bd2e17c6..f78d34c1ee7e3 100644 --- a/clang/test/AST/sourceranges.cpp +++ b/clang/test/AST/sourceranges.cpp @@ -49,14 +49,14 @@ void construct() { A a = A(12); // CHECK: CXXConstructExpr {{0x[0-9a-fA-F]+}} 'A':'foo::A' 'void (int){{( __attribute__\(\(thiscall\)\))?}}' D d = D(12); - // CHECK: CXXConstructExpr {{0x[0-9a-fA-F]+}} 'D':'D' 'void (int){{( __attribute__\(\(thiscall\)\))?}}' + // CHECK: CXXConstructExpr {{0x[0-9a-fA-F]+}} 'D' 'void (int){{( __attribute__\(\(thiscall\)\))?}}' } namespace PR38987 { struct A { A(); }; template void f() { T{}; } template void f(); -// CHECK: CXXTemporaryObjectExpr {{.*}} 'PR38987::A':'PR38987::A' +// CHECK: CXXTemporaryObjectExpr {{.*}} 'PR38987::A' } void abort() __attribute__((noreturn)); diff --git a/clang/test/C/drs/dr253.c b/clang/test/C/drs/dr253.c index 3eae0a01586a7..1c6f610dbcca1 100644 --- a/clang/test/C/drs/dr253.c +++ b/clang/test/C/drs/dr253.c @@ -19,7 +19,7 @@ struct fred y [] = { { { "abc" }, 1 }, [0] = { .s[0] = 'q' } }; // CHECK: VarDecl 0x{{.*}} col:13 y 'struct fred[1]' cinit // CHECK-NEXT: InitListExpr 0x{{.*}} 'struct fred[1]' -// CHECK-NEXT: InitListExpr 0x{{.*}} 'struct fred':'struct fred' +// CHECK-NEXT: InitListExpr 0x{{.*}} 'struct fred' // CHECK-NEXT: InitListExpr 0x{{.*}} 'char[6]' // CHECK-NEXT: array_filler // CHECK-NEXT: ImplicitCastExpr diff --git a/clang/test/CXX/dcl.decl/dcl.init/dcl.init.ref/p4-ast.cpp b/clang/test/CXX/dcl.decl/dcl.init/dcl.init.ref/p4-ast.cpp index 4fc0a05ae1eca..32c4ddd921bba 100644 --- a/clang/test/CXX/dcl.decl/dcl.init/dcl.init.ref/p4-ast.cpp +++ b/clang/test/CXX/dcl.decl/dcl.init/dcl.init.ref/p4-ast.cpp @@ -3,7 +3,7 @@ void f() noexcept; // CHECK: VarDecl {{.*}} ref 'void (&)()' cinit -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'void ()':'void ()' lvalue +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'void ()' lvalue // CHECK-NEXT: DeclRefExpr {{.*}} 'void () noexcept' lvalue Function {{.*}} 'f' 'void () noexcept' void (&ref)() = f; @@ -13,6 +13,6 @@ struct X { } x; // CHECK: VarDecl {{.*}} xp 'void (&)()' cinit -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'void ()':'void ()' lvalue -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'void () noexcept':'void () noexcept' lvalue +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'void ()' lvalue +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'void () noexcept' lvalue void (&xp)() = x; diff --git a/clang/test/Import/objc-param-decl/test.m b/clang/test/Import/objc-param-decl/test.m index dc98c31200193..f2b6aa0927295 100644 --- a/clang/test/Import/objc-param-decl/test.m +++ b/clang/test/Import/objc-param-decl/test.m @@ -5,7 +5,7 @@ // CHECK-SAME: FirstParam // CHECK-SAME: 'id' // CHECK-NEXT: ObjCTypeParamDecl -// CHECK-SAME: 'id':'id' +// CHECK-SAME: 'id' void expr() { Dictionary *d; diff --git a/clang/test/OpenMP/align_clause_ast_print.cpp b/clang/test/OpenMP/align_clause_ast_print.cpp index 3ed602d533bc8..87000f9c41bae 100644 --- a/clang/test/OpenMP/align_clause_ast_print.cpp +++ 
b/clang/test/OpenMP/align_clause_ast_print.cpp @@ -122,7 +122,7 @@ int template_test() { // DUMP: NonTypeTemplateParmDecl {{.*}}'unsigned int' depth 0 index 1 size // DUMP: IntegerLiteral {{.*}}'unsigned int' 1 // DUMP: OMPAllocateDecl {{.*}} -// DUMP: DeclRefExpr {{.*}}'double':'double' lvalue Var {{.*}} 'foo' 'double':'double' +// DUMP: DeclRefExpr {{.*}}'double' lvalue Var {{.*}} 'foo' 'double' // DUMP: OMPAlignClause {{.*}} // DUMP: ConstantExpr {{.*}}'unsigned int' // DUMP: value: Int 1 diff --git a/clang/test/OpenMP/generic_loop_ast_print.cpp b/clang/test/OpenMP/generic_loop_ast_print.cpp index 29c48ffde8092..df806405571cf 100644 --- a/clang/test/OpenMP/generic_loop_ast_print.cpp +++ b/clang/test/OpenMP/generic_loop_ast_print.cpp @@ -51,15 +51,15 @@ //DUMP: FunctionDecl{{.*}}templ_foo 'void (int)' //DUMP: TemplateArgument type 'int' //DUMP: TemplateArgument integral 2 -//DUMP: ParmVarDecl{{.*}}'int':'int' +//DUMP: ParmVarDecl{{.*}}'int' //DUMP: OMPSimdDirective //DUMP: OMPCollapseClause //DUMP: ConstantExpr{{.*}}'int' //DUMP: value: Int 2 //DUMP: OMPReductionClause -//DUMP: DeclRefExpr{{.*}}'z' 'int':'int' +//DUMP: DeclRefExpr{{.*}}'z' 'int' //DUMP: OMPLastprivateClause -//DUMP: DeclRefExpr{{.*}}'j' 'int':'int' +//DUMP: DeclRefExpr{{.*}}'j' 'int' //DUMP: ForStmt template void templ_foo(T t) { diff --git a/clang/test/OpenMP/scope_ast_print.cpp b/clang/test/OpenMP/scope_ast_print.cpp index c69908980d229..c5c3a29738a72 100644 --- a/clang/test/OpenMP/scope_ast_print.cpp +++ b/clang/test/OpenMP/scope_ast_print.cpp @@ -81,8 +81,8 @@ int template_test() { //DUMP: BuiltinType {{.*}}'double' //DUMP: OMPScopeDirective //DUMP: OMPPrivateClause -//DUMP: DeclRefExpr {{.*}}'double':'double' lvalue Var {{.*}} 'a' 'double':'double' +//DUMP: DeclRefExpr {{.*}}'double' lvalue Var {{.*}} 'a' 'double' //DUMP: OMPReductionClause -//DUMP: DeclRefExpr {{.*}}'double':'double' lvalue Var {{.*}} 'b' 'double':'double' +//DUMP: DeclRefExpr {{.*}}'double' lvalue Var {{.*}} 'b' 'double' //PRINT: #pragma omp scope private(a) reduction(*: b) #endif // HEADER diff --git a/clang/test/SemaCXX/co_await-ast.cpp b/clang/test/SemaCXX/co_await-ast.cpp index b9eae493be952..10cee21da0e87 100644 --- a/clang/test/SemaCXX/co_await-ast.cpp +++ b/clang/test/SemaCXX/co_await-ast.cpp @@ -47,13 +47,13 @@ awaitable foo() { // CHECK: |-CompoundStmt {{.*}} // CHECK: | `-ExprWithCleanups {{.*}} 'void' // CHECK: | `-CoawaitExpr {{.*}} 'void' -// CHECK: | |-CXXTemporaryObjectExpr {{.*}} 'executor':'executor' 'void (){{.*}} noexcept' zeroing +// CHECK: | |-CXXTemporaryObjectExpr {{.*}} 'executor' 'void (){{.*}} noexcept' zeroing // CHECK: | |-MaterializeTemporaryExpr {{.*}} 'result_t':'awaitable_frame::result_t' lvalue // CHECK: | | `-CXXBindTemporaryExpr {{.*}} 'result_t':'awaitable_frame::result_t' (CXXTemporary {{.*}}) // CHECK: | | `-CXXMemberCallExpr {{.*}} 'result_t':'awaitable_frame::result_t' // CHECK: | | |-MemberExpr {{.*}} '' .await_transform {{.*}} // CHECK: | | | `-DeclRefExpr {{.*}} 'std::coroutine_traits::promise_type':'awaitable_frame' lvalue Var {{.*}} '__promise' 'std::coroutine_traits::promise_type':'awaitable_frame' -// CHECK: | | `-CXXTemporaryObjectExpr {{.*}} 'executor':'executor' 'void (){{.*}} noexcept' zeroing +// CHECK: | | `-CXXTemporaryObjectExpr {{.*}} 'executor' 'void (){{.*}} noexcept' zeroing // CHECK: | |-ExprWithCleanups {{.*}} 'bool' // CHECK: | | `-CXXMemberCallExpr {{.*}} 'bool' // CHECK: | | `-MemberExpr {{.*}} '' .await_ready {{.*}} @@ -64,7 +64,7 @@ awaitable foo() { // CHECK: | | 
`-CXXMemberCallExpr {{.*}} 'result_t':'awaitable_frame::result_t' // CHECK: | | |-MemberExpr {{.*}} '' .await_transform {{.*}} // CHECK: | | | `-DeclRefExpr {{.*}} 'std::coroutine_traits::promise_type':'awaitable_frame' lvalue Var {{.*}} '__promise' 'std::coroutine_traits::promise_type':'awaitable_frame' -// CHECK: | | `-CXXTemporaryObjectExpr {{.*}} 'executor':'executor' 'void (){{.*}} noexcept' zeroing +// CHECK: | | `-CXXTemporaryObjectExpr {{.*}} 'executor' 'void (){{.*}} noexcept' zeroing // CHECK: | |-ExprWithCleanups {{.*}} 'void' // CHECK: | | `-CXXMemberCallExpr {{.*}} 'void' // CHECK: | | |-MemberExpr {{.*}} '' .await_suspend {{.*}} @@ -74,9 +74,9 @@ awaitable foo() { // CHECK: | | | `-CXXMemberCallExpr {{.*}} 'result_t':'awaitable_frame::result_t' // CHECK: | | | |-MemberExpr {{.*}} '' .await_transform {{.*}} // CHECK: | | | | `-DeclRefExpr {{.*}} 'std::coroutine_traits::promise_type':'awaitable_frame' lvalue Var {{.*}} '__promise' 'std::coroutine_traits::promise_type':'awaitable_frame' -// CHECK: | | | `-CXXTemporaryObjectExpr {{.*}} 'executor':'executor' 'void (){{.*}} noexcept' zeroing -// CHECK: | | `-ImplicitCastExpr {{.*}} 'std::coroutine_handle':'std::coroutine_handle' -// CHECK: | | `-CXXConstructExpr {{.*}} 'std::coroutine_handle':'std::coroutine_handle' 'void (coroutine_handle &&){{.*}} noexcept' +// CHECK: | | | `-CXXTemporaryObjectExpr {{.*}} 'executor' 'void (){{.*}} noexcept' zeroing +// CHECK: | | `-ImplicitCastExpr {{.*}} 'std::coroutine_handle' +// CHECK: | | `-CXXConstructExpr {{.*}} 'std::coroutine_handle' 'void (coroutine_handle &&){{.*}} noexcept' // CHECK: | | `-MaterializeTemporaryExpr {{.*}} 'coroutine_handle':'std::coroutine_handle' xvalue // CHECK: | | `-CallExpr {{.*}} 'coroutine_handle':'std::coroutine_handle' // CHECK: | | |-ImplicitCastExpr {{.*}} 'coroutine_handle (*)(void *) noexcept' @@ -93,6 +93,6 @@ awaitable foo() { // CHECK: | `-CXXMemberCallExpr {{.*}} 'result_t':'awaitable_frame::result_t' // CHECK: | |-MemberExpr {{.*}} '' .await_transform {{.*}} // CHECK: | | `-DeclRefExpr {{.*}} 'std::coroutine_traits::promise_type':'awaitable_frame' lvalue Var {{.*}} '__promise' 'std::coroutine_traits::promise_type':'awaitable_frame' -// CHECK: | `-CXXTemporaryObjectExpr {{.*}} 'executor':'executor' 'void (){{.*}} noexcept' zeroing +// CHECK: | `-CXXTemporaryObjectExpr {{.*}} 'executor' 'void (){{.*}} noexcept' zeroing // Rest of the generated coroutine statements omitted. 
diff --git a/clang/test/SemaCXX/consteval-cleanup.cpp b/clang/test/SemaCXX/consteval-cleanup.cpp index ddc56174b484f..499c45db50177 100644 --- a/clang/test/SemaCXX/consteval-cleanup.cpp +++ b/clang/test/SemaCXX/consteval-cleanup.cpp @@ -21,11 +21,11 @@ void foo() { // CHECK: foo // CHECK: ExprWithCleanups // CHECK-NEXT: BinaryOperator {{.*}} 'bool' ',' - // CHECK-NEXT: BinaryOperator {{.*}} 'P':'P' ',' - // CHECK-NEXT: CXXFunctionalCastExpr {{.*}} 'A':'A' - // CHECK-NEXT: CXXBindTemporaryExpr {{.*}} 'A':'A' - // CHECK-NEXT: CXXConstructExpr {{.*}} 'A':'A' - // CHECK: ConstantExpr {{.*}} 'P':'P' + // CHECK-NEXT: BinaryOperator {{.*}} 'P' ',' + // CHECK-NEXT: CXXFunctionalCastExpr {{.*}} 'A' + // CHECK-NEXT: CXXBindTemporaryExpr {{.*}} 'A' + // CHECK-NEXT: CXXConstructExpr {{.*}} 'A' + // CHECK: ConstantExpr {{.*}} 'P' // CHECK-NEXT: value: // CHECK-NEXT: ExprWithCleanups } @@ -36,10 +36,10 @@ void foobar() { // CHECK: ExprWithCleanups // CHECK-NEXT: cleanup Block // CHECK-NEXT: BinaryOperator {{.*}} 'bool' ',' - // CHECK-NEXT: BinaryOperator {{.*}} 'P':'P' ',' + // CHECK-NEXT: BinaryOperator {{.*}} 'P' ',' // CHECK-NEXT: CallExpr // CHECK-NEXT: BlockExpr - // CHECK: ConstantExpr {{.*}} 'P':'P' + // CHECK: ConstantExpr {{.*}} 'P' // CHECK-NEXT: value: // CHECK-NEXT: ExprWithCleanups // CHECK-NOT: cleanup Block diff --git a/clang/test/SemaOpenCLCXX/address-space-deduction.clcpp b/clang/test/SemaOpenCLCXX/address-space-deduction.clcpp index 3c801f2c4db76..526e86584c77b 100644 --- a/clang/test/SemaOpenCLCXX/address-space-deduction.clcpp +++ b/clang/test/SemaOpenCLCXX/address-space-deduction.clcpp @@ -6,7 +6,7 @@ constexpr int foo = 0; //CHECK: |-VarDecl {{.*}} foo1 'T' cinit -//CHECK: `-VarTemplateSpecializationDecl {{.*}} used foo1 '__global long':'__global long' implicit_instantiation cinit +//CHECK: `-VarTemplateSpecializationDecl {{.*}} used foo1 '__global long' implicit_instantiation cinit template T foo1 = 0; diff --git a/clang/test/SemaOpenCLCXX/addrspace-auto.clcpp b/clang/test/SemaOpenCLCXX/addrspace-auto.clcpp index 7862564d1b4ef..97fad5939955c 100644 --- a/clang/test/SemaOpenCLCXX/addrspace-auto.clcpp +++ b/clang/test/SemaOpenCLCXX/addrspace-auto.clcpp @@ -1,18 +1,18 @@ //RUN: %clang_cc1 %s -pedantic -ast-dump -verify | FileCheck %s __constant int i = 1; -//CHECK: |-VarDecl {{.*}} ai '__global int':'__global int' +//CHECK: |-VarDecl {{.*}} ai '__global int' auto ai = i; kernel void test() { int i; - //CHECK: VarDecl {{.*}} ai '__private int':'__private int' + //CHECK: VarDecl {{.*}} ai '__private int' auto ai = i; constexpr int c = 1; - //CHECK: VarDecl {{.*}} used cai '__constant int':'__constant int' + //CHECK: VarDecl {{.*}} used cai '__constant int' __constant auto cai = c; - //CHECK: VarDecl {{.*}} aii '__private int':'__private int' + //CHECK: VarDecl {{.*}} aii '__private int' auto aii = cai; //CHECK: VarDecl {{.*}} ref '__private int &__private' diff --git a/clang/test/SemaTemplate/aggregate-deduction-candidate.cpp b/clang/test/SemaTemplate/aggregate-deduction-candidate.cpp index d455d424ab3d0..7f535651bb815 100644 --- a/clang/test/SemaTemplate/aggregate-deduction-candidate.cpp +++ b/clang/test/SemaTemplate/aggregate-deduction-candidate.cpp @@ -21,8 +21,8 @@ namespace Basic { // CHECK: `-CXXDeductionGuideDecl {{.*}} implicit used 'auto (double, double) -> Basic::A' // CHECK: |-TemplateArgument type 'double' // CHECK: | `-BuiltinType {{.*}} 'double' - // CHECK: |-ParmVarDecl {{.*}} 'double':'double' - // CHECK: `-ParmVarDecl {{.*}} 'double':'double' + // CHECK: |-ParmVarDecl 
{{.*}} 'double' + // CHECK: `-ParmVarDecl {{.*}} 'double' // CHECK: FunctionProtoType {{.*}} 'auto (T, T) -> A' dependent trailing_return cdecl // CHECK: |-InjectedClassNameType {{.*}} 'A' dependent // CHECK: | `-CXXRecord {{.*}} 'A' @@ -65,13 +65,13 @@ namespace Basic { // CHECK: FunctionTemplateDecl {{.*}} implicit // CHECK: |-TemplateTypeParmDecl {{.*}} referenced typename depth 0 index 0 T // CHECK: |-CXXDeductionGuideDecl {{.*}} implicit 'auto (S, T) -> C' - // CHECK: | |-ParmVarDecl {{.*}} 'S':'S' + // CHECK: | |-ParmVarDecl {{.*}} 'S' // CHECK: | `-ParmVarDecl {{.*}} 'T' // CHECK: `-CXXDeductionGuideDecl {{.*}} implicit used 'auto (S, int) -> Basic::C' // CHECK: |-TemplateArgument type 'int' // CHECK: | `-BuiltinType {{.*}} 'int' // CHECK: |-ParmVarDecl {{.*}} 'S':'Basic::S' - // CHECK: `-ParmVarDecl {{.*}} 'int':'int' + // CHECK: `-ParmVarDecl {{.*}} 'int' // CHECK: FunctionProtoType {{.*}} 'auto (S, T) -> C' dependent trailing_return cdecl // CHECK: |-InjectedClassNameType {{.*}} 'C' dependent // CHECK: | `-CXXRecord {{.*}} 'C' @@ -87,8 +87,8 @@ namespace Basic { // CHECK: FunctionTemplateDecl {{.*}} implicit // CHECK: |-TemplateTypeParmDecl {{.*}} referenced typename depth 0 index 0 T // CHECK: `-CXXDeductionGuideDecl {{.*}} implicit 'auto (int, int) -> D' - // CHECK: |-ParmVarDecl {{.*}} 'int':'int' - // CHECK: `-ParmVarDecl {{.*}} 'int':'int' + // CHECK: |-ParmVarDecl {{.*}} 'int' + // CHECK: `-ParmVarDecl {{.*}} 'int' // CHECK: FunctionProtoType {{.*}} 'auto (int, int) -> D' dependent trailing_return cdecl // CHECK: |-InjectedClassNameType {{.*}} 'D' dependent // CHECK: | `-CXXRecord {{.*}} 'D' @@ -117,7 +117,7 @@ namespace Basic { // CHECK: `-CXXDeductionGuideDecl {{.*}} implicit used 'auto (int, decltype(t)) -> Basic::E' // CHECK: |-TemplateArgument type 'int' // CHECK: | `-BuiltinType {{.*}} 'int' - // CHECK: |-ParmVarDecl {{.*}} 'int':'int' + // CHECK: |-ParmVarDecl {{.*}} 'int' // CHECK: `-ParmVarDecl {{.*}} 'decltype(t)':'int' // CHECK: FunctionProtoType {{.*}} 'auto (T, decltype(t)) -> E' dependent trailing_return cdecl // CHECK: |-InjectedClassNameType {{.*}} 'E' dependent @@ -150,7 +150,7 @@ namespace Basic { // CHECK: |-TemplateArgument type 'int' // CHECK: | `-BuiltinType {{.*}} 'int' // CHECK: |-ParmVarDecl {{.*}} 'typename I::type':'int' - // CHECK: `-ParmVarDecl {{.*}} 'int':'int' + // CHECK: `-ParmVarDecl {{.*}} 'int' // CHECK: FunctionProtoType {{.*}} 'auto (typename I::type, T) -> F' dependent trailing_return cdecl // CHECK: |-InjectedClassNameType {{.*}} 'F' dependent // CHECK: | `-CXXRecord {{.*}} 'F' @@ -235,8 +235,8 @@ namespace BraceElision { // CHECK: `-CXXDeductionGuideDecl {{.*}} implicit used 'auto (int, int) -> BraceElision::A' // CHECK: |-TemplateArgument type 'int' // CHECK: | `-BuiltinType {{.*}} 'int' - // CHECK: |-ParmVarDecl {{.*}} 'int':'int' - // CHECK: `-ParmVarDecl {{.*}} 'int':'int' + // CHECK: |-ParmVarDecl {{.*}} 'int' + // CHECK: `-ParmVarDecl {{.*}} 'int' // CHECK: FunctionProtoType {{.*}} 'auto (T, T) -> A' dependent trailing_return cdecl // CHECK: |-InjectedClassNameType {{.*}} 'A' dependent // CHECK: | `-CXXRecord {{.*}} 'A' @@ -275,8 +275,8 @@ namespace TrailingPack { // CHECK: | `-TemplateArgument type 'TrailingPack::(lambda at {{.*}})' // CHECK: | `-RecordType {{.*}} 'TrailingPack::(lambda at {{.*}})' // CHECK: | `-CXXRecord {{.*}} '' - // CHECK: |-ParmVarDecl {{.*}} 'TrailingPack::(lambda at {{.*}})':'TrailingPack::(lambda at {{.*}})' - // CHECK: `-ParmVarDecl {{.*}} 'TrailingPack::(lambda at {{.*}})':'TrailingPack::(lambda at 
{{.*}})' + // CHECK: |-ParmVarDecl {{.*}} 'TrailingPack::(lambda at {{.*}})' + // CHECK: `-ParmVarDecl {{.*}} 'TrailingPack::(lambda at {{.*}})' // CHECK: FunctionProtoType {{.*}} 'auto (T...) -> A' dependent trailing_return cdecl // CHECK: |-InjectedClassNameType {{.*}} 'A' dependent // CHECK: | `-CXXRecord {{.*}} 'A' @@ -324,7 +324,7 @@ namespace DeduceArity { // CHECK: FunctionTemplateDecl {{.*}} implicit // CHECK: |-TemplateTypeParmDecl {{.*}} referenced typename depth 0 index 0 ... T // CHECK: |-CXXDeductionGuideDecl {{.*}} implicit 'auto (Types, T...) -> F' - // CHECK: | |-ParmVarDecl {{.*}} 'Types':'Types' + // CHECK: | |-ParmVarDecl {{.*}} 'Types' // CHECK: | `-ParmVarDecl {{.*}} 'T...' pack // CHECK: |-CXXDeductionGuideDecl {{.*}} implicit used // CHECK-SAME: 'auto (Types, DeduceArity::X, DeduceArity::Y, DeduceArity::Z) -> @@ -340,16 +340,16 @@ namespace DeduceArity { // CHECK: | | `-RecordType {{.*}} 'DeduceArity::Z' // CHECK: | | `-CXXRecord {{.*}} 'Z' // CHECK: | |-ParmVarDecl {{.*}} 'Types':'DeduceArity::Types' - // CHECK: | |-ParmVarDecl {{.*}} 'DeduceArity::X':'DeduceArity::X' - // CHECK: | |-ParmVarDecl {{.*}} 'DeduceArity::Y':'DeduceArity::Y' - // CHECK: | `-ParmVarDecl {{.*}} 'DeduceArity::Z':'DeduceArity::Z' + // CHECK: | |-ParmVarDecl {{.*}} 'DeduceArity::X' + // CHECK: | |-ParmVarDecl {{.*}} 'DeduceArity::Y' + // CHECK: | `-ParmVarDecl {{.*}} 'DeduceArity::Z' // CHECK: `-CXXDeductionGuideDecl {{.*}} implicit 'auto (Types, DeduceArity::X) -> DeduceArity::F' // CHECK: |-TemplateArgument pack // CHECK: | `-TemplateArgument type 'DeduceArity::X' // CHECK: | `-RecordType {{.*}} 'DeduceArity::X' // CHECK: | `-CXXRecord {{.*}} 'X' // CHECK: |-ParmVarDecl {{.*}} 'Types':'DeduceArity::Types' - // CHECK: `-ParmVarDecl {{.*}} 'DeduceArity::X':'DeduceArity::X' + // CHECK: `-ParmVarDecl {{.*}} 'DeduceArity::X' // CHECK: FunctionProtoType {{.*}} 'auto (Types, T...) 
-> F' dependent trailing_return cdecl // CHECK: |-InjectedClassNameType {{.*}} 'F' dependent // CHECK: | `-CXXRecord {{.*}} 'F' diff --git a/clang/test/SemaTemplate/deduction-guide.cpp b/clang/test/SemaTemplate/deduction-guide.cpp index 9de2975656ee2..16c7083df29d0 100644 --- a/clang/test/SemaTemplate/deduction-guide.cpp +++ b/clang/test/SemaTemplate/deduction-guide.cpp @@ -37,7 +37,7 @@ using AT = A; // CHECK: | | `-Var {{.*}} 'arr1' 'int[3]' // CHECK: | `-TemplateArgument decl // CHECK: | `-Var {{.*}} 'arr2' 'int[3]' -// CHECK: |-ParmVarDecl {{.*}} 'X<&arr1, &arr2>':'X<&arr1, &arr2>' +// CHECK: |-ParmVarDecl {{.*}} 'X<&arr1, &arr2>' // CHECK: |-ParmVarDecl {{.*}} 'int (*)[3]' // CHECK: |-ParmVarDecl {{.*}} 'int (*)[3]' // CHECK: `-ParmVarDecl {{.*}} 'short (*)[4]' @@ -76,7 +76,7 @@ using BT = B; // CHECK: |-TemplateArgument integral 120 // CHECK: |-TemplateArgument type 'std::nullptr_t' // CHECK: |-TemplateArgument nullptr -// CHECK: `-ParmVarDecl {{.*}} 'X':'X' +// CHECK: `-ParmVarDecl {{.*}} 'X' // CHECK: FunctionProtoType {{.*}} 'auto (X) -> B' dependent trailing_return // CHECK: |-InjectedClassNameType {{.*}} 'B' dependent // CHECK: `-TemplateSpecializationType {{.*}} 'X' dependent X @@ -111,9 +111,9 @@ using CT = C; // CHECK: |-TemplateArgument template B // CHECK: |-TemplateArgument type 'int' // CHECK: |-TemplateArgument integral 0 -// CHECK: |-ParmVarDecl {{.*}} 'int':'int' -// CHECK: |-ParmVarDecl {{.*}} 'Y':'Y' -// CHECK: `-ParmVarDecl {{.*}} 'int':'int' +// CHECK: |-ParmVarDecl {{.*}} 'int' +// CHECK: |-ParmVarDecl {{.*}} 'Y' +// CHECK: `-ParmVarDecl {{.*}} 'int' // CHECK: FunctionProtoType {{.*}} 'auto (A, Y<>, type-parameter-0-2) -> C' dependent trailing_return cdecl // CHECK: |-InjectedClassNameType {{.*}} 'C' dependent // CHECK: |-TemplateTypeParmType {{.*}} 'A' dependent depth 0 index 0 @@ -234,7 +234,7 @@ F s(0); // CHECK: |-TemplateArgument integral 120 // CHECK: |-TemplateArgument type 'int' // CHECK: | `-BuiltinType {{.*}} 'int' -// CHECK: `-ParmVarDecl {{.*}} 'int':'int' +// CHECK: `-ParmVarDecl {{.*}} 'int' // CHECK: FunctionProtoType {{.*}} 'auto (type-parameter-0-1) -> F<>' dependent trailing_return cdecl // CHECK: |-InjectedClassNameType {{.*}} 'F<>' dependent // CHECK: | `-CXXRecord {{.*}} 'F' diff --git a/clang/test/SemaTemplate/default-expr-arguments-3.cpp b/clang/test/SemaTemplate/default-expr-arguments-3.cpp index 4bbdb6f91ec7e..4d04209e110b3 100644 --- a/clang/test/SemaTemplate/default-expr-arguments-3.cpp +++ b/clang/test/SemaTemplate/default-expr-arguments-3.cpp @@ -4,7 +4,7 @@ // CHECK: FunctionDecl {{.*}} used func 'void ()' // CHECK-NEXT: TemplateArgument type 'int' // CHECK: LambdaExpr {{.*}} '(lambda at -// CHECK: ParmVarDecl {{.*}} used f 'foo':'foo' cinit +// CHECK: ParmVarDecl {{.*}} used f 'foo' cinit // CHECK-NEXT: DeclRefExpr {{.*}} 'foo' EnumConstant {{.*}} 'a' 'foo' namespace PR28795 { @@ -23,7 +23,7 @@ namespace PR28795 { // CHECK: ClassTemplateSpecializationDecl {{.*}} struct class2 definition // CHECK: TemplateArgument type 'int' // CHECK: LambdaExpr {{.*}} '(lambda at -// CHECK: ParmVarDecl {{.*}} used f 'foo':'foo' cinit +// CHECK: ParmVarDecl {{.*}} used f 'foo' cinit // CHECK-NEXT: DeclRefExpr {{.*}} 'foo' EnumConstant {{.*}} 'a' 'foo' // Template struct case: @@ -41,7 +41,7 @@ template struct class2; // CHECK-NEXT: FunctionDecl {{.*}} f1 'void ()' // CHECK: FunctionDecl {{.*}} f1 'void ()' // CHECK-NEXT: TemplateArgument type 'int' -// CHECK: ParmVarDecl {{.*}} n 'foo':'foo' cinit +// CHECK: ParmVarDecl {{.*}} n 'foo' cinit // 
CHECK-NEXT: DeclRefExpr {{.*}} 'foo' EnumConstant {{.*}} 'a' 'foo' template diff --git a/clang/test/SemaTemplate/make_integer_seq.cpp b/clang/test/SemaTemplate/make_integer_seq.cpp index 12520167b93e5..644bf41f8614c 100644 --- a/clang/test/SemaTemplate/make_integer_seq.cpp +++ b/clang/test/SemaTemplate/make_integer_seq.cpp @@ -37,7 +37,7 @@ using test2 = B; // CHECK-NEXT: `-ElaboratedType 0x{{[0-9A-Fa-f]+}} '__make_integer_seq' sugar // CHECK-NEXT: `-TemplateSpecializationType 0x{{[0-9A-Fa-f]+}} '__make_integer_seq' sugar alias __make_integer_seq // CHECK-NEXT: |-TemplateArgument template A -// CHECK-NEXT: |-TemplateArgument type 'int':'int' +// CHECK-NEXT: |-TemplateArgument type 'int' // CHECK-NEXT: | `-SubstTemplateTypeParmType 0x{{[0-9A-Fa-f]+}} 'int' sugar class depth 0 index 0 B1 // CHECK-NEXT: | |-TypeAliasTemplate 0x{{[0-9A-Fa-f]+}} 'B' // CHECK-NEXT: | `-BuiltinType 0x{{[0-9A-Fa-f]+}} 'int' @@ -48,14 +48,14 @@ using test2 = B; // CHECK-NEXT: | |-NonTypeTemplateParmDecl 0x{{[0-9A-Fa-f]+}} col:24 referenced 'B1' depth 0 index 1 B2 // CHECK-NEXT: | `-IntegerLiteral 0x{{[0-9A-Fa-f]+}} 'int' 1 // CHECK-NEXT: `-TemplateSpecializationType 0x{{[0-9A-Fa-f]+}} 'A' sugar A -// CHECK-NEXT: |-TemplateArgument type 'int':'int' +// CHECK-NEXT: |-TemplateArgument type 'int' // CHECK-NEXT: | `-SubstTemplateTypeParmType 0x{{[0-9A-Fa-f]+}} 'int' sugar class depth 0 index 0 B1 // CHECK-NEXT: | |-TypeAliasTemplate 0x{{[0-9A-Fa-f]+}} 'B' // CHECK-NEXT: | `-BuiltinType 0x{{[0-9A-Fa-f]+}} 'int' // CHECK-NEXT: |-TemplateArgument expr // CHECK-NEXT: | `-ConstantExpr 0x{{[0-9A-Fa-f]+}} 'int' // CHECK-NEXT: | |-value: Int 0 -// CHECK-NEXT: | `-IntegerLiteral 0x{{[0-9A-Fa-f]+}} 'int':'int' 0 +// CHECK-NEXT: | `-IntegerLiteral 0x{{[0-9A-Fa-f]+}} 'int' 0 // CHECK-NEXT: `-RecordType 0x{{[0-9A-Fa-f]+}} 'A' // CHECK-NEXT: `-ClassTemplateSpecialization 0x{{[0-9A-Fa-f]+}} 'A' @@ -99,7 +99,7 @@ template