From ae0d6644f0c145ea70efd7bea64f9ecb71197065 Mon Sep 17 00:00:00 2001 From: Michael D Toguchi Date: Tue, 24 Sep 2024 15:49:49 -0700 Subject: [PATCH] [Driver][SYCL] Bound architecture mismatch with multiple targets When passing -fsycl-targets to specify targets to offload to, the user can pass multiple targets. When those targets each set an associated architecture, that architecture should only be applied to its own toolchain/target. Specifying a setting like -fsycl-targets=nvptx64,spir64_gen was setting the wrong device architecture for the spir64_gen compilation. The bound architecture associated with nvptx64 (in this case sm_50) was being pushed to the spir64_gen target, causing the wrong device value to be used for the AOT compilation. Fix this issue by correcting the logic that assigns the bound architecture to a given triple. The logic was not taking non-spir64_gen targets into account, so the wrong arch was assigned when spir64_gen was encountered after the nvptx64 target. --- clang/lib/Driver/Driver.cpp | 18 +++++++--- clang/test/Driver/sycl-offload-old-model.c | 38 ++++++++++++++++++++++ clang/test/Driver/sycl-offload.c | 25 ++++++++++++++ 3 files changed, 77 insertions(+), 4 deletions(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 502b80ecef5d3..794e54e015c4a 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -6350,7 +6350,7 @@ class OffloadingActionBuilder final { if (GpuInitHasErrors) return true; - int I = 0; + int GenIndex = 0; // Fill SYCLTargetInfoList for (auto &TT : SYCLTripleList) { auto TCIt = llvm::find_if( @@ -6363,10 +6363,21 @@ class OffloadingActionBuilder final { // is the target device. if (TT.isSPIR() && TT.getSubArch() == llvm::Triple::SPIRSubArch_gen) { - StringRef Device(GpuArchList[I].second); + // Multiple spir64_gen targets are allowed to be used via the + // -fsycl-targets=spir64_gen and -fsycl-targets=intel_gpu_* + // specifiers. 
Using an index through the known GpuArchList + // values, increment through them accordingly to allow for + // the multiple settings as well as preventing re-use. + while (TT != GpuArchList[GenIndex].first && + GenIndex < GpuArchList.size()) + ++GenIndex; + if (GpuArchList[GenIndex].first != TT) + // No match. + continue; + StringRef Device(GpuArchList[GenIndex].second); SYCLTargetInfoList.emplace_back( *TCIt, Device.empty() ? nullptr : Device.data()); - ++I; + ++GenIndex; continue; } SYCLTargetInfoList.emplace_back(*TCIt, nullptr); @@ -6380,7 +6391,6 @@ class OffloadingActionBuilder final { } assert(OffloadArch && "Failed to find matching arch."); SYCLTargetInfoList.emplace_back(*TCIt, OffloadArch); - ++I; } } } diff --git a/clang/test/Driver/sycl-offload-old-model.c b/clang/test/Driver/sycl-offload-old-model.c index bc310be7ae58f..b78d23654bbd1 100644 --- a/clang/test/Driver/sycl-offload-old-model.c +++ b/clang/test/Driver/sycl-offload-old-model.c @@ -622,6 +622,44 @@ // CHK-PHASE-MULTI-TARG-BOUND-ARCH: 28: offload, "device-sycl (spir64-unknown-unknown)" {27}, object // CHK-PHASE-MULTI-TARG-BOUND-ARCH: 29: linker, {8, 21, 28}, image, (host-sycl) +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl \ +// RUN: -fno-sycl-instrument-device-code -fno-sycl-device-lib=all \ +// RUN: -fsycl-targets=nvptx64-nvidia-cuda,spir64_gen \ +// RUN: -Xsycl-target-backend=spir64_gen "-device skl" \ +// RUN: -ccc-print-phases %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-PHASE-MULTI-TARG-BOUND-ARCH2 %s +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 0: input, "[[INPUT:.+\.c]]", c++, (host-sycl) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 1: preprocessor, {0}, c++-cpp-output, (host-sycl) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 2: input, "[[INPUT]]", c++, (device-sycl) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 3: preprocessor, {2}, c++-cpp-output, (device-sycl) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 4: compiler, {3}, ir, (device-sycl) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 5: offload, "host-sycl 
(x86_64-unknown-linux-gnu)" {1}, "device-sycl (spir64_gen-unknown-unknown)" {4}, c++-cpp-output +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 6: compiler, {5}, ir, (host-sycl) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 7: backend, {6}, assembler, (host-sycl) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 8: assembler, {7}, object, (host-sycl) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 9: input, "[[INPUT]]", c++, (device-sycl, sm_50) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 10: preprocessor, {9}, c++-cpp-output, (device-sycl, sm_50) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 11: compiler, {10}, ir, (device-sycl, sm_50) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 12: linker, {11}, ir, (device-sycl, sm_50) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 13: sycl-post-link, {12}, ir, (device-sycl, sm_50) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 14: file-table-tform, {13}, ir, (device-sycl, sm_50) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 15: backend, {14}, assembler, (device-sycl, sm_50) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 16: assembler, {15}, object, (device-sycl, sm_50) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 17: linker, {15, 16}, cuda-fatbin, (device-sycl, sm_50) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 18: foreach, {14, 17}, cuda-fatbin, (device-sycl, sm_50) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 19: file-table-tform, {13, 18}, tempfiletable, (device-sycl, sm_50) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 20: clang-offload-wrapper, {19}, object, (device-sycl, sm_50) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 21: offload, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {20}, object +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 22: linker, {4}, ir, (device-sycl) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 23: sycl-post-link, {22}, tempfiletable, (device-sycl) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 24: file-table-tform, {23}, tempfilelist, (device-sycl) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 25: llvm-spirv, {24}, tempfilelist, (device-sycl) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 26: backend-compiler, {25}, image, (device-sycl) +// 
CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 27: file-table-tform, {23, 26}, tempfiletable, (device-sycl) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 28: clang-offload-wrapper, {27}, object, (device-sycl) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 29: offload, "device-sycl (spir64_gen-unknown-unknown)" {28}, object +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 30: linker, {8, 21, 29}, image, (host-sycl) + /// Check the behaviour however with swapped -fsycl-targets // RUN: %clang -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fno-sycl-instrument-device-code -fno-sycl-device-lib=all -fsycl-targets=spir64,nvptx64-nvidia-cuda -ccc-print-phases %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-PHASE-MULTI-TARG-BOUND-ARCH-FLIPPED %s diff --git a/clang/test/Driver/sycl-offload.c b/clang/test/Driver/sycl-offload.c index 60db00bea9c4f..b2ca27ce42b2d 100644 --- a/clang/test/Driver/sycl-offload.c +++ b/clang/test/Driver/sycl-offload.c @@ -387,6 +387,31 @@ // CHK-PHASE-MULTI-TARG-BOUND-ARCH: 16: assembler, {15}, object, (host-sycl) // CHK-PHASE-MULTI-TARG-BOUND-ARCH: 17: clang-linker-wrapper, {16}, image, (host-sycl) +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl --offload-new-driver \ +// RUN: -fno-sycl-instrument-device-code -fno-sycl-device-lib=all \ +// RUN: -fsycl-targets=nvptx64-nvidia-cuda,spir64_gen \ +// RUN: -Xsycl-target-backend=spir64_gen "-device skl" \ +// RUN: -ccc-print-phases %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-PHASE-MULTI-TARG-BOUND-ARCH2 %s +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 0: input, "[[INPUT:.+\.c]]", c++, (host-sycl) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 1: preprocessor, {0}, c++-cpp-output, (host-sycl) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 2: compiler, {1}, ir, (host-sycl) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 3: input, "[[INPUT]]", c++, (device-sycl, skl) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 4: preprocessor, {3}, c++-cpp-output, (device-sycl, skl) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 5: compiler, {4}, ir, (device-sycl, skl) +// 
CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 6: backend, {5}, ir, (device-sycl, skl) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 7: offload, "device-sycl (spir64_gen-unknown-unknown:skl)" {6}, ir +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 8: input, "[[INPUT]]", c++, (device-sycl, sm_50) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 9: preprocessor, {8}, c++-cpp-output, (device-sycl, sm_50) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 10: compiler, {9}, ir, (device-sycl, sm_50) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 11: backend, {10}, ir, (device-sycl, sm_50) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 12: offload, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {11}, ir +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 13: clang-offload-packager, {7, 12}, image, (device-sycl) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 14: offload, "host-sycl (x86_64-unknown-linux-gnu)" {2}, "device-sycl (x86_64-unknown-linux-gnu)" {13}, ir +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 15: backend, {14}, assembler, (host-sycl) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 16: assembler, {15}, object, (host-sycl) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 17: clang-linker-wrapper, {16}, image, (host-sycl) + /// ########################################################################### // Check if valid bound arch behaviour occurs when compiling for spir-v,nvidia-gpu, and amd-gpu