Skip to content

Commit

Permalink
[Driver][SYCL] Bound architecture mismatch with multiple targets
Browse files Browse the repository at this point in the history
When passing -fsycl-targets to specify the targets to offload to, the user
can pass multiple targets. When those targets set associated
architectures, each architecture should only be set for its own
toolchain/target.

Specifying -fsycl-targets=nvptx64,spir64_gen was setting the wrong device
architecture for the spir64_gen compilation. The bound arch associated
with nvptx64 (in this case sm_50) was being pushed to the spir64_gen
target, causing the wrong device value to be used for the AOT
compilation.

Fix this issue by correcting the logic that assigns the bound
architecture to a given triple. The logic did not take non-spir64_gen
targets into account, so the wrong arch was assigned when spir64_gen was
encountered after the nvptx64 target.
  • Loading branch information
mdtoguchi committed Sep 24, 2024
1 parent bc3a43e commit ae0d664
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 4 deletions.
18 changes: 14 additions & 4 deletions clang/lib/Driver/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6350,7 +6350,7 @@ class OffloadingActionBuilder final {
if (GpuInitHasErrors)
return true;

int I = 0;
int GenIndex = 0;
// Fill SYCLTargetInfoList
for (auto &TT : SYCLTripleList) {
auto TCIt = llvm::find_if(
Expand All @@ -6363,10 +6363,21 @@ class OffloadingActionBuilder final {
// is the target device.
if (TT.isSPIR() &&
TT.getSubArch() == llvm::Triple::SPIRSubArch_gen) {
StringRef Device(GpuArchList[I].second);
// Multiple spir64_gen targets are allowed to be used via the
// -fsycl-targets=spir64_gen and -fsycl-targets=intel_gpu_*
// specifiers. Using an index through the known GpuArchList
// values, increment through them accordingly to allow for
// the multiple settings as well as preventing re-use.
while (TT != GpuArchList[GenIndex].first &&
GenIndex < GpuArchList.size())
++GenIndex;
if (GpuArchList[GenIndex].first != TT)
// No match.
continue;
StringRef Device(GpuArchList[GenIndex].second);
SYCLTargetInfoList.emplace_back(
*TCIt, Device.empty() ? nullptr : Device.data());
++I;
++GenIndex;
continue;
}
SYCLTargetInfoList.emplace_back(*TCIt, nullptr);
Expand All @@ -6380,7 +6391,6 @@ class OffloadingActionBuilder final {
}
assert(OffloadArch && "Failed to find matching arch.");
SYCLTargetInfoList.emplace_back(*TCIt, OffloadArch);
++I;
}
}
}
Expand Down
38 changes: 38 additions & 0 deletions clang/test/Driver/sycl-offload-old-model.c
Original file line number Diff line number Diff line change
Expand Up @@ -622,6 +622,44 @@
// CHK-PHASE-MULTI-TARG-BOUND-ARCH: 28: offload, "device-sycl (spir64-unknown-unknown)" {27}, object
// CHK-PHASE-MULTI-TARG-BOUND-ARCH: 29: linker, {8, 21, 28}, image, (host-sycl)

// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl \
// RUN: -fno-sycl-instrument-device-code -fno-sycl-device-lib=all \
// RUN: -fsycl-targets=nvptx64-nvidia-cuda,spir64_gen \
// RUN: -Xsycl-target-backend=spir64_gen "-device skl" \
// RUN: -ccc-print-phases %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-PHASE-MULTI-TARG-BOUND-ARCH2 %s
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 0: input, "[[INPUT:.+\.c]]", c++, (host-sycl)
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 1: preprocessor, {0}, c++-cpp-output, (host-sycl)
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 2: input, "[[INPUT]]", c++, (device-sycl)
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 3: preprocessor, {2}, c++-cpp-output, (device-sycl)
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 4: compiler, {3}, ir, (device-sycl)
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 5: offload, "host-sycl (x86_64-unknown-linux-gnu)" {1}, "device-sycl (spir64_gen-unknown-unknown)" {4}, c++-cpp-output
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 6: compiler, {5}, ir, (host-sycl)
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 7: backend, {6}, assembler, (host-sycl)
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 8: assembler, {7}, object, (host-sycl)
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 9: input, "[[INPUT]]", c++, (device-sycl, sm_50)
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 10: preprocessor, {9}, c++-cpp-output, (device-sycl, sm_50)
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 11: compiler, {10}, ir, (device-sycl, sm_50)
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 12: linker, {11}, ir, (device-sycl, sm_50)
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 13: sycl-post-link, {12}, ir, (device-sycl, sm_50)
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 14: file-table-tform, {13}, ir, (device-sycl, sm_50)
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 15: backend, {14}, assembler, (device-sycl, sm_50)
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 16: assembler, {15}, object, (device-sycl, sm_50)
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 17: linker, {15, 16}, cuda-fatbin, (device-sycl, sm_50)
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 18: foreach, {14, 17}, cuda-fatbin, (device-sycl, sm_50)
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 19: file-table-tform, {13, 18}, tempfiletable, (device-sycl, sm_50)
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 20: clang-offload-wrapper, {19}, object, (device-sycl, sm_50)
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 21: offload, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {20}, object
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 22: linker, {4}, ir, (device-sycl)
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 23: sycl-post-link, {22}, tempfiletable, (device-sycl)
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 24: file-table-tform, {23}, tempfilelist, (device-sycl)
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 25: llvm-spirv, {24}, tempfilelist, (device-sycl)
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 26: backend-compiler, {25}, image, (device-sycl)
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 27: file-table-tform, {23, 26}, tempfiletable, (device-sycl)
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 28: clang-offload-wrapper, {27}, object, (device-sycl)
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 29: offload, "device-sycl (spir64_gen-unknown-unknown)" {28}, object
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 30: linker, {8, 21, 29}, image, (host-sycl)

/// Check the behaviour with the -fsycl-targets order swapped
// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fno-sycl-instrument-device-code -fno-sycl-device-lib=all -fsycl-targets=spir64,nvptx64-nvidia-cuda -ccc-print-phases %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-PHASE-MULTI-TARG-BOUND-ARCH-FLIPPED %s
Expand Down
25 changes: 25 additions & 0 deletions clang/test/Driver/sycl-offload.c
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,31 @@
// CHK-PHASE-MULTI-TARG-BOUND-ARCH: 16: assembler, {15}, object, (host-sycl)
// CHK-PHASE-MULTI-TARG-BOUND-ARCH: 17: clang-linker-wrapper, {16}, image, (host-sycl)

// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl --offload-new-driver \
// RUN: -fno-sycl-instrument-device-code -fno-sycl-device-lib=all \
// RUN: -fsycl-targets=nvptx64-nvidia-cuda,spir64_gen \
// RUN: -Xsycl-target-backend=spir64_gen "-device skl" \
// RUN: -ccc-print-phases %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-PHASE-MULTI-TARG-BOUND-ARCH2 %s
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 0: input, "[[INPUT:.+\.c]]", c++, (host-sycl)
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 1: preprocessor, {0}, c++-cpp-output, (host-sycl)
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 2: compiler, {1}, ir, (host-sycl)
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 3: input, "[[INPUT]]", c++, (device-sycl, skl)
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 4: preprocessor, {3}, c++-cpp-output, (device-sycl, skl)
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 5: compiler, {4}, ir, (device-sycl, skl)
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 6: backend, {5}, ir, (device-sycl, skl)
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 7: offload, "device-sycl (spir64_gen-unknown-unknown:skl)" {6}, ir
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 8: input, "[[INPUT]]", c++, (device-sycl, sm_50)
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 9: preprocessor, {8}, c++-cpp-output, (device-sycl, sm_50)
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 10: compiler, {9}, ir, (device-sycl, sm_50)
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 11: backend, {10}, ir, (device-sycl, sm_50)
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 12: offload, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {11}, ir
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 13: clang-offload-packager, {7, 12}, image, (device-sycl)
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 14: offload, "host-sycl (x86_64-unknown-linux-gnu)" {2}, "device-sycl (x86_64-unknown-linux-gnu)" {13}, ir
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 15: backend, {14}, assembler, (host-sycl)
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 16: assembler, {15}, object, (host-sycl)
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 17: clang-linker-wrapper, {16}, image, (host-sycl)

/// ###########################################################################

// Check if valid bound arch behaviour occurs when compiling for spir-v, nvidia-gpu, and amd-gpu
Expand Down

0 comments on commit ae0d664

Please sign in to comment.