-
Notifications
You must be signed in to change notification settings - Fork 730
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[sycl-post-link] Don't remove llvm.compiler.used for NVPTX (#15224)
`llvm.compiler.used` is a global symbol which tells the compiler not to touch some other global symbols until backend lowering. The `llvm.compiler.used` symbol itself is thus removed automatically during lowering for NVPTX and AMDGCN. Removal at `sycl-post-link`, prior to lowering, was causing issues for these backends, where symbols protected by `llvm.compiler.used` were getting removed after `sycl-post-link` and before lowering. We retain the current behaviour for SPIR-V, as SPIR-V generation is handled in `llvm-spirv` anyway, not in the LLVM SPIR-V backend. Also adds tests to make sure static device_globals are handled properly for NVPTX/AMDGCN.
- Loading branch information
Showing
5 changed files
with
108 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
;; This test verifies that llc for AMDGCN removes the llvm.compiler.used symbol | ||
;; during lowering while still emitting the symbol it protects (keep_this) in | ||
;; the output assembly. | ||
|
||
; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck %s | ||
; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck %s | ||
; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a | FileCheck %s | ||
|
||
; keep_this has internal linkage and, within this file, is referenced only from | ||
; the llvm.compiler.used list defined below. | ||
@keep_this = internal global i32 2, align 4 | ||
; One-element list naming keep_this, placed in section "llvm.metadata". The | ||
; FileCheck directives that follow expect neither this symbol nor that section | ||
; name to appear in llc's output, while keep_this itself must still be emitted. | ||
@llvm.compiler.used = appending global [1 x ptr] [ptr @keep_this], section "llvm.metadata" | ||
|
||
; CHECK-NOT: llvm.metadata | ||
; CHECK-NOT: llvm{{.*}}used | ||
; CHECK-NOT: llvm{{.*}}compiler{{.*}}used | ||
|
||
; CHECK: .type keep_this,@object ; | ||
|
||
; CHECK-NOT: llvm.metadata | ||
; CHECK-NOT: llvm{{.*}}used | ||
; CHECK-NOT: llvm{{.*}}compiler{{.*}}used |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
;; This test verifies that llc for NVPTX removes the llvm.compiler.used symbol | ||
;; during lowering while still emitting the symbol it protects (keep_this) in | ||
;; the output assembly. | ||
|
||
; RUN: llc < %s -march=nvptx64 | FileCheck %s | ||
|
||
; keep_this has internal linkage and, within this file, is referenced only from | ||
; the llvm.compiler.used list defined below. | ||
@keep_this = internal global i32 2, align 4 | ||
; One-element list naming keep_this, placed in section "llvm.metadata". The | ||
; FileCheck directives that follow expect neither this symbol nor that section | ||
; name to appear in llc's output, while keep_this itself must still be emitted. | ||
@llvm.compiler.used = appending global [1 x ptr] [ptr @keep_this], section "llvm.metadata" | ||
|
||
; CHECK-NOT: llvm.metadata | ||
; CHECK-NOT: llvm{{.*}}used | ||
; CHECK-NOT: llvm{{.*}}compiler{{.*}}used | ||
|
||
; CHECK: .global .align 4 .u32 keep_this | ||
|
||
; CHECK-NOT: llvm.metadata | ||
; CHECK-NOT: llvm{{.*}}used | ||
; CHECK-NOT: llvm{{.*}}compiler{{.*}}used |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
// RUN: %{build} -o %t.out | ||
// RUN: %{run} %t.out | ||
// | ||
// The OpenCL GPU backends do not currently support device_global backend | ||
// calls. | ||
// UNSUPPORTED: opencl && gpu | ||
// | ||
// Tests static device_global access through device kernels. | ||
|
||
#include "common.hpp" | ||
|
||
// File-scope device_global with internal linkage (`static`). Both kernels in | ||
// main() access element 0 of its int[4] payload. TestProperties is not declared | ||
// in this file (presumably it comes from common.hpp - confirm). | ||
static device_global<int[4], TestProperties> DeviceGlobalVar; | ||
|
||
int main() { | ||
queue Q; | ||
|
||
Q.single_task([=]() { DeviceGlobalVar.get()[0] = 42; }); | ||
// Make sure that the write happens before subsequent read | ||
Q.wait(); | ||
|
||
int OutVal = 0; | ||
{ | ||
buffer<int, 1> OutBuf(&OutVal, 1); | ||
Q.submit([&](handler &CGH) { | ||
auto OutAcc = OutBuf.get_access<access::mode::write>(CGH); | ||
CGH.single_task([=]() { OutAcc[0] = DeviceGlobalVar.get()[0]; }); | ||
}); | ||
} | ||
assert(OutVal == 42 && "Read value does not match."); | ||
return 0; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
// Tests that the llvm.compiler.used symbol, which is used to implement static | ||
// device globals, is removed at some point in compilation. For SPIR-V this | ||
// symbol is removed at sycl-post-link and for NVPTX/AMDGCN it is removed at | ||
// lowering. | ||
// | ||
// It also checks that the symbol can be found in an object file for a given | ||
// triple, thus validating that `llvm-strings` can successfully be used to | ||
// check for the presence of the symbol. | ||
|
||
// UNSUPPORTED: windows | ||
|
||
// RUN: %clangxx -fsycl -fsycl-device-only %s -o %t | ||
// RUN: llvm-strings %t | grep "llvm.compiler.used" | ||
// RUN: %clangxx -fsycl %s -o %t | ||
// RUN: llvm-strings %t | not grep "llvm.compiler.used" | ||
|
||
// RUN: %if cuda %{ %clangxx -fsycl -fsycl-device-only -fsycl-targets=nvptx64-nvidia-cuda %s -o %t %} | ||
// RUN: %if cuda %{ llvm-strings %t | grep "llvm.compiler.used" %} | ||
// RUN: %if cuda %{ %clangxx -fsycl -fsycl-targets=nvptx64-nvidia-cuda %s -o %t %} | ||
// RUN: %if cuda %{ llvm-strings %t | not grep "llvm.compiler.used" %} | ||
|
||
// RUN: %if hip_amd %{ %clangxx -fsycl -fsycl-device-only -fsycl-targets=amd_gpu_gfx906 %s -o %t %} | ||
// RUN: %if hip_amd %{ llvm-strings %t | grep "llvm.compiler.used" %} | ||
// RUN: %if hip_amd %{ %clangxx -fsycl -fsycl-targets=amd_gpu_gfx906 %s -o %t %} | ||
// RUN: %if hip_amd %{ llvm-strings %t | not grep "llvm.compiler.used" %} | ||
|
||
#include <sycl/sycl.hpp> | ||
|
||
using namespace sycl; | ||
using namespace sycl::ext::oneapi::experimental; | ||
|
||
// Internal-linkage device_global read by the kernel in main(). The header | ||
// comment of this test ties llvm.compiler.used to static device globals such | ||
// as this one. | ||
static device_global<int> DeviceGlobalVar; | ||
|
||
int main() { | ||
sycl::queue{}.single_task([=] { volatile int ReadVal = DeviceGlobalVar; }); | ||
} |