Skip to content

Commit

Permalink
Merge branch 'sycl' of https://github.com/intel/llvm into benchmarking-workflow
Browse files Browse the repository at this point in the history
  • Loading branch information
ianayl committed Jul 4, 2024
2 parents 6ea0110 + ef62cad commit 6d14a32
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 41 deletions.
18 changes: 6 additions & 12 deletions clang/include/clang/Basic/Attr.td
Original file line number Diff line number Diff line change
Expand Up @@ -1705,20 +1705,14 @@ def SYCLIntelMaxWorkGroupSize : InheritableAttr {
let LangOpts = [SYCLIsDevice, SilentlyIgnoreSYCLIsHost];
let Subjects = SubjectList<[Function], ErrorDiag>;
let AdditionalMembers = [{
std::optional<llvm::APSInt> getXDimVal() const {
if (const auto *CE = dyn_cast<ConstantExpr>(getXDim()))
return CE->getResultAsAPSInt();
return std::nullopt;
unsigned getXDimVal() const {
return cast<ConstantExpr>(getXDim())->getResultAsAPSInt().getExtValue();
}
std::optional<llvm::APSInt> getYDimVal() const {
if (const auto *CE = dyn_cast<ConstantExpr>(getYDim()))
return CE->getResultAsAPSInt();
return std::nullopt;
unsigned getYDimVal() const {
return cast<ConstantExpr>(getYDim())->getResultAsAPSInt().getExtValue();
}
std::optional<llvm::APSInt> getZDimVal() const {
if (const auto *CE = dyn_cast<ConstantExpr>(getZDim()))
return CE->getResultAsAPSInt();
return std::nullopt;
unsigned getZDimVal() const {
return cast<ConstantExpr>(getZDim())->getResultAsAPSInt().getExtValue();
}
}];
let Documentation = [SYCLIntelMaxWorkGroupSizeAttrDocs];
Expand Down
6 changes: 3 additions & 3 deletions clang/lib/CodeGen/CodeGenFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -826,9 +826,9 @@ void CodeGenFunction::EmitKernelMetadata(const FunctionDecl *FD,
// Attributes arguments (first and third) are reversed on SYCLDevice.
if (getLangOpts().SYCLIsDevice) {
llvm::Metadata *AttrMDArgs[] = {
llvm::ConstantAsMetadata::get(Builder.getInt(*A->getZDimVal())),
llvm::ConstantAsMetadata::get(Builder.getInt(*A->getYDimVal())),
llvm::ConstantAsMetadata::get(Builder.getInt(*A->getXDimVal()))};
llvm::ConstantAsMetadata::get(Builder.getInt32(A->getZDimVal())),
llvm::ConstantAsMetadata::get(Builder.getInt32(A->getYDimVal())),
llvm::ConstantAsMetadata::get(Builder.getInt32(A->getXDimVal()))};
Fn->setMetadata("max_work_group_size",
llvm::MDNode::get(Context, AttrMDArgs));
}
Expand Down
14 changes: 7 additions & 7 deletions clang/lib/CodeGen/Targets/NVPTX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -252,13 +252,13 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
bool HasMaxWorkGroupSize = false;
bool HasMinWorkGroupPerCU = false;
if (const auto *MWGS = FD->getAttr<SYCLIntelMaxWorkGroupSizeAttr>()) {
auto MaxThreads = (*MWGS->getZDimVal()).getExtValue() *
(*MWGS->getYDimVal()).getExtValue() *
(*MWGS->getXDimVal()).getExtValue();
if (MaxThreads > 0) {
addNVVMMetadata(F, "maxntidx", MaxThreads);
HasMaxWorkGroupSize = true;
}
HasMaxWorkGroupSize = true;
// SYCL and CUDA order dimensions differently, so the indices must be flipped:
// in SYCL's notation, X,Y,Z (aka dim0,dim1,dim2), the rightmost dimension is
// the fastest-moving one, whereas in CUDA's, X is the fastest-moving dimension.
addNVVMMetadata(F, "maxntidx", MWGS->getZDimVal());
addNVVMMetadata(F, "maxntidy", MWGS->getYDimVal());
addNVVMMetadata(F, "maxntidz", MWGS->getXDimVal());
}

auto attrValue = [&](Expr *E) {
Expand Down
58 changes: 39 additions & 19 deletions clang/test/CodeGenSYCL/launch_bounds_nvptx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
// compute unit and maximum work groups per multi-processor attributes, that
// correspond to CUDA's launch bounds. Expect max_work_group_size,
// min_work_groups_per_cu and max_work_groups_per_mp that are mapped to
// maxntidx, minctasm, and maxclusterrank NVVM annotations respectively.
// maxntid[xyz], minctasm, and maxclusterrank NVVM annotations respectively.

#include "sycl.hpp"

Expand All @@ -13,24 +13,24 @@ queue q;

class Foo {
public:
[[intel::max_work_group_size(8, 8, 8), intel::min_work_groups_per_cu(2),
[[intel::max_work_group_size(2, 4, 8), intel::min_work_groups_per_cu(2),
intel::max_work_groups_per_mp(4)]] void
operator()() const {}
};

template <int N> class Functor {
public:
[[intel::max_work_group_size(N, 8, 8), intel::min_work_groups_per_cu(N),
[[intel::max_work_group_size(N, 4, 8), intel::min_work_groups_per_cu(N),
intel::max_work_groups_per_mp(N)]] void
operator()() const {}
};

template <int N>
[[intel::max_work_group_size(N, 8, 8), intel::min_work_groups_per_cu(N),
[[intel::max_work_group_size(N, 4, 8), intel::min_work_groups_per_cu(N),
intel::max_work_groups_per_mp(N)]] void
zoo() {}

[[intel::max_work_group_size(8, 8, 8), intel::min_work_groups_per_cu(2),
[[intel::max_work_group_size(2, 4, 8), intel::min_work_groups_per_cu(2),
intel::max_work_groups_per_mp(4)]] void
bar() {}

Expand All @@ -42,7 +42,7 @@ int main() {

// Test attribute is applied on lambda.
h.single_task<class kernel_name2>(
[] [[intel::max_work_group_size(8, 8, 8),
[] [[intel::max_work_group_size(2, 4, 8),
intel::min_work_groups_per_cu(2),
intel::max_work_groups_per_mp(4)]] () {});

Expand All @@ -65,41 +65,61 @@ int main() {
// CHECK: define dso_local void @{{.*}}kernel_name4() #0 {{.*}} !min_work_groups_per_cu ![[MWGPC:[0-9]+]] !max_work_groups_per_mp ![[MWGPM:[0-9]+]] !max_work_group_size ![[MWGS:[0-9]+]]
// CHECK: define dso_local void @{{.*}}kernel_name5() #0 {{.*}} !min_work_groups_per_cu ![[MWGPC_MWGPM_2:[0-9]+]] !max_work_groups_per_mp ![[MWGPC_MWGPM_2]] !max_work_group_size ![[MWGS_3:[0-9]+]]

// CHECK: {{.*}}@{{.*}}kernel_name1, !"maxntidx", i32 512}
// CHECK: {{.*}}@{{.*}}kernel_name1, !"maxntidx", i32 8}
// CHECK: {{.*}}@{{.*}}kernel_name1, !"maxntidy", i32 4}
// CHECK: {{.*}}@{{.*}}kernel_name1, !"maxntidz", i32 2}
// CHECK: {{.*}}@{{.*}}kernel_name1, !"minctasm", i32 2}
// CHECK: {{.*}}@{{.*}}kernel_name1, !"maxclusterrank", i32 4}
// CHECK: {{.*}}@{{.*}}Foo{{.*}}, !"maxntidx", i32 512}
// CHECK: {{.*}}@{{.*}}Foo{{.*}}, !"maxntidx", i32 8}
// CHECK: {{.*}}@{{.*}}Foo{{.*}}, !"maxntidy", i32 4}
// CHECK: {{.*}}@{{.*}}Foo{{.*}}, !"maxntidz", i32 2}
// CHECK: {{.*}}@{{.*}}Foo{{.*}}, !"minctasm", i32 2}
// CHECK: {{.*}}@{{.*}}Foo{{.*}}, !"maxclusterrank", i32 4}
// CHECK: {{.*}}@{{.*}}kernel_name2, !"maxntidx", i32 512}
// CHECK: {{.*}}@{{.*}}kernel_name2, !"maxntidx", i32 8}
// CHECK: {{.*}}@{{.*}}kernel_name2, !"maxntidy", i32 4}
// CHECK: {{.*}}@{{.*}}kernel_name2, !"maxntidz", i32 2}
// CHECK: {{.*}}@{{.*}}kernel_name2, !"minctasm", i32 2}
// CHECK: {{.*}}@{{.*}}kernel_name2, !"maxclusterrank", i32 4}
// CHECK: {{.*}}@{{.*}}main{{.*}}, !"maxntidx", i32 512}
// CHECK: {{.*}}@{{.*}}main{{.*}}, !"maxntidx", i32 8}
// CHECK: {{.*}}@{{.*}}main{{.*}}, !"maxntidy", i32 4}
// CHECK: {{.*}}@{{.*}}main{{.*}}, !"maxntidz", i32 2}
// CHECK: {{.*}}@{{.*}}main{{.*}}, !"minctasm", i32 2}
// CHECK: {{.*}}@{{.*}}main{{.*}}, !"maxclusterrank", i32 4}
// CHECK: {{.*}}@{{.*}}kernel_name3, !"maxntidx", i32 384}
// CHECK: {{.*}}@{{.*}}kernel_name3, !"maxntidx", i32 8}
// CHECK: {{.*}}@{{.*}}kernel_name3, !"maxntidy", i32 4}
// CHECK: {{.*}}@{{.*}}kernel_name3, !"maxntidz", i32 6}
// CHECK: {{.*}}@{{.*}}kernel_name3, !"minctasm", i32 6}
// CHECK: {{.*}}@{{.*}}kernel_name3, !"maxclusterrank", i32 6}
// CHECK: {{.*}}@{{.*}}Functor{{.*}}, !"maxntidx", i32 384}
// CHECK: {{.*}}@{{.*}}Functor{{.*}}, !"maxntidx", i32 8}
// CHECK: {{.*}}@{{.*}}Functor{{.*}}, !"maxntidy", i32 4}
// CHECK: {{.*}}@{{.*}}Functor{{.*}}, !"maxntidz", i32 6}
// CHECK: {{.*}}@{{.*}}Functor{{.*}}, !"minctasm", i32 6}
// CHECK: {{.*}}@{{.*}}Functor{{.*}}, !"maxclusterrank", i32 6}
// CHECK: {{.*}}@{{.*}}kernel_name4, !"maxntidx", i32 512}
// CHECK: {{.*}}@{{.*}}kernel_name4, !"maxntidx", i32 8}
// CHECK: {{.*}}@{{.*}}kernel_name4, !"maxntidy", i32 4}
// CHECK: {{.*}}@{{.*}}kernel_name4, !"maxntidz", i32 2}
// CHECK: {{.*}}@{{.*}}kernel_name4, !"minctasm", i32 2}
// CHECK: {{.*}}@{{.*}}kernel_name4, !"maxclusterrank", i32 4}
// CHECK: {{.*}}@{{.*}}bar{{.*}}, !"maxntidx", i32 512}
// CHECK: {{.*}}@{{.*}}bar{{.*}}, !"maxntidx", i32 8}
// CHECK: {{.*}}@{{.*}}bar{{.*}}, !"maxntidy", i32 4}
// CHECK: {{.*}}@{{.*}}bar{{.*}}, !"maxntidz", i32 2}
// CHECK: {{.*}}@{{.*}}bar{{.*}}, !"minctasm", i32 2}
// CHECK: {{.*}}@{{.*}}bar{{.*}}, !"maxclusterrank", i32 4}
// CHECK: {{.*}}@{{.*}}kernel_name5, !"maxntidx", i32 1024}
// CHECK: {{.*}}@{{.*}}kernel_name5, !"maxntidx", i32 8}
// CHECK: {{.*}}@{{.*}}kernel_name5, !"maxntidy", i32 4}
// CHECK: {{.*}}@{{.*}}kernel_name5, !"maxntidz", i32 16}
// CHECK: {{.*}}@{{.*}}kernel_name5, !"minctasm", i32 16}
// CHECK: {{.*}}@{{.*}}kernel_name5, !"maxclusterrank", i32 16}
// CHECK: {{.*}}@{{.*}}zoo{{.*}}, !"maxntidx", i32 1024}
// CHECK: {{.*}}@{{.*}}zoo{{.*}}, !"maxntidx", i32 8}
// CHECK: {{.*}}@{{.*}}zoo{{.*}}, !"maxntidy", i32 4}
// CHECK: {{.*}}@{{.*}}zoo{{.*}}, !"maxntidz", i32 16}
// CHECK: {{.*}}@{{.*}}zoo{{.*}}, !"minctasm", i32 16}
// CHECK: {{.*}}@{{.*}}zoo{{.*}}, !"maxclusterrank", i32 16}

// CHECK: ![[MWGPC]] = !{i32 2}
// CHECK: ![[MWGPM]] = !{i32 4}
// CHECK: ![[MWGS]] = !{i32 8, i32 8, i32 8}
// CHECK: ![[MWGS]] = !{i32 8, i32 4, i32 2}
// CHECK: ![[MWGPC_MWGPM]] = !{i32 6}
// CHECK: ![[MWGS_2]] = !{i32 8, i32 8, i32 6}
// CHECK: ![[MWGS_2]] = !{i32 8, i32 4, i32 6}
// CHECK: ![[MWGPC_MWGPM_2]] = !{i32 16}
// CHECK: ![[MWGS_3]] = !{i32 8, i32 8, i32 16}
// CHECK: ![[MWGS_3]] = !{i32 8, i32 4, i32 16}

0 comments on commit 6d14a32

Please sign in to comment.