Skip to content

Commit

Permalink
[SYCL][NVPTX] Fix NVVM annotation for min_work_groups_per_cu attribute (#14418)
Browse files Browse the repository at this point in the history

While the PTX directive is "minnctapersm", the NVVM annotation is
"minctasm". This can be seen in the NVPTX backend, and in the NVVM IR
spec, section 13.3.
  • Loading branch information
frasercrmck authored Jul 4, 2024
1 parent 4e5f351 commit ee3d746
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 12 deletions.
2 changes: 1 addition & 1 deletion clang/lib/CodeGen/Targets/NVPTX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
<< MWGPCU << 0;
} else {
// The value is guaranteed to be > 0, pass it to the metadata.
addNVVMMetadata(F, "minnctapersm", attrValue(MWGPCU->getValue()));
addNVVMMetadata(F, "minctasm", attrValue(MWGPCU->getValue()));
HasMinWorkGroupPerCU = true;
}
}
Expand Down
22 changes: 11 additions & 11 deletions clang/test/CodeGenSYCL/launch_bounds_nvptx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
// compute unit and maximum work groups per multi-processor attributes, that
// correspond to CUDA's launch bounds. Expect max_work_group_size,
// min_work_groups_per_cu and max_work_groups_per_mp that are mapped to
// maxntidx, minnctapersm, maxclusterrank PTX directives respectively.
// maxntidx, minctasm, and maxclusterrank NVVM annotations respectively.

#include "sycl.hpp"

Expand Down Expand Up @@ -66,34 +66,34 @@ int main() {
// CHECK: define dso_local void @{{.*}}kernel_name5() #0 {{.*}} !min_work_groups_per_cu ![[MWGPC_MWGPM_2:[0-9]+]] !max_work_groups_per_mp ![[MWGPC_MWGPM_2]] !max_work_group_size ![[MWGS_3:[0-9]+]]

// CHECK: {{.*}}@{{.*}}kernel_name1, !"maxntidx", i32 512}
// CHECK: {{.*}}@{{.*}}kernel_name1, !"minnctapersm", i32 2}
// CHECK: {{.*}}@{{.*}}kernel_name1, !"minctasm", i32 2}
// CHECK: {{.*}}@{{.*}}kernel_name1, !"maxclusterrank", i32 4}
// CHECK: {{.*}}@{{.*}}Foo{{.*}}, !"maxntidx", i32 512}
// CHECK: {{.*}}@{{.*}}Foo{{.*}}, !"minnctapersm", i32 2}
// CHECK: {{.*}}@{{.*}}Foo{{.*}}, !"minctasm", i32 2}
// CHECK: {{.*}}@{{.*}}Foo{{.*}}, !"maxclusterrank", i32 4}
// CHECK: {{.*}}@{{.*}}kernel_name2, !"maxntidx", i32 512}
// CHECK: {{.*}}@{{.*}}kernel_name2, !"minnctapersm", i32 2}
// CHECK: {{.*}}@{{.*}}kernel_name2, !"minctasm", i32 2}
// CHECK: {{.*}}@{{.*}}kernel_name2, !"maxclusterrank", i32 4}
// CHECK: {{.*}}@{{.*}}main{{.*}}, !"maxntidx", i32 512}
// CHECK: {{.*}}@{{.*}}main{{.*}}, !"minnctapersm", i32 2}
// CHECK: {{.*}}@{{.*}}main{{.*}}, !"minctasm", i32 2}
// CHECK: {{.*}}@{{.*}}main{{.*}}, !"maxclusterrank", i32 4}
// CHECK: {{.*}}@{{.*}}kernel_name3, !"maxntidx", i32 384}
// CHECK: {{.*}}@{{.*}}kernel_name3, !"minnctapersm", i32 6}
// CHECK: {{.*}}@{{.*}}kernel_name3, !"minctasm", i32 6}
// CHECK: {{.*}}@{{.*}}kernel_name3, !"maxclusterrank", i32 6}
// CHECK: {{.*}}@{{.*}}Functor{{.*}}, !"maxntidx", i32 384}
// CHECK: {{.*}}@{{.*}}Functor{{.*}}, !"minnctapersm", i32 6}
// CHECK: {{.*}}@{{.*}}Functor{{.*}}, !"minctasm", i32 6}
// CHECK: {{.*}}@{{.*}}Functor{{.*}}, !"maxclusterrank", i32 6}
// CHECK: {{.*}}@{{.*}}kernel_name4, !"maxntidx", i32 512}
// CHECK: {{.*}}@{{.*}}kernel_name4, !"minnctapersm", i32 2}
// CHECK: {{.*}}@{{.*}}kernel_name4, !"minctasm", i32 2}
// CHECK: {{.*}}@{{.*}}kernel_name4, !"maxclusterrank", i32 4}
// CHECK: {{.*}}@{{.*}}bar{{.*}}, !"maxntidx", i32 512}
// CHECK: {{.*}}@{{.*}}bar{{.*}}, !"minnctapersm", i32 2}
// CHECK: {{.*}}@{{.*}}bar{{.*}}, !"minctasm", i32 2}
// CHECK: {{.*}}@{{.*}}bar{{.*}}, !"maxclusterrank", i32 4}
// CHECK: {{.*}}@{{.*}}kernel_name5, !"maxntidx", i32 1024}
// CHECK: {{.*}}@{{.*}}kernel_name5, !"minnctapersm", i32 16}
// CHECK: {{.*}}@{{.*}}kernel_name5, !"minctasm", i32 16}
// CHECK: {{.*}}@{{.*}}kernel_name5, !"maxclusterrank", i32 16}
// CHECK: {{.*}}@{{.*}}zoo{{.*}}, !"maxntidx", i32 1024}
// CHECK: {{.*}}@{{.*}}zoo{{.*}}, !"minnctapersm", i32 16}
// CHECK: {{.*}}@{{.*}}zoo{{.*}}, !"minctasm", i32 16}
// CHECK: {{.*}}@{{.*}}zoo{{.*}}, !"maxclusterrank", i32 16}

// CHECK: ![[MWGPC]] = !{i32 2}
Expand Down

0 comments on commit ee3d746

Please sign in to comment.