From 27fff014809c96b45a9685da306dc7b7292b2ad7 Mon Sep 17 00:00:00 2001 From: smanna12 Date: Fri, 13 Sep 2024 14:25:40 -0700 Subject: [PATCH] [SYCL] Fix Ambiguity in Overloaded Unary Minus Operator for bfloat16 (#15393) This commit resolves an overload resolution ambiguity on Windows self-builds in downstream pulldown when using the unary minus operator with bfloat16. The operator now takes a const-qualified argument, enabling its use with both lvalues and rvalues and fixing the reported build error. --------- Signed-off-by: Larsen, Steffen Signed-off-by: Sarnie, Nick Signed-off-by: Soumi Manna Co-authored-by: Steffen Larsen Co-authored-by: Fraser Cormack Co-authored-by: Artur Gainullin Co-authored-by: Nick Sarnie Co-authored-by: Kenneth Benzie (Benie) Co-authored-by: Chris Perkins Co-authored-by: Michael Toguchi --- sycl/include/sycl/ext/oneapi/bfloat16.hpp | 2 +- .../check_device_code/vector/vector_math_ops.cpp | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/bfloat16.hpp b/sycl/include/sycl/ext/oneapi/bfloat16.hpp index 70b96a3976d39..18f53a41b0a5f 100644 --- a/sycl/include/sycl/ext/oneapi/bfloat16.hpp +++ b/sycl/include/sycl/ext/oneapi/bfloat16.hpp @@ -182,7 +182,7 @@ class bfloat16 { explicit operator bool() { return to_float(value) != 0.0f; } // Unary minus operator overloading - friend bfloat16 operator-(bfloat16 &lhs) { + friend bfloat16 operator-(const bfloat16 &lhs) { #if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) && \ (__SYCL_CUDA_ARCH__ >= 800) detail::Bfloat16StorageT res; diff --git a/sycl/test/check_device_code/vector/vector_math_ops.cpp b/sycl/test/check_device_code/vector/vector_math_ops.cpp index dadc8da98551e..d03a81b502544 100644 --- a/sycl/test/check_device_code/vector/vector_math_ops.cpp +++ b/sycl/test/check_device_code/vector/vector_math_ops.cpp @@ -341,9 +341,8 @@ SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-NEXT: [[REF_TMP_I:%.*]] = alloca float, align 4 // CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META100:![0-9]+]]) -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[REF_TMP_I]]) -// CHECK-NEXT: [[REF_TMP_ASCAST_I:%.*]] = addrspacecast ptr [[REF_TMP_I]] to ptr addrspace(4) // CHECK-NEXT: tail call void @llvm.memset.p4.i64(ptr addrspace(4) noundef align 32 dereferenceable(32) [[AGG_RESULT]], i8 0, i64 32, i1 false), !alias.scope [[META100]] +// CHECK-NEXT: [[REF_TMP_ASCAST_I:%.*]] = addrspacecast ptr [[REF_TMP_I]] to ptr addrspace(4) // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] @@ -351,16 +350,17 @@ SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V16DETAILNGERKNS0_3VECINS0_3EXT6ONEAPI8BFLOAT16ELI16EEE_EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds [16 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I]] -// CHECK-NEXT: [[CALL_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) [[ARRAYIDX_I_I_I_I]]) #[[ATTR8]], !noalias [[META100]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[REF_TMP_I]]) +// CHECK-NEXT: [[CALL_I_I_I:%.*]] = call spir_func float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) [[ARRAYIDX_I_I_I_I]]) #[[ATTR8]], !noalias [[META103:![0-9]+]] // CHECK-NEXT: [[FNEG_I:%.*]] = fneg float [[CALL_I_I_I]] -// CHECK-NEXT: store float [[FNEG_I]], ptr [[REF_TMP_I]], align 4, !tbaa [[TBAA47]], !noalias [[META100]] +// CHECK-NEXT: store float [[FNEG_I]], ptr [[REF_TMP_I]], align 4, !tbaa [[TBAA47]], !noalias [[META103]] +// CHECK-NEXT: [[CALL_I_I10_I:%.*]] = call spir_func noundef zeroext i16 @__devicelib_ConvertFToBF16INTEL(ptr addrspace(4) noundef align 4 dereferenceable(4) [[REF_TMP_ASCAST_I]]) #[[ATTR8]], !noalias [[META103]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[REF_TMP_I]]) // CHECK-NEXT: [[ARRAYIDX_I_I_I9_I:%.*]] = getelementptr inbounds [16 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[AGG_RESULT]], i64 0, i64 [[I_0_I]] -// CHECK-NEXT: [[CALL_I_I10_I:%.*]] = call spir_func noundef zeroext i16 @__devicelib_ConvertFToBF16INTEL(ptr addrspace(4) noundef align 4 dereferenceable(4) [[REF_TMP_ASCAST_I]]) #[[ATTR8]], !noalias [[META100]] // CHECK-NEXT: store i16 [[CALL_I_I10_I]], ptr addrspace(4) [[ARRAYIDX_I_I_I9_I]], align 2, !tbaa [[TBAA103:![0-9]+]], !alias.scope [[META100]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 // CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP105:![0-9]+]] // CHECK: _ZN4sycl3_V16detailngERKNS0_3vecINS0_3ext6oneapi8bfloat16ELi16EEE.exit: -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[REF_TMP_I]]) // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestMinus(vec a) { return -a; }