diff --git a/IGC/Compiler/Optimizer/OpenCLPasses/SubGroupFuncs/SubGroupFuncsResolution.cpp b/IGC/Compiler/Optimizer/OpenCLPasses/SubGroupFuncs/SubGroupFuncsResolution.cpp index dd4d79e851f0..5826bebce721 100644 --- a/IGC/Compiler/Optimizer/OpenCLPasses/SubGroupFuncs/SubGroupFuncsResolution.cpp +++ b/IGC/Compiler/Optimizer/OpenCLPasses/SubGroupFuncs/SubGroupFuncsResolution.cpp @@ -333,22 +333,6 @@ void SubGroupFuncsResolution::mediaBlockWrite(llvm::CallInst& CI) CI.eraseFromParent(); } -// If CI parameter is %"class.sycl::_V1::ext::oneapi::bfloat16" type, which is { i16 }, -// then we need to cast it to i16 type before calling simdBlockRead intrinsic. -static inline Value* castSYCLBFloat16toi16(PointerType* PtrTy, Value* Ptr, CallInst& CI, LLVMContext& C) -{ - if (StructType* ST = dyn_cast(IGCLLVM::getNonOpaquePtrEltTy(PtrTy))) - { - // check if ST has only field and this field is i16 type - if (ST->getNumElements() == 1 && ST->getElementType(0)->isIntegerTy(16)) - { - return CastInst::CreatePointerCast(Ptr, PointerType::get(Type::getInt16Ty(C), PtrTy->getAddressSpace()), "", &CI); - } - } - - return Ptr; -} - void SubGroupFuncsResolution::simdBlockRead(llvm::CallInst& CI, bool hasCacheControls) { // Creates intrinsics that will be lowered in the CodeGen and will handle the simd_block_read @@ -357,7 +341,7 @@ void SubGroupFuncsResolution::simdBlockRead(llvm::CallInst& CI, bool hasCacheCon PointerType* PtrTy = dyn_cast(Ptr->getType()); IGC_ASSERT_MESSAGE(PtrTy, "simdBlockRead has non-pointer type!"); SmallVector args; - args.push_back(castSYCLBFloat16toi16(PtrTy, Ptr, CI, C)); + args.push_back(Ptr); SmallVector types; types.push_back(nullptr); types.push_back(nullptr); GenISAIntrinsic::ID genIntrinID = GenISAIntrinsic::GenISA_simdBlockRead; @@ -463,7 +447,7 @@ void SubGroupFuncsResolution::simdBlockWrite(llvm::CallInst& CI, bool hasCacheCo SmallVector types; Value* dataArg = CI.getArgOperand(1); - args.push_back(castSYCLBFloat16toi16(PtrTy, Ptr, CI, C)); + args.push_back(CI.getArgOperand(0)); args.push_back(dataArg); switch (dataArg->getType()->getScalarType()->getScalarSizeInBits()) diff --git a/IGC/Compiler/tests/SubGroupFuncsResolution/simd_blockreadwrite_bfloat16.ll b/IGC/Compiler/tests/SubGroupFuncsResolution/simd_blockreadwrite_bfloat16.ll deleted file mode 100644 index f130335ba7be..000000000000 --- a/IGC/Compiler/tests/SubGroupFuncsResolution/simd_blockreadwrite_bfloat16.ll +++ /dev/null @@ -1,52 +0,0 @@ -;=========================== begin_copyright_notice ============================ -; -; Copyright (C) 2024 Intel Corporation -; -; SPDX-License-Identifier: MIT -; -;============================ end_copyright_notice ============================= -; -; RUN: igc_opt --platformdg2 --igc-sub-group-func-resolution -S %s 2>&1 | FileCheck %s -; ------------------------------------------------ -; SubGroupFuncsResolution -; ------------------------------------------------ -; This test checks that SubGroupFuncsResolution pass resolves mismatch -; between bfloat16 type passed from SYCL and built-ins accepting i16 type -; ------------------------------------------------ - -%"class.sycl::_V1::ext::oneapi::bfloat16" = type { i16 } - -define spir_kernel void @test_bfloat16(%"class.sycl::_V1::ext::oneapi::bfloat16" addrspace(3)* %dst, %"class.sycl::_V1::ext::oneapi::bfloat16" addrspace(1)* %src) #0 { -; CHECK-LABEL: @test_bfloat16( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = bitcast %"class.sycl::_V1::ext::oneapi::bfloat16" addrspace(1)* [[SRC:%.*]] to i16 addrspace(1)* -; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i16> @llvm.genx.GenISA.simdBlockRead.v2i16.p1i16(i16 addrspace(1)* [[TMP0]]) -; CHECK-NEXT: [[TMP2:%.*]] = bitcast %"class.sycl::_V1::ext::oneapi::bfloat16" addrspace(3)* [[DST:%.*]] to i16 addrspace(3)* -; CHECK-NEXT: call void @llvm.genx.GenISA.simdBlockWrite.p3i16.v2i16(i16 addrspace(3)* [[TMP2]], <2 x i16> [[TMP1]]) -; CHECK-NEXT: [[TMP3:%.*]] = bitcast %"class.sycl::_V1::ext::oneapi::bfloat16" addrspace(1)* [[SRC]] to i16 addrspace(1)* -; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i16> @llvm.genx.GenISA.simdBlockRead.v16i16.p1i16(i16 addrspace(1)* [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = bitcast %"class.sycl::_V1::ext::oneapi::bfloat16" addrspace(3)* [[DST]] to i16 addrspace(3)* -; CHECK-NEXT: call void @llvm.genx.GenISA.simdBlockWrite.p3i16.v16i16(i16 addrspace(3)* [[TMP5]], <16 x i16> [[TMP4]]) -; CHECK-NEXT: ret void -; -entry: - %0 = call spir_func <2 x i16> @__builtin_IB_simd_block_read_2_global_h(%"class.sycl::_V1::ext::oneapi::bfloat16" addrspace(1)* %src) #0 - call spir_func void @__builtin_IB_simd_block_write_2_local_h(%"class.sycl::_V1::ext::oneapi::bfloat16" addrspace(3)* %dst, <2 x i16> %0) #0 - %1 = call spir_func <16 x i16> @__builtin_IB_simd_block_read_16_global_h(%"class.sycl::_V1::ext::oneapi::bfloat16" addrspace(1)* %src) #0 - call spir_func void @__builtin_IB_simd_block_write_16_local_h(%"class.sycl::_V1::ext::oneapi::bfloat16" addrspace(3)* %dst, <16 x i16> %1) #0 - ret void -} - -declare spir_func <2 x i16> @__builtin_IB_simd_block_read_2_global_h(%"class.sycl::_V1::ext::oneapi::bfloat16" addrspace(1)*) #0 -declare spir_func void @__builtin_IB_simd_block_write_2_local_h(%"class.sycl::_V1::ext::oneapi::bfloat16" addrspace(3)*, <2 x i16>) #0 -declare spir_func <16 x i16> @__builtin_IB_simd_block_read_16_global_h(%"class.sycl::_V1::ext::oneapi::bfloat16" addrspace(1)*) #0 -declare spir_func void @__builtin_IB_simd_block_write_16_local_h(%"class.sycl::_V1::ext::oneapi::bfloat16" addrspace(3)*, <16 x i16>) #0 - -attributes #0 = { convergent noinline nounwind optnone } - -!igc.functions = !{!3} - -!3 = !{void (%"class.sycl::_V1::ext::oneapi::bfloat16" addrspace(3)*, %"class.sycl::_V1::ext::oneapi::bfloat16" addrspace(1)*)* @test_bfloat16, !4} -!4 = !{!5, !6} -!5 = !{!"function_type", i32 0} -!6 = !{!"sub_group_size", i32 8}