Fix GEP lowering pass

Folding of a ptrtoint whose operand is a bitcast or inttoptr result should also handle vector operands (vectors of pointers and vectors of integers) correctly.
intel · Sep 19, 2023 · dd055a9
1 parent e916f98
commit dd055a9
Show file tree

Hide file tree

Showing 2 changed files with 24 additions and 3 deletions.
diff --git a/IGC/VectorCompiler/lib/GenXCodeGen/GenXGEPLowering.cpp b/IGC/VectorCompiler/lib/GenXCodeGen/GenXGEPLowering.cpp
@@ -160,11 +160,11 @@ Value *GenXGEPLowering::visitPtrToIntInst(PtrToIntInst &PTI) {
   auto *Ty = PTI.getType();
   Value *NewI = nullptr;
 
-  if (SrcTy->isPointerTy()) {
+  if (SrcTy->isPtrOrPtrVectorTy()) {
     NewI = Builder->CreatePtrToInt(Src, Ty);
     NewI->takeName(&PTI);
   } else {
-    IGC_ASSERT(SrcTy->isIntegerTy());
+    IGC_ASSERT(SrcTy->isIntOrIntVectorTy());
     NewI = Builder->CreateIntCast(Src, Ty, false);
   }
 

diff --git a/IGC/VectorCompiler/test/GenXGEPLowering/fold-ptrtoint.ll b/IGC/VectorCompiler/test/GenXGEPLowering/fold-ptrtoint.ll
@@ -10,7 +10,8 @@
 
 target datalayout = "e-p:64:64-p6:32:32-i64:64-n8:16:32:64"
 
-declare <32 x i32> @llvm.vc.internal.lsc.load.ugm.v32i32.v1i1.i64(<1 x i1>, i8, i8, i8, i8, i8, i64, i64, i16, i32, <32 x i32>) #4
+declare <32 x i32> @llvm.vc.internal.lsc.load.ugm.v32i32.v1i1.i64(<1 x i1>, i8, i8, i8, i8, i8, i64, i64, i16, i32, <32 x i32>)
+declare <32 x double> @llvm.vc.internal.lsc.load.ugm.v32f64.v32i1.v32i64(<32 x i1>, i8, i8, i8, i8, i8, i64, <32 x i64>, i16, i32, <32 x double>)
 
 ; CHECK-LABEL: @test_fold_cast
 define <32 x i32> @test_fold_cast(i8 addrspace(1)* align 8 %a) {
@@ -45,6 +46,26 @@ entry:
   ret <32 x i32> %res
 }
 
+; CHECK-LABEL: @test_fold_cast_vector
+define <32 x double> @test_fold_cast_vector(<32 x i1> %mask, <32 x i8 addrspace(1)*> %a) {
+entry:
+  %bitcast = bitcast <32 x i8 addrspace(1)*> %a to <32 x double addrspace(1)*>
+  ; CHECK: %pti = ptrtoint <32 x i8 addrspace(1)*> %a to <32 x i64>
+  %pti = ptrtoint <32 x double addrspace(1)*> %bitcast to <32 x i64>
+  %res = call <32 x double> @llvm.vc.internal.lsc.load.ugm.v32f64.v32i1.v32i64(<32 x i1> %mask, i8 3, i8 4, i8 1, i8 0, i8 0, i64 0, <32 x i64> %pti, i16 1, i32 0, <32 x double> undef)
+  ret <32 x double> %res
+}
+
+; CHECK-LABEL: @test_fold_itp_vector
+define <32 x double> @test_fold_itp_vector(<32 x i1> %mask, <32 x i64> %a) {
+entry:
+  %itp = inttoptr <32 x i64> %a to <32 x double addrspace(1)*>
+  %pti = ptrtoint <32 x double addrspace(1)*> %itp to <32 x i64>
+  ; CHECK: %res = call <32 x double> @llvm.vc.internal.lsc.load.ugm.v32f64.v32i1.v32i64(<32 x i1> %mask, i8 3, i8 4, i8 1, i8 0, i8 0, i64 0, <32 x i64> %a, i16 1, i32 0, <32 x double> undef)
+  %res = call <32 x double> @llvm.vc.internal.lsc.load.ugm.v32f64.v32i1.v32i64(<32 x i1> %mask, i8 3, i8 4, i8 1, i8 0, i8 0, i64 0, <32 x i64> %pti, i16 1, i32 0, <32 x double> undef)
+  ret <32 x double> %res
+}
+
 ; CHECK-LABEL: @test_cross_block
 define void @test_cross_block(i64 %a, i32 %b) {
 entry: