Skip to content

Commit

Permalink
Add lit test for ResourceLoop iteration unroll
Browse files Browse the repository at this point in the history
The resource loop sub-iteration unroll needs lit tests to cover the
change, as required by policy.
  • Loading branch information
ichenkai authored and igcbot committed Oct 23, 2024
1 parent ec65813 commit ab8932d
Show file tree
Hide file tree
Showing 3 changed files with 247 additions and 10 deletions.
23 changes: 13 additions & 10 deletions IGC/Compiler/CISACodeGen/EmitVISAPass.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -901,19 +901,22 @@ class EmitPass : public llvm::FunctionPass
// need a temp (iter > 0) to save the unroll dst result to avoid shared SBID
Fn(flagSameLaneFlag, currentDestination, resource, needLoop);

m_encoder->SetNoMask();
// Sum lanes that did something (for correct goto at the end)
m_encoder->Or(flagSumMask, flagSumMask, flagSameLaneFlag);
m_encoder->Push();

// Last iteration does not need this
if ((iter < (iterations - 1)) && flagExecMask)
if (flagSameLaneFlag)
{
m_encoder->SetNoMask();
// mask out handled lanes out of remaining ExecMask
m_encoder->Xor(flagExecMask, flagExecMask, flagSameLaneFlag);
m_encoder->Cast(dwordPrevFlag, flagExecMask);
// Sum lanes that did something (for correct goto at the end)
m_encoder->Or(flagSumMask, flagSumMask, flagSameLaneFlag);
m_encoder->Push();

// Last iteration does not need this
if ((iter < (iterations - 1)) && flagExecMask)
{
m_encoder->SetNoMask();
// mask out handled lanes out of remaining ExecMask
m_encoder->Xor(flagExecMask, flagExecMask, flagSameLaneFlag);
m_encoder->Cast(dwordPrevFlag, flagExecMask);
m_encoder->Push();
}
}
}

Expand Down
126 changes: 126 additions & 0 deletions IGC/Compiler/tests/EmitVISAPass/resource-loop-unroll-iteration-lsc.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
;=========================== begin_copyright_notice ============================
;
; Copyright (C) 2023-2024 Intel Corporation
;
; SPDX-License-Identifier: MIT
;
;============================ end_copyright_notice =============================
; REQUIRES: llvm-14-plus, regkeys
; RUN: igc_opt -platformbmg -igc-emit-visa -simd-mode 16 -inputrt -regkey ResourceLoopUnrollIteration=4,DumpVISAASMToConsole -S < %s | FileCheck %s
;
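; Test checks how we emit a ResourceLoop unrolled 4 times (ResourceLoopUnrollIteration=4)
; around an LSC load that goes through a non-uniform resource.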


@ThreadGroupSize_X = constant i32 64
@ThreadGroupSize_Y = constant i32 1
@ThreadGroupSize_Z = constant i32 1

; Kernel under test: loads a <3 x i32> through a resource pointer built from a
; per-lane system value, then stores one element of the result. The expected
; vISA below wraps the load in a resource loop whose body is unrolled 4 times.
define spir_kernel void @test1(i32 %src1, i32 %val, i32 addrspace(1)* %dst) {
entry:
; CHECK: _main_0:

; Read system value 17. The expected vISA sources it from threadIdInGroupX
; with a 16-wide mov, i.e. one value per lane.
%svn = call i16 @llvm.genx.GenISA.DCL.SystemValue.i16(i32 17)
; CHECK: mov (M1, 16) svn(0,0)<1> threadIdInGroupX(0,0)<1;1,0>

; Widen the per-lane value to 32 bits; it becomes the resource handle below.
%nonuniform = zext i16 %svn to i32
; CHECK: mov (M1, 16) nonuniform(0,0)<1> svn_0(0,0)<1;1,0>

; Reinterpret the per-lane value as a resource pointer in address space 2621440.
; NOTE(review): this address-space number presumably encodes a bindless buffer
; (the expected loads address it as bss(...)) - confirm against IGC's encoding.
%NonUniformResource = inttoptr i32 %nonuniform to <4 x float> addrspace(2621440)*
; %offset depends only on the kernel argument %src1. The expected vISA computes
; it with a 1-wide add and then broadcasts it to all 16 lanes.
%offset = add i32 %src1, 1
; CHECK: add (M1_NM, 1) offset(0,0)<1> src1(0,0)<0;1,0> 0x1:w
; CHECK: mov (M1_NM, 1) offsetTrunc(0,0)<1> offset(0,0)<0;1,0>
; CHECK: mov (M1, 16) offsetTruncBroadcast(0,0)<1> offsetTrunc(0,0)<0;1,0>

; The load under test. Everything from the loop label down to the final goto
; is the expected resource loop.
%call = call <3 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v3i32.p2621440v4f32(<4 x float> addrspace(2621440)* %NonUniformResource, i32 %offset, i32 4, i1 false)
; CHECK: _test1_001__opt_resource_loop:
; Loop prologue: P4 and P5 are cleared, then P5 is set by a self-compare
; (presumably capturing the lanes that still need servicing).
; CHECK: setp (M1_NM, 16) P4 0x0:ud
; CHECK: setp (M1_NM, 16) P5 0x0:ud
; CHECK: cmp.eq (M1, 16) P5 V0032(0,0)<0;1,0> V0032(0,0)<0;1,0>
; Unrolled iteration 1: take the first set bit of P5 (fbl), read that lane's
; handle through A0, compare it against every lane's handle (P6), and issue the
; load predicated on P6.
; CHECK: mov (M1_NM, 1) V0033(0,0)<1> P5
; CHECK: fbl (M1_NM, 1) V0034(0,0)<1> V0033(0,0)<0;1,0>
; CHECK: shl (M1_NM, 1) V0034(0,0)<1> V0034(0,0)<0;1,0> 0x2:w
; CHECK: addr_add (M1_NM, 1) A0(0)<1> &nonuniform V0035(0,0)<0;1,0>
; CHECK: mov (M1_NM, 1) V0036(0,0)<1> r[A0(0),0]<0;1,0>:d
; CHECK: cmp.eq (M1, 16) P6 V0036(0,0)<0;1,0> nonuniform(0,0)<1;1,0>
; CHECK: (P6) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0036)[offsetTruncBroadcast]:a32
; P4 accumulates the lanes that were serviced; P5 drops them from the remaining set.
; CHECK: or (M1_NM, 16) P4 P4 P6
; CHECK: xor (M1_NM, 16) P5 P5 P6
; Unrolled iteration 2: same sequence on the remaining lanes, using A1 and P7.
; From here on the fbl result is masked with 0xf and the match mask is ANDed
; with P5 before it predicates the load.
; CHECK: mov (M1_NM, 1) V0033(0,0)<1> P5
; CHECK: fbl (M1_NM, 1) V0037(0,0)<1> V0033(0,0)<0;1,0>
; CHECK: and (M1_NM, 1) V0037(0,0)<1> V0037(0,0)<0;1,0> 0xf:ud
; CHECK: shl (M1_NM, 1) V0037(0,0)<1> V0037(0,0)<0;1,0> 0x2:w
; CHECK: addr_add (M1_NM, 1) A1(0)<1> &nonuniform V0038(0,0)<0;1,0>
; CHECK: mov (M1_NM, 1) V0039(0,0)<1> r[A1(0),0]<0;1,0>:d
; CHECK: cmp.eq (M1, 16) P7 V0039(0,0)<0;1,0> nonuniform(0,0)<1;1,0>
; CHECK: and (M1_NM, 16) P7 P7 P5
; CHECK: (P7) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0039)[offsetTruncBroadcast]:a32
; CHECK: or (M1_NM, 16) P4 P4 P7
; CHECK: xor (M1_NM, 16) P5 P5 P7
; Unrolled iteration 3: same sequence, using A2 and P8.
; CHECK: mov (M1_NM, 1) V0033(0,0)<1> P5
; CHECK: fbl (M1_NM, 1) V0040(0,0)<1> V0033(0,0)<0;1,0>
; CHECK: and (M1_NM, 1) V0040(0,0)<1> V0040(0,0)<0;1,0> 0xf:ud
; CHECK: shl (M1_NM, 1) V0040(0,0)<1> V0040(0,0)<0;1,0> 0x2:w
; CHECK: addr_add (M1_NM, 1) A2(0)<1> &nonuniform V0041(0,0)<0;1,0>
; CHECK: mov (M1_NM, 1) V0042(0,0)<1> r[A2(0),0]<0;1,0>:d
; CHECK: cmp.eq (M1, 16) P8 V0042(0,0)<0;1,0> nonuniform(0,0)<1;1,0>
; CHECK: and (M1_NM, 16) P8 P8 P5
; CHECK: (P8) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0042)[offsetTruncBroadcast]:a32
; CHECK: or (M1_NM, 16) P4 P4 P8
; CHECK: xor (M1_NM, 16) P5 P5 P8
; Unrolled iteration 4 (last), using A3 and P9: P4 is still updated, but no xor
; of P5 is expected after the load.
; CHECK: mov (M1_NM, 1) V0033(0,0)<1> P5
; CHECK: fbl (M1_NM, 1) V0043(0,0)<1> V0033(0,0)<0;1,0>
; CHECK: and (M1_NM, 1) V0043(0,0)<1> V0043(0,0)<0;1,0> 0xf:ud
; CHECK: shl (M1_NM, 1) V0043(0,0)<1> V0043(0,0)<0;1,0> 0x2:w
; CHECK: addr_add (M1_NM, 1) A3(0)<1> &nonuniform V0044(0,0)<0;1,0>
; CHECK: mov (M1_NM, 1) V0045(0,0)<1> r[A3(0),0]<0;1,0>:d
; CHECK: cmp.eq (M1, 16) P9 V0045(0,0)<0;1,0> nonuniform(0,0)<1;1,0>
; CHECK: and (M1_NM, 16) P9 P9 P5
; CHECK: (P9) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0045)[offsetTruncBroadcast]:a32
; CHECK: or (M1_NM, 16) P4 P4 P9
; Loop back-edge: the goto to the loop label is predicated on the complement of
; the accumulated mask P4.
; CHECK: (!P4) goto (M1, 16) _test1_001__opt_resource_loop
; Use the loaded vector: extract element %val and store it to %dst.
%out = extractelement <3 x i32> %call, i32 %val
store i32 %out, i32 addrspace(1)* %dst, align 1
ret void
}

declare <3 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v3i32.p2621440v4f32(<4 x float> addrspace(2621440)*, i32, i32, i1) #4

declare i16 @llvm.genx.GenISA.DCL.SystemValue.i16(i32) #1
declare i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32, i32, i32) #6

declare float @llvm.genx.GenISA.WaveAll.f32(float, i8, i32) #0
declare i32 @llvm.genx.GenISA.WaveBallot(i1, i32)
declare i32 @llvm.genx.GenISA.firstbitLo(i32)

attributes #4 = { argmemonly nounwind readonly }


!IGCMetadata = !{!0}
!igc.functions = !{!21}

!0 = !{!"ModuleMD", !1}
!1 = !{!"FuncMD", !2, !3}
!2 = !{!"FuncMDMap[0]", void (i32, i32, i32 addrspace(1)*)* @test1}
!3 = !{!"FuncMDValue[0]", !4, !17}
!4 = !{!"resAllocMD", !5}
!5 = !{!"argAllocMDList", !6, !10, !11, !14, !15, !16}
!6 = !{!"argAllocMDListVec[0]", !7, !8, !9}
!7 = !{!"type", i32 0}
!8 = !{!"extensionType", i32 -1}
!9 = !{!"indexType", i32 -1}
!10 = !{!"argAllocMDListVec[1]", !7, !8, !9}
!11 = !{!"argAllocMDListVec[2]", !12, !8, !13}
!12 = !{!"type", i32 1}
!13 = !{!"indexType", i32 0}
!14 = !{!"argAllocMDListVec[3]", !7, !8, !9}
!15 = !{!"argAllocMDListVec[4]", !7, !8, !9}
!16 = !{!"argAllocMDListVec[5]", !7, !8, !9}
!17 = !{!"m_OpenCLArgTypeQualifiers", !18, !19, !20}
!18 = !{!"m_OpenCLArgTypeQualifiersVec[0]", !""}
!19 = !{!"m_OpenCLArgTypeQualifiersVec[1]", !""}
!20 = !{!"m_OpenCLArgTypeQualifiersVec[2]", !""}
!21 = !{void (i32, i32, i32 addrspace(1)*)* @test1, !22}
!22 = !{!23}
!23 = !{!"function_type", i32 0}

Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
;=========================== begin_copyright_notice ============================
;
; Copyright (C) 2023-2024 Intel Corporation
;
; SPDX-License-Identifier: MIT
;
;============================ end_copyright_notice =============================
; REQUIRES: llvm-14-plus, regkeys
; RUN: igc_opt -platformbmg -igc-emit-visa -simd-mode 16 -inputrt -regkey ResourceLoopUnrollIteration=4,DumpVISAASMToConsole -S < %s | FileCheck %s
;
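; Test checks how we emit a ResourceLoop unrolled 4 times (ResourceLoopUnrollIteration=4)
; around a sample operation that uses a non-uniform sampler.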


@ThreadGroupSize_X = constant i32 64
@ThreadGroupSize_Y = constant i32 1
@ThreadGroupSize_Z = constant i32 1

%__2D_DIM_Resource = type opaque

; Kernel under test: samples a texture through a sampler whose handle is built
; from a per-lane system value, then stores the first component of the result.
; The expected vISA below wraps the sample in a resource loop whose body holds
; 4 unrolled sample operations.
define spir_kernel void @test1(<64 x i32> %src, float addrspace(1)* %dst) {
entry:
; CHECK: _main_0:

; Sampler handle: system value 17 widened to 32 bits. The expected vISA sources
; it from threadIdInGroupX with a 16-wide mov, i.e. one value per lane.
%svn0 = call i16 @llvm.genx.GenISA.DCL.SystemValue.i16(i32 17)
%sampler = zext i16 %svn0 to i32
%NonUniformSampler = inttoptr i32 %sampler to <4 x float> addrspace(2752518)*
; CHECK: mov (M1, 16) svn0(0,0)<1> threadIdInGroupX(0,0)<1;1,0>
; CHECK: mov (M1, 16) sampler(0,0)<1> svn0_0(0,0)<1;1,0>

; Texture handle: element 40 of the %src argument plus 1280 (0x500). The
; expected vISA computes it with a single 1-wide add.
%svn1 = extractelement <64 x i32> %src, i32 40
%texture = add i32 %svn1, 1280
%NonUniformTexture = inttoptr i32 %texture to %__2D_DIM_Resource addrspace(2621450)*
; CHECK: add (M1_NM, 1) texture(0,0)<1> src(2,8)<0;1,0> 0x500:w
; CHECK: mov (M1, 16) V0032(0,0)<1> 0x0:f

; The sample under test. Everything from here down to the final goto is the
; expected resource loop.
%call = tail call fast <4 x float> @llvm.genx.GenISA.sampleLptr.v4f32.f32.p2621443__2D_DIM_Resource.p2621443__2D_DIM_Resource.p2752518v4f32(float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, %__2D_DIM_Resource addrspace(2621450)* undef, %__2D_DIM_Resource addrspace(2621450)* %NonUniformTexture, <4 x float> addrspace(2752518)* %NonUniformSampler, i32 0, i32 0, i32 0)
; P5 is set by a self-compare (presumably the lanes that still need servicing).
; CHECK: cmp.eq (M1, 16) P5 V0033(0,0)<0;1,0> V0033(0,0)<0;1,0>
; CHECK: mov (M1_NM, 1) V0034(0,0)<1> P5
; Pick the first set lane of P7 (fbl), read that lane's sampler handle through
; A0 and compare it against every lane's handle to build the match mask P6.
; CHECK: setp (M1_NM, 16) P7 0x0:ud
; CHECK: cmp.eq (M1, 16) P7 V0035(0,0)<0;1,0> V0035(0,0)<0;1,0>
; CHECK: mov (M1_NM, 1) V0036(0,0)<1> P7
; CHECK: fbl (M1_NM, 1) V0037(0,0)<1> V0036(0,0)<0;1,0>
; CHECK: shl (M1_NM, 1) V0037(0,0)<1> V0037(0,0)<0;1,0> 0x2:w
; CHECK: addr_add (M1_NM, 1) A0(0)<1> &sampler_0 V0038(0,0)<0;1,0>
; CHECK: mov (M1_NM, 1) V0039(0,0)<1> r[A0(0),0]<0;1,0>:ud
; CHECK: cmp.eq (M1, 16) P6 V0039(0,0)<0;1,0> sampler_0(0,0)<1;1,0>
; Unrolled sample 1: set the sampler state (S31) and surface (%bss), then
; sample predicated on the match mask P6.
; CHECK: movs (M1_NM, 1) S31(0) V0039(0,0)<0;1,0>
; CHECK: movs (M1_NM, 1) %bss(0) texture(0,0)<0;1,0>
; CHECK: (P6) sample_lz.R (M1, 16) 0x0:uw S31 %bss call_.0 %null.0 V0032.0
; P4 accumulates the lanes that were serviced; P5 drops them from the remaining set.
; CHECK: or (M1_NM, 16) P4 P4 P6
; CHECK: xor (M1_NM, 16) P5 P5 P6
; CHECK: mov (M1_NM, 1) V0034(0,0)<1> P5
; Unrolled samples 2-4: the same movs/movs/sample_lz.R triple is expected three
; more times; these patterns omit the predicate, so they match whichever one
; is emitted.
; CHECK: movs (M1_NM, 1) S31(0) V0039(0,0)<0;1,0>
; CHECK: movs (M1_NM, 1) %bss(0) texture(0,0)<0;1,0>
; CHECK: sample_lz.R (M1, 16) 0x0:uw S31 %bss call_.0 %null.0 V0032.0
; CHECK: movs (M1_NM, 1) S31(0) V0039(0,0)<0;1,0>
; CHECK: movs (M1_NM, 1) %bss(0) texture(0,0)<0;1,0>
; CHECK: sample_lz.R (M1, 16) 0x0:uw S31 %bss call_.0 %null.0 V0032.0
; CHECK: movs (M1_NM, 1) S31(0) V0039(0,0)<0;1,0>
; CHECK: movs (M1_NM, 1) %bss(0) texture(0,0)<0;1,0>
; CHECK: sample_lz.R (M1, 16) 0x0:uw S31 %bss call_.0 %null.0 V0032.0
; Loop back-edge: the goto to the loop label is predicated on the complement of
; the accumulated mask P4.
; CHECK: (!P4) goto (M1, 16) _test1_001__opt_resource_loop

; Use the sampled vector: store its first component to %dst.
%out = extractelement <4 x float> %call, i32 0
store float %out, float addrspace(1)* %dst, align 4
ret void
}

declare <4 x float> @llvm.genx.GenISA.sampleLptr.v4f32.f32.p2621443__2D_DIM_Resource.p2621443__2D_DIM_Resource.p2752518v4f32(float, float, float, float, float, %__2D_DIM_Resource addrspace(2621450)*, %__2D_DIM_Resource addrspace(2621450)*, <4 x float> addrspace(2752518)*, i32, i32, i32) #4

declare i16 @llvm.genx.GenISA.DCL.SystemValue.i16(i32) #1
declare i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32, i32, i32) #6

declare float @llvm.genx.GenISA.WaveAll.f32(float, i8, i32) #0
declare i32 @llvm.genx.GenISA.WaveBallot(i1, i32)
declare i32 @llvm.genx.GenISA.firstbitLo(i32)

attributes #4 = { argmemonly nounwind readonly }


!IGCMetadata = !{!0}
!igc.functions = !{!21}

!0 = !{!"ModuleMD", !1}
!1 = !{!"FuncMD", !2, !3}
!2 = !{!"FuncMDMap[0]", void (<64 x i32>, float addrspace(1)*)* @test1}
!3 = !{!"FuncMDValue[0]", !4, !17}
!4 = !{!"resAllocMD", !5}
!5 = !{!"argAllocMDList", !6, !10, !11, !14, !15, !16}
!6 = !{!"argAllocMDListVec[0]", !7, !8, !9}
!7 = !{!"type", i32 0}
!8 = !{!"extensionType", i32 -1}
!9 = !{!"indexType", i32 -1}
!10 = !{!"argAllocMDListVec[1]", !7, !8, !9}
!11 = !{!"argAllocMDListVec[2]", !12, !8, !13}
!12 = !{!"type", i32 1}
!13 = !{!"indexType", i32 0}
!14 = !{!"argAllocMDListVec[3]", !7, !8, !9}
!15 = !{!"argAllocMDListVec[4]", !7, !8, !9}
!16 = !{!"argAllocMDListVec[5]", !7, !8, !9}
!17 = !{!"m_OpenCLArgTypeQualifiers", !18, !19, !20}
!18 = !{!"m_OpenCLArgTypeQualifiersVec[0]", !""}
!19 = !{!"m_OpenCLArgTypeQualifiersVec[1]", !""}
!20 = !{!"m_OpenCLArgTypeQualifiersVec[2]", !""}
!21 = !{void (<64 x i32>, float addrspace(1)*)* @test1, !22}
!22 = !{!23}
!23 = !{!"function_type", i32 0}

0 comments on commit ab8932d

Please sign in to comment.