From 9a0317a38bf3b1d83ad99a91085b0fb92cf32d37 Mon Sep 17 00:00:00 2001
From: Ben Ashbaugh
Date: Fri, 8 Mar 2024 14:51:26 -0800
Subject: [PATCH] fix the cooperative prefetching indexing calculation

---
 samples/99_matrixexperiments/matrix_kernel_tiled.cl | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/samples/99_matrixexperiments/matrix_kernel_tiled.cl b/samples/99_matrixexperiments/matrix_kernel_tiled.cl
index 1f8c08c..da5c8e4 100644
--- a/samples/99_matrixexperiments/matrix_kernel_tiled.cl
+++ b/samples/99_matrixexperiments/matrix_kernel_tiled.cl
@@ -555,9 +555,8 @@ void HELPER_NAME(atile_block_prefetch_rowmajor, MM, NN)(global ushort* A, int tM
 
 void HELPER_NAME(btile_block_prefetch_rowmajor, MM, NN)(global ushort* B, int tN, int K, int N, int k, int n)
 {
-    const int NUM_SGS = SGS_PER_WG_X * SGS_PER_WG_Y;
-    if (KK % 2 == 0 & NN == 4 & NUM_SGS >= 2) {
-        const int nn = (get_sub_group_id() % 2) * 2;
+    if (KK % 2 == 0 & NN == 4 & SGS_PER_WG_Y >= 2) {
+        const int nn = (get_sub_group_id() / SGS_PER_WG_X) % 2 * 2;
         for (int kk = 0; kk < KK; kk+=2) {
             intel_subgroup_block_prefetch_u16_m32k16v2(B, N * sizeof(ushort), K, N * sizeof(ushort), (int2)(n + nn * tN, k + kk * tK));
         }
@@ -590,9 +589,8 @@ void HELPER_NAME(btile_block_prefetch_rowmajor, MM, NN)(global ushort* B, int tN
 
 void HELPER_NAME(btile_block_prefetch_vnni, MM, NN)(global ushort* B, int tN, int K, int N, int k, int n)
 {
-    const int NUM_SGS = SGS_PER_WG_X * SGS_PER_WG_Y;
-    if (KK % 2 == 0 & NN == 4 & NUM_SGS >= 4) {
-        const int nn = get_sub_group_id() % 4;
+    if (KK % 2 == 0 & NN == 4 & SGS_PER_WG_Y >= 4) {
+        const int nn = (get_sub_group_id() / SGS_PER_WG_X) % 4;
         for (int kk = 0; kk < KK; kk+=2) {
             intel_subgroup_block_prefetch_u32_m16k16(B, N * sizeof(uint), K, N * sizeof(uint), (int2)(n + nn * tN, (k + kk * tK) / 2));
         }