Skip to content

Commit

Permalink
increase prefetch distance
Browse files Browse the repository at this point in the history
add helper functions for tiled kernels
  • Loading branch information
bashbaug committed Mar 2, 2024
1 parent 18096ee commit 459c109
Show file tree
Hide file tree
Showing 2 changed files with 244 additions and 267 deletions.
21 changes: 14 additions & 7 deletions samples/99_matrixexperiments/matrix_helpers.cl
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,8 @@ void prefetch_a_rowmajor_d16_m8_k16v2_sg8(global ushort* A, int rowStart, int co
{
#if defined(PREFETCH_DEFAULT)
uint offset = colStart + (rowStart + get_sub_group_local_id()) * stride;
- prefetch(A + offset, 1);
+ __builtin_assume((ulong)(A + offset) % 4 == 0);
+ prefetch(A + offset, 8);
#endif // defined(PREFETCH_DEFAULT)
}

Expand Down Expand Up @@ -377,7 +378,8 @@ void prefetch_a_rowmajor_d16_m8v2_k16v2_sg16(global ushort* A, int rowStart, int
{
#if defined(PREFETCH_DEFAULT)
uint offset = colStart + (rowStart + get_sub_group_local_id()) * stride;
- prefetch(A + offset, 1);
+ __builtin_assume((ulong)(A + offset) % 4 == 0);
+ prefetch(A + offset, 8);
#endif // defined(PREFETCH_DEFAULT)
}

Expand Down Expand Up @@ -446,8 +448,10 @@ void prefetch_b_rowmajor_d16_k16_n8v4_sg8(global ushort* B, int rowStart, int co
{
#if defined(PREFETCH_DEFAULT)
uint offset = colStart + (rowStart + get_sub_group_local_id()) * stride;
- prefetch(B + offset, 1); offset += 8 * stride;
- prefetch(B + offset, 1); offset += 8 * stride;
+ __builtin_assume((ulong)(B + offset) % 4 == 0);
+ prefetch(B + offset, 8); offset += 8 * stride;
+ __builtin_assume((ulong)(B + offset) % 4 == 0);
+ prefetch(B + offset, 8); offset += 8 * stride;
#endif // defined(PREFETCH_DEFAULT)
}

Expand All @@ -456,7 +460,8 @@ void prefetch_b_rowmajor_d16_k16_n16v2_sg16(global ushort* B, int rowStart, int
{
#if defined(PREFETCH_DEFAULT)
uint offset = colStart + (rowStart + get_sub_group_local_id()) * stride;
- prefetch(B + offset, 1);
+ __builtin_assume((ulong)(B + offset) % 4 == 0);
+ prefetch(B + offset, 8);
#endif // defined(PREFETCH_DEFAULT)
}

Expand All @@ -466,7 +471,8 @@ void prefetch_b_vnni_d16_k16_n8v2_sg8(global ushort* B, int rowStart, int colSta
#if defined(PREFETCH_DEFAULT)
global uint* B_ui = (global uint*)B;
uint offset_ui = colStart + (rowStart / 2 + get_sub_group_local_id()) * stride;
- prefetch(B_ui + offset_ui, 1);
+ __builtin_assume((ulong)(B_ui + offset_ui) % 4 == 0);
+ prefetch(B_ui + offset_ui, 4);
#endif // defined(PREFETCH_DEFAULT)
}

Expand All @@ -476,7 +482,8 @@ void prefetch_b_vnni_d16_k16v2_n16_sg16(global ushort* B, int rowStart, int colS
#if defined(PREFETCH_DEFAULT)
global uint* B_ui = (global uint*)B;
uint offset_ui = colStart + (rowStart / 2 + get_sub_group_local_id()) * stride;
- prefetch(B_ui + offset_ui, 1);
+ __builtin_assume((ulong)(B_ui + offset_ui) % 4 == 0);
+ prefetch(B_ui + offset_ui, 4);
#endif // defined(PREFETCH_DEFAULT)
}

Expand Down
Loading

0 comments on commit 459c109

Please sign in to comment.