Skip to content

Commit

Permalink
Temporarily disable the large A matrix block load
Browse files Browse the repository at this point in the history
The large A matrix block load is not working (it fails silently) with some
recent drivers, so disable it for now. Ideally, we will be able to re-enable it shortly.
  • Loading branch information
bashbaug committed Jan 19, 2024
1 parent 756d2e9 commit 4caea7b
Showing 1 changed file with 16 additions and 16 deletions.
32 changes: 16 additions & 16 deletions samples/99_matrixexperiments/matrix_kernel_tiled.cl
Original file line number Diff line number Diff line change
Expand Up @@ -202,17 +202,17 @@ kernel void MM_KERNEL_NAME(bfloat16_dpas_blockread_rowmajor_tiled, 8, 16, MM, NN

for (int k = 0; k < K; k += tK) {
short8 aData[MM];
if (MM % 2 == 0) {
for (int mm = 0; mm < MM; mm += 2) {
short16 aTemp = as_short16(intel_subgroup_block_read_u16_m16k16(A, K * sizeof(ushort), M, K * sizeof(ushort), (int2)(k, m + mm * tM)));
aData[mm + 0] = aTemp.lo;
aData[mm + 1] = aTemp.hi;
}
} else {
//if (MM % 2 == 0) {
// for (int mm = 0; mm < MM; mm += 2) {
// short16 aTemp = as_short16(intel_subgroup_block_read_u16_m16k16(A, K * sizeof(ushort), M, K * sizeof(ushort), (int2)(k, m + mm * tM)));
// aData[mm + 0] = aTemp.lo;
// aData[mm + 1] = aTemp.hi;
// }
//} else {
for (int mm = 0; mm < MM; mm++) {
aData[mm] = as_short8(intel_subgroup_block_read_u16_m8k16(A, K * sizeof(ushort), M, K * sizeof(ushort), (int2)(k, m + mm * tM)));
}
}
//}

int8 bData[NN];
for (int nn = 0; nn < NN; nn++) {
Expand Down Expand Up @@ -252,17 +252,17 @@ kernel void MM_KERNEL_NAME(bfloat16_dpas_blockread_vnni_tiled, 8, 16, MM, NN)(gl

for (int k = 0; k < K; k += tK) {
short8 aData[MM];
if (MM % 2 == 0) {
for (int mm = 0; mm < MM; mm += 2) {
short16 aTemp = as_short16(intel_subgroup_block_read_u16_m16k16(A, K * sizeof(ushort), M, K * sizeof(ushort), (int2)(k, m + mm * tM)));
aData[mm + 0] = aTemp.lo;
aData[mm + 1] = aTemp.hi;
}
} else {
//if (MM % 2 == 0) {
// for (int mm = 0; mm < MM; mm += 2) {
// short16 aTemp = as_short16(intel_subgroup_block_read_u16_m16k16(A, K * sizeof(ushort), M, K * sizeof(ushort), (int2)(k, m + mm * tM)));
// aData[mm + 0] = aTemp.lo;
// aData[mm + 1] = aTemp.hi;
// }
//} else {
for (int mm = 0; mm < MM; mm++) {
aData[mm] = as_short8(intel_subgroup_block_read_u16_m8k16(A, K * sizeof(ushort), M, K * sizeof(ushort), (int2)(k, m + mm * tM)));
}
}
//}

int8 bData[NN];
for (int nn = 0; nn < NN; nn++) {
Expand Down

0 comments on commit 4caea7b

Please sign in to comment.