Skip to content

Commit

Permalink
add a way to generate tf32 dpas (currently disabled by default)
Browse files Browse the repository at this point in the history
  • Loading branch information
bashbaug committed Mar 1, 2024
1 parent d5c3d6d commit 18096ee
Showing 1 changed file with 7 additions and 0 deletions.
7 changes: 7 additions & 0 deletions samples/99_matrixexperimentstf32/matrix_helpers_tf32.cl
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ float emu_sub_group_tf32_tf32_matrix_mad_k8(float a, float8 b, float acc)
{
float res = acc;

#if 1
res = fma(sub_group_broadcast(a, 0), b.s0, res);
res = fma(sub_group_broadcast(a, 1), b.s1, res);
res = fma(sub_group_broadcast(a, 2), b.s2, res);
Expand All @@ -21,6 +22,12 @@ float emu_sub_group_tf32_tf32_matrix_mad_k8(float a, float8 b, float acc)
res = fma(sub_group_broadcast(a, 5), b.s5, res);
res = fma(sub_group_broadcast(a, 6), b.s6, res);
res = fma(sub_group_broadcast(a, 7), b.s7, res);
#else
float __attribute__((overloadable)) intel_sub_group_tf32_tf32_matrix_mad_k8_f32(short a, int8 b, float acc);
uint a_ui = as_uint(sub_group_shuffle(a, get_sub_group_local_id() / 2));
short aData = get_sub_group_local_id() % 2 ? as_short2(a_ui).hi : as_short2(a_ui).lo;
res = intel_sub_group_tf32_tf32_matrix_mad_k8_f32(aData, as_int8(b), res);
#endif

return res;
}
Expand Down

0 comments on commit 18096ee

Please sign in to comment.