update the test to address the comments

intel · Sep 18, 2024 · b56a34c · b56a34c
1 parent 4e6593f
commit b56a34c
Show file tree

Hide file tree

Showing 2 changed files with 23 additions and 25 deletions.
diff --git a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_arg_dim.cpp b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_arg_dim.cpp
@@ -1,16 +1,14 @@
-//==--- joint_matrix_bf16_fill_k_cache_OOB.cpp  - DPC++ joint_matrix--------==//
+//==--- joint_matrix_bf16_fill_k_cache_arg_dim.cpp  - DPC++ joint_matrix--------==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // REQUIRES: aspect-ext_intel_matrix
-
-// https://jira.devtools.intel.com/browse/GSD-9716
 // XFAIL: arch-intel_gpu_pvc
 
-// RUN: %{build} -o %t_arg_dim.out -ffp-model=precise -DARG_DIM -DVNNI
+// RUN: %{build} -o %t_arg_dim_vnni.out -ffp-model=precise -DARG_DIM -DVNNI
 // RUN: %{run} %t_arg_dim_vnni.out
 
 // RUN: %{build} -o %t_arg_dim.out -ffp-model=precise -DARG_DIM
@@ -19,4 +17,4 @@
 // -ffp-model=precise is added to not depend on compiler defaults.
 
 #include "common.hpp"
-#include "joint_matrix_bf16_fill_k_cache_impl.hpp"
+#include "joint_matrix_bf16_fill_k_cache_impl.hpp"
diff --git a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_impl.hpp b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_impl.hpp
@@ -36,22 +36,19 @@ static constexpr void manually_unroll_loop(F &&f) {
 
 template <size_t TM, size_t TN, size_t TK> class MatMul;
 
-#ifdef ARG_DIM
-template <size_t vnniFactor, typename TOperand, typename TResult, size_t TM,
-          size_t TN, size_t TK, size_t MCache1, size_t NCache1, size_t KCache1,
-          size_t MCache2, size_t NCache2, size_t KCache2>
-#else // ARG_DIM
-template <size_t rowsA, size_t colsA, size_t rowsB, size_t colsB,
+template <
+#ifndef ARG_DIM
+          size_t rowsA, size_t colsA, size_t rowsB, size_t colsB,
+#endif // ARG_DIM
           size_t vnniFactor, typename TOperand, typename TResult, size_t TM,
           size_t TN, size_t TK, size_t MCache1, size_t NCache1, size_t KCache1,
           size_t MCache2, size_t NCache2, size_t KCache2>
-#endif // ARG_DIM
 
+double joint_matmul(TOperand *A, TOperand *B, TResult *C, queue &q, int i
 #ifdef ARG_DIM
-double joint_matmul(TOperand *A, TOperand *B, TResult *C, queue &q, int i, size_t rowsA, size_t colsA, size_t rowsB, size_t colsB) {
-#else  // ARG_DIM
-double joint_matmul(TOperand *A, TOperand *B, TResult *C, queue &q, int i) {
-#endif // ARG_DIM
+                   , size_t rowsA, size_t colsA, size_t rowsB, size_t colsB
+#endif // ARG_DIM                   
+                  ) {
 
   size_t sgSize = get_sg_size<MatMul<TM, TN, TK>>(q);
   range<2> global{rowsA / MCache1, (colsB / NCache1) * sgSize};
@@ -393,17 +390,20 @@ void test() {
   // run testIterations time, aggregate and calculate average run time
   double totalDuration = 0;
   for (unsigned int i = 0; i < testIterations; i++) {
+
+  double duration =
+        joint_matmul<        
+#ifndef ARG_DIM
+                    MATRIX_SIZE, MATRIX_SIZE, MATRIX_SIZE, MATRIX_SIZE,
+#endif // ARG_DIM
+                    vnniFactor, T, TResult, TM, TN, TK, MCache1, NCache1,
+                    KCache1, MCache2, NCache2, KCache2>
+                    (A, B, C, q, i
 #ifdef ARG_DIM
-    double duration =
-        joint_matmul<vnniFactor, T, TResult, TM, TN, TK, MCache1, NCache1,
-                     KCache1, MCache2, NCache2, KCache2>(A, B, C, q, i, 
-                     MATRIX_SIZE, MATRIX_SIZE, MATRIX_SIZE, MATRIX_SIZE);
-#else // ARG_DIM
-    double duration =
-        joint_matmul<MATRIX_SIZE, MATRIX_SIZE, MATRIX_SIZE, MATRIX_SIZE,
-                     vnniFactor, T, TResult, TM, TN, TK, MCache1, NCache1,
-                     KCache1, MCache2, NCache2, KCache2>(A, B, C, q, i);
+                    , MATRIX_SIZE, MATRIX_SIZE, MATRIX_SIZE, MATRIX_SIZE
 #endif // ARG_DIM
+                    );
+
     if (i >= recordThresh) {
       totalDuration += duration;
     }