From b56a34ce1142ddb858db37a802558d91c4c650de Mon Sep 17 00:00:00 2001 From: "Zhang, Yixing" Date: Wed, 18 Sep 2024 14:19:20 -0700 Subject: [PATCH] update the test to address the comments --- ...joint_matrix_bf16_fill_k_cache_arg_dim.cpp | 8 ++-- .../joint_matrix_bf16_fill_k_cache_impl.hpp | 40 +++++++++---------- 2 files changed, 23 insertions(+), 25 deletions(-) diff --git a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_arg_dim.cpp b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_arg_dim.cpp index aa2540b3781b7..5caf08a5f6bfc 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_arg_dim.cpp +++ b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_arg_dim.cpp @@ -1,4 +1,4 @@ -//==--- joint_matrix_bf16_fill_k_cache_OOB.cpp - DPC++ joint_matrix--------==// +//==--- joint_matrix_bf16_fill_k_cache_arg_dim.cpp - DPC++ joint_matrix--------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,11 +6,9 @@ // //===----------------------------------------------------------------------===// // REQUIRES: aspect-ext_intel_matrix - -// https://jira.devtools.intel.com/browse/GSD-9716 // XFAIL: arch-intel_gpu_pvc -// RUN: %{build} -o %t_arg_dim.out -ffp-model=precise -DARG_DIM -DVNNI +// RUN: %{build} -o %t_arg_dim_vnni.out -ffp-model=precise -DARG_DIM -DVNNI // RUN: %{run} %t_arg_dim_vnni.out // RUN: %{build} -o %t_arg_dim.out -ffp-model=precise -DARG_DIM @@ -19,4 +17,4 @@ // -ffp-model=precise is added to not depend on compiler defaults. 
#include "common.hpp" -#include "joint_matrix_bf16_fill_k_cache_impl.hpp" \ No newline at end of file +#include "joint_matrix_bf16_fill_k_cache_impl.hpp" diff --git a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_impl.hpp b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_impl.hpp index 4584aedbe3d01..47cfab5506187 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_impl.hpp +++ b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_impl.hpp @@ -36,22 +36,19 @@ static constexpr void manually_unroll_loop(F &&f) { template class MatMul; -#ifdef ARG_DIM -template -#else // ARG_DIM -template -#endif // ARG_DIM +double joint_matmul(TOperand *A, TOperand *B, TResult *C, queue &q, int i #ifdef ARG_DIM -double joint_matmul(TOperand *A, TOperand *B, TResult *C, queue &q, int i, size_t rowsA, size_t colsA, size_t rowsB, size_t colsB) { -#else // ARG_DIM -double joint_matmul(TOperand *A, TOperand *B, TResult *C, queue &q, int i) { -#endif // ARG_DIM + , size_t rowsA, size_t colsA, size_t rowsB, size_t colsB +#endif // ARG_DIM + ) { size_t sgSize = get_sg_size>(q); range<2> global{rowsA / MCache1, (colsB / NCache1) * sgSize}; @@ -393,17 +390,20 @@ void test() { // run testIterations time, aggregate and calculate average run time double totalDuration = 0; for (unsigned int i = 0; i < testIterations; i++) { + + double duration = + joint_matmul< +#ifndef ARG_DIM + MATRIX_SIZE, MATRIX_SIZE, MATRIX_SIZE, MATRIX_SIZE, +#endif // ARG_DIM + vnniFactor, T, TResult, TM, TN, TK, MCache1, NCache1, + KCache1, MCache2, NCache2, KCache2> + (A, B, C, q, i #ifdef ARG_DIM - double duration = - joint_matmul(A, B, C, q, i, - MATRIX_SIZE, MATRIX_SIZE, MATRIX_SIZE, MATRIX_SIZE); -#else // ARG_DIM - double duration = - joint_matmul(A, B, C, q, i); + , MATRIX_SIZE, MATRIX_SIZE, MATRIX_SIZE, MATRIX_SIZE #endif // ARG_DIM + ); + if (i >= recordThresh) { totalDuration += duration; }