From fed879fb854f1f31c33ef8419244c8695104c69d Mon Sep 17 00:00:00 2001 From: Bing1 Yu Date: Wed, 31 May 2023 19:38:45 +0800 Subject: [PATCH] [SYCL][Matrix] Change packed to ext_intel_packed --- .../sycl/ext/oneapi/matrix/matrix-intel.hpp | 7 ++++--- .../sycl/ext/oneapi/matrix/matrix-unified.hpp | 4 ++-- .../element_wise_all_ops_int8_packed_impl.hpp | 10 +++++----- .../element_wise_irreg_sum_rows_impl.hpp | 2 +- sycl/test-e2e/Matrix/element_wise_ops_impl.hpp | 2 +- .../Matrix/elemwise_irreg_size_ops_bf16.cpp | 2 +- .../Matrix/get_coord_bf16_gemm_impl.hpp | 2 +- .../Matrix/get_coord_bf16_matB_impl.hpp | 2 +- .../Matrix/joint_matrix_all_sizes_impl.hpp | 2 +- .../joint_matrix_bf16_fill_k_cache_impl.hpp | 18 +++++++++--------- .../joint_matrix_bfloat16_32x64_impl.hpp | 2 +- .../joint_matrix_bfloat16_array_impl.hpp | 2 +- .../Matrix/joint_matrix_bfloat16_impl.hpp | 2 +- .../test-e2e/Matrix/joint_matrix_half_impl.hpp | 2 +- .../Matrix/joint_matrix_query_default.cpp | 2 +- .../Matrix/joint_matrix_ss_int8_impl.hpp | 2 +- .../Matrix/joint_matrix_su_int8_impl.hpp | 2 +- .../Matrix/joint_matrix_us_int8_impl.hpp | 2 +- .../Matrix/joint_matrix_uu_int8_impl.hpp | 2 +- .../matrix/matrix_load_store_as.cpp | 2 +- .../matrix-bfloat16-test-coord-basicB.cpp | 2 +- sycl/test/matrix/matrix-bfloat16-test.cpp | 2 +- sycl/test/matrix/matrix-elemwise-ops.cpp | 2 +- sycl/test/matrix/matrix-int8-test.cpp | 2 +- sycl/test/matrix/query-use.cpp | 4 ++-- 25 files changed, 42 insertions(+), 41 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/matrix/matrix-intel.hpp b/sycl/include/sycl/ext/oneapi/matrix/matrix-intel.hpp index cee1a76bd9a77..63590e2fb8905 100644 --- a/sycl/include/sycl/ext/oneapi/matrix/matrix-intel.hpp +++ b/sycl/include/sycl/ext/oneapi/matrix/matrix-intel.hpp @@ -18,7 +18,7 @@ namespace sycl { __SYCL_INLINE_VER_NAMESPACE(_V1) { namespace ext { namespace intel::experimental::matrix::layout { -constexpr sycl::ext::oneapi::experimental::matrix::layout packed = +constexpr sycl::ext::oneapi::experimental::matrix::layout ext_intel_packed = static_cast(2); } namespace oneapi { @@ -36,8 +36,9 @@ template struct spv_matrix_layout_traits { SPV_MATRIX_LAYOUT_TRAITS(layout::row_major, __spv::MatrixLayout::RowMajor) SPV_MATRIX_LAYOUT_TRAITS(layout::col_major, __spv::MatrixLayout::ColumnMajor) -SPV_MATRIX_LAYOUT_TRAITS(sycl::ext::intel::experimental::matrix::layout::packed, - __spv::MatrixLayout::Packed) +SPV_MATRIX_LAYOUT_TRAITS( + sycl::ext::intel::experimental::matrix::layout::ext_intel_packed, + __spv::MatrixLayout::Packed) SPV_MATRIX_LAYOUT_TRAITS(layout::dynamic, __spv::MatrixLayout::Dynamic) template struct spv_matrix_use_traits { diff --git a/sycl/include/sycl/ext/oneapi/matrix/matrix-unified.hpp b/sycl/include/sycl/ext/oneapi/matrix/matrix-unified.hpp index dab25807e93b6..49026a39d325c 100644 --- a/sycl/include/sycl/ext/oneapi/matrix/matrix-unified.hpp +++ b/sycl/include/sycl/ext/oneapi/matrix/matrix-unified.hpp @@ -228,7 +228,7 @@ inline __SYCL_ALWAYS_INLINE void joint_matrix_load( Ptr, stride, __spv::MatrixLayout::ColumnMajor, spv_scope_traits::value); break; - case sycl::ext::intel::experimental::matrix::layout::packed: + case sycl::ext::intel::experimental::matrix::layout::ext_intel_packed: res.spvm = __spirv_JointMatrixLoadINTEL< DecorT, S, NumRows, NumCols, spv_matrix_use_traits::value, @@ -327,7 +327,7 @@ inline __SYCL_ALWAYS_INLINE void joint_matrix_store( Ptr, src.spvm, stride, __spv::MatrixLayout::ColumnMajor, spv_scope_traits::value); break; - case sycl::ext::intel::experimental::matrix::layout::packed: + case sycl::ext::intel::experimental::matrix::layout::ext_intel_packed: __spirv_JointMatrixStoreINTEL< DecorT, T, NumRows, NumCols, spv_matrix_use_traits::value, diff --git a/sycl/test-e2e/Matrix/element_wise_all_ops_int8_packed_impl.hpp b/sycl/test-e2e/Matrix/element_wise_all_ops_int8_packed_impl.hpp index 660fd9b88e4f3..012f73a4797ee 100644 --- a/sycl/test-e2e/Matrix/element_wise_all_ops_int8_packed_impl.hpp +++ b/sycl/test-e2e/Matrix/element_wise_all_ops_int8_packed_impl.hpp @@ -38,7 +38,7 @@ void matrix_verify_add(queue q, big_matrix &A, nd_range<2> &r, sub_group sg = spmd_item.get_sub_group(); joint_matrix + ext::intel::experimental::matrix::layout::ext_intel_packed> sub_b; joint_matrix_fill(sg, sub_b, 5); @@ -75,7 +75,7 @@ void matrix_verify_sub(queue q, big_matrix &A, nd_range<2> &r, sub_group sg = spmd_item.get_sub_group(); joint_matrix + ext::intel::experimental::matrix::layout::ext_intel_packed> sub_b; joint_matrix_fill(sg, sub_b, 5); @@ -112,7 +112,7 @@ void matrix_verify_mul(queue q, big_matrix &A, nd_range<2> &r, sub_group sg = spmd_item.get_sub_group(); joint_matrix + ext::intel::experimental::matrix::layout::ext_intel_packed> sub_b; joint_matrix_fill(sg, sub_b, 5); @@ -149,7 +149,7 @@ void matrix_verify_div(queue q, big_matrix &A, nd_range<2> &r, sub_group sg = spmd_item.get_sub_group(); joint_matrix + ext::intel::experimental::matrix::layout::ext_intel_packed> sub_b; joint_matrix_fill(sg, sub_b, 4); @@ -186,7 +186,7 @@ void matrix_verify_logic(queue q, big_matrix &A, nd_range<2> &r, sub_group sg = spmd_item.get_sub_group(); joint_matrix + ext::intel::experimental::matrix::layout::ext_intel_packed> sub_b; joint_matrix_fill(sg, sub_b, 5); diff --git a/sycl/test-e2e/Matrix/element_wise_irreg_sum_rows_impl.hpp b/sycl/test-e2e/Matrix/element_wise_irreg_sum_rows_impl.hpp index 5dd2e1e4807f8..d63f36362e551 100644 --- a/sycl/test-e2e/Matrix/element_wise_irreg_sum_rows_impl.hpp +++ b/sycl/test-e2e/Matrix/element_wise_irreg_sum_rows_impl.hpp @@ -46,7 +46,7 @@ void matrix_sum_rows(queue q, big_matrix &B, nd_range<2> &r) { ext::oneapi::sub_group sg = spmd_item.get_sub_group(); joint_matrix + ext::intel::experimental::matrix::layout::ext_intel_packed> sub_b; joint_matrix_load( diff --git a/sycl/test-e2e/Matrix/element_wise_ops_impl.hpp b/sycl/test-e2e/Matrix/element_wise_ops_impl.hpp index 67f32c967f789..1732a8104e2d9 100644 --- a/sycl/test-e2e/Matrix/element_wise_ops_impl.hpp +++ b/sycl/test-e2e/Matrix/element_wise_ops_impl.hpp @@ -53,7 +53,7 @@ void matrix_multiply(big_matrix &C, sub_a; // For B, we assume B has been already VNNIed. joint_matrix + ext::intel::experimental::matrix::layout::ext_intel_packed> sub_b; joint_matrix sub_c; diff --git a/sycl/test-e2e/Matrix/elemwise_irreg_size_ops_bf16.cpp b/sycl/test-e2e/Matrix/elemwise_irreg_size_ops_bf16.cpp index 8e2865de207b4..12c335417bf9a 100644 --- a/sycl/test-e2e/Matrix/elemwise_irreg_size_ops_bf16.cpp +++ b/sycl/test-e2e/Matrix/elemwise_irreg_size_ops_bf16.cpp @@ -81,7 +81,7 @@ void matrix_multiply(big_matrix &C, sub_a; // For B, we assume B has been already VNNIed. joint_matrix + ext::intel::experimental::matrix::layout::ext_intel_packed> sub_b; joint_matrix sub_c; joint_matrix_load( diff --git a/sycl/test-e2e/Matrix/get_coord_bf16_gemm_impl.hpp b/sycl/test-e2e/Matrix/get_coord_bf16_gemm_impl.hpp index c154ff9f7d36a..bc72cb7eef0f3 100644 --- a/sycl/test-e2e/Matrix/get_coord_bf16_gemm_impl.hpp +++ b/sycl/test-e2e/Matrix/get_coord_bf16_gemm_impl.hpp @@ -73,7 +73,7 @@ void matrix_multiply(big_matrix &C, big_matrix &A, sub_a; // For B, we assume B has been already VNNIed. joint_matrix + ext::intel::experimental::matrix::layout::ext_intel_packed> sub_b; joint_matrix sub_c; diff --git a/sycl/test-e2e/Matrix/get_coord_bf16_matB_impl.hpp b/sycl/test-e2e/Matrix/get_coord_bf16_matB_impl.hpp index 76a8968239ced..f85bec8dbf023 100644 --- a/sycl/test-e2e/Matrix/get_coord_bf16_matB_impl.hpp +++ b/sycl/test-e2e/Matrix/get_coord_bf16_matB_impl.hpp @@ -136,7 +136,7 @@ void matrix_sum_cols(queue q, big_matrix &B, nd_range<2> &r) { // TK = 32, TN = 16 joint_matrix + ext::intel::experimental::matrix::layout::ext_intel_packed> sub_b; joint_matrix_load( diff --git a/sycl/test-e2e/Matrix/joint_matrix_all_sizes_impl.hpp b/sycl/test-e2e/Matrix/joint_matrix_all_sizes_impl.hpp index a64a51da5182d..84fd76867df77 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_all_sizes_impl.hpp +++ b/sycl/test-e2e/Matrix/joint_matrix_all_sizes_impl.hpp @@ -57,7 +57,7 @@ void matrix_multiply(big_matrix &C, big_matrix &A, joint_matrix sub_a; // For B, we assume B has been already VNNIed. joint_matrix + ext::intel::experimental::matrix::layout::ext_intel_packed> sub_b; joint_matrix sub_c; diff --git a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_impl.hpp b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_impl.hpp index 29098c6380982..eb2786193628e 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_impl.hpp +++ b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_impl.hpp @@ -153,35 +153,35 @@ double joint_matmul(TOperand *A, TOperand *B, TResult *C, queue &q, int i) { ; joint_matrix + ext::intel::experimental::matrix::layout::ext_intel_packed> tB[NCACHE1 / tN][KCACHE2 / KCACHE1] #ifdef INIT_LIST = { joint_matrix< sub_group, TOperand, use::b, tK, tN, - ext::intel::experimental::matrix::layout::packed>(), + ext::intel::experimental::matrix::layout::ext_intel_packed>(), joint_matrix< sub_group, TOperand, use::b, tK, tN, - ext::intel::experimental::matrix::layout::packed>(), + ext::intel::experimental::matrix::layout::ext_intel_packed>(), joint_matrix< sub_group, TOperand, use::b, tK, tN, - ext::intel::experimental::matrix::layout::packed>(), + ext::intel::experimental::matrix::layout::ext_intel_packed>(), joint_matrix< sub_group, TOperand, use::b, tK, tN, - ext::intel::experimental::matrix::layout::packed>(), + ext::intel::experimental::matrix::layout::ext_intel_packed>(), joint_matrix< sub_group, TOperand, use::b, tK, tN, - ext::intel::experimental::matrix::layout::packed>(), + ext::intel::experimental::matrix::layout::ext_intel_packed>(), joint_matrix< sub_group, TOperand, use::b, tK, tN, - ext::intel::experimental::matrix::layout::packed>(), + ext::intel::experimental::matrix::layout::ext_intel_packed>(), joint_matrix< sub_group, TOperand, use::b, tK, tN, - ext::intel::experimental::matrix::layout::packed>(), + ext::intel::experimental::matrix::layout::ext_intel_packed>(), joint_matrix< sub_group, TOperand, use::b, tK, tN, - ext::intel::experimental::matrix::layout::packed>(), + ext::intel::experimental::matrix::layout::ext_intel_packed>(), } #endif ; diff --git a/sycl/test-e2e/Matrix/joint_matrix_bfloat16_32x64_impl.hpp b/sycl/test-e2e/Matrix/joint_matrix_bfloat16_32x64_impl.hpp index cc0196660744a..dae2de167a5a8 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_bfloat16_32x64_impl.hpp +++ b/sycl/test-e2e/Matrix/joint_matrix_bfloat16_32x64_impl.hpp @@ -47,7 +47,7 @@ void matrix_multiply(big_matrix &C, big_matrix &A, sub_a; // For B, we assume B has been already VNNIed. joint_matrix + ext::intel::experimental::matrix::layout::ext_intel_packed> sub_b; joint_matrix sub_c; diff --git a/sycl/test-e2e/Matrix/joint_matrix_bfloat16_array_impl.hpp b/sycl/test-e2e/Matrix/joint_matrix_bfloat16_array_impl.hpp index d1751386a8d51..eb6e3aa68c152 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_bfloat16_array_impl.hpp +++ b/sycl/test-e2e/Matrix/joint_matrix_bfloat16_array_impl.hpp @@ -61,7 +61,7 @@ void matrix_multiply(big_matrix &C, big_matrix &A, // For B, we assume B has been already VNNIed. joint_matrix + ext::intel::experimental::matrix::layout::ext_intel_packed> sub_b; joint_matrix sub_c[JM_ARRAY_SZ]; diff --git a/sycl/test-e2e/Matrix/joint_matrix_bfloat16_impl.hpp b/sycl/test-e2e/Matrix/joint_matrix_bfloat16_impl.hpp index 4dfb4b929041c..fa0dbc7d919d4 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_bfloat16_impl.hpp +++ b/sycl/test-e2e/Matrix/joint_matrix_bfloat16_impl.hpp @@ -47,7 +47,7 @@ void matrix_multiply(big_matrix &C, big_matrix &A, sub_a; // For B, we assume B has been already VNNIed. joint_matrix + ext::intel::experimental::matrix::layout::ext_intel_packed> sub_b; joint_matrix sub_c; diff --git a/sycl/test-e2e/Matrix/joint_matrix_half_impl.hpp b/sycl/test-e2e/Matrix/joint_matrix_half_impl.hpp index 453d217a6a61d..233da594052a3 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_half_impl.hpp +++ b/sycl/test-e2e/Matrix/joint_matrix_half_impl.hpp @@ -52,7 +52,7 @@ void matrix_multiply(big_matrix &C, sub_a; // For B, we assume B has been already VNNIed. joint_matrix + ext::intel::experimental::matrix::layout::ext_intel_packed> sub_b; joint_matrix sub_c; diff --git a/sycl/test-e2e/Matrix/joint_matrix_query_default.cpp b/sycl/test-e2e/Matrix/joint_matrix_query_default.cpp index 5e4c8250b3b4b..71e896caf570e 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_query_default.cpp +++ b/sycl/test-e2e/Matrix/joint_matrix_query_default.cpp @@ -78,7 +78,7 @@ void matrix_multiply(big_matrix &C, myparams2::joint_matrix_a sub_a; myparams2::joint_matrix_b< - sub_group, ext::intel::experimental::matrix::layout::packed> + sub_group, ext::intel::experimental::matrix::layout::ext_intel_packed> sub_b; myparams2::joint_matrix_accumulator sub_c; diff --git a/sycl/test-e2e/Matrix/joint_matrix_ss_int8_impl.hpp b/sycl/test-e2e/Matrix/joint_matrix_ss_int8_impl.hpp index 4042cc1730d7f..cd0b391a9106e 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_ss_int8_impl.hpp +++ b/sycl/test-e2e/Matrix/joint_matrix_ss_int8_impl.hpp @@ -53,7 +53,7 @@ void matrix_multiply(big_matrix &C, sub_a; // For B, we assume B has been already VNNIed. joint_matrix + ext::intel::experimental::matrix::layout::ext_intel_packed> sub_b; joint_matrix sub_c; diff --git a/sycl/test-e2e/Matrix/joint_matrix_su_int8_impl.hpp b/sycl/test-e2e/Matrix/joint_matrix_su_int8_impl.hpp index faeb2ca7b12b1..81f4735e77ec4 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_su_int8_impl.hpp +++ b/sycl/test-e2e/Matrix/joint_matrix_su_int8_impl.hpp @@ -53,7 +53,7 @@ void matrix_multiply(big_matrix &C, sub_a; // For B, we assume B has been already VNNIed. joint_matrix + ext::intel::experimental::matrix::layout::ext_intel_packed> sub_b; joint_matrix sub_c; diff --git a/sycl/test-e2e/Matrix/joint_matrix_us_int8_impl.hpp b/sycl/test-e2e/Matrix/joint_matrix_us_int8_impl.hpp index 5eb63fac8075d..6652cf97be79a 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_us_int8_impl.hpp +++ b/sycl/test-e2e/Matrix/joint_matrix_us_int8_impl.hpp @@ -55,7 +55,7 @@ void matrix_multiply(big_matrix &C, sub_a; // For B, we assume B has been already VNNIed. joint_matrix + ext::intel::experimental::matrix::layout::ext_intel_packed> sub_b; joint_matrix sub_c; diff --git a/sycl/test-e2e/Matrix/joint_matrix_uu_int8_impl.hpp b/sycl/test-e2e/Matrix/joint_matrix_uu_int8_impl.hpp index 62bad8422833e..8ee35e944181c 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_uu_int8_impl.hpp +++ b/sycl/test-e2e/Matrix/joint_matrix_uu_int8_impl.hpp @@ -53,7 +53,7 @@ void matrix_multiply(big_matrix &C, sub_a; // For B, we assume B has been already VNNIed. joint_matrix + ext::intel::experimental::matrix::layout::ext_intel_packed> sub_b; joint_matrix sub_c; diff --git a/sycl/test/check_device_code/matrix/matrix_load_store_as.cpp b/sycl/test/check_device_code/matrix/matrix_load_store_as.cpp index b66be2a0b6187..9c90aec6f7f1f 100644 --- a/sycl/test/check_device_code/matrix/matrix_load_store_as.cpp +++ b/sycl/test/check_device_code/matrix/matrix_load_store_as.cpp @@ -27,7 +27,7 @@ int main(void) { layout::row_major> tA; joint_matrix + ext::intel::experimental::matrix::layout::ext_intel_packed> tB; joint_matrix tC; diff --git a/sycl/test/matrix/matrix-bfloat16-test-coord-basicB.cpp b/sycl/test/matrix/matrix-bfloat16-test-coord-basicB.cpp index 2f7424ac33525..750e24844d20c 100644 --- a/sycl/test/matrix/matrix-bfloat16-test-coord-basicB.cpp +++ b/sycl/test/matrix/matrix-bfloat16-test-coord-basicB.cpp @@ -155,7 +155,7 @@ void matrix_sum_cols(queue q, big_matrix &B, nd_range<2> &r) { // TK = 32, TN = 16 joint_matrix + ext::intel::experimental::matrix::layout::ext_intel_packed> sub_b; joint_matrix_load( diff --git a/sycl/test/matrix/matrix-bfloat16-test.cpp b/sycl/test/matrix/matrix-bfloat16-test.cpp index 2e0e309081464..922818538cacc 100644 --- a/sycl/test/matrix/matrix-bfloat16-test.cpp +++ b/sycl/test/matrix/matrix-bfloat16-test.cpp @@ -68,7 +68,7 @@ void matrix_multiply(big_matrix &C, // the packed_b layout. By default, the layout is row_major and size // is (TK, TN). joint_matrix + sycl::ext::intel::experimental::matrix::layout::ext_intel_packed> sub_b; joint_matrix sub_c; diff --git a/sycl/test/matrix/matrix-elemwise-ops.cpp b/sycl/test/matrix/matrix-elemwise-ops.cpp index 3205e4c346ba6..eab63a50d5ad0 100644 --- a/sycl/test/matrix/matrix-elemwise-ops.cpp +++ b/sycl/test/matrix/matrix-elemwise-ops.cpp @@ -69,7 +69,7 @@ void matrix_multiply(big_matrix &C, // the packed_b layout. By default, the layout is row_major and size // is (TK, TN). joint_matrix + sycl::ext::intel::experimental::matrix::layout::ext_intel_packed> sub_b; joint_matrix sub_c; diff --git a/sycl/test/matrix/matrix-int8-test.cpp b/sycl/test/matrix/matrix-int8-test.cpp index 63866c19f89fa..5553d75014cf7 100644 --- a/sycl/test/matrix/matrix-int8-test.cpp +++ b/sycl/test/matrix/matrix-int8-test.cpp @@ -74,7 +74,7 @@ void matrix_multiply(big_matrix &C, // the packed_b layout. By default, the layout is row_major and size // is (TK, TN). joint_matrix + sycl::ext::intel::experimental::matrix::layout::ext_intel_packed> sub_b; joint_matrix sub_c; diff --git a/sycl/test/matrix/query-use.cpp b/sycl/test/matrix/query-use.cpp index 9afc8e1173043..dcbedef673a9d 100644 --- a/sycl/test/matrix/query-use.cpp +++ b/sycl/test/matrix/query-use.cpp @@ -64,7 +64,7 @@ void query_amx() { sub_group sg = spmd_item.get_sub_group(); myparams2::joint_matrix_a sub_a1; myparams2::joint_matrix_b< - sub_group, sycl::ext::intel::experimental::matrix::layout::packed> + sub_group, sycl::ext::intel::experimental::matrix::layout::ext_intel_packed> sub_b1; myparams2::joint_matrix_accumulator sub_c1; @@ -144,7 +144,7 @@ void query_xmx8() { sub_group sg = spmd_item.get_sub_group(); myparams2::joint_matrix_a sub_a1; myparams2::joint_matrix_b< - sub_group, sycl::ext::intel::experimental::matrix::layout::packed> + sub_group, sycl::ext::intel::experimental::matrix::layout::ext_intel_packed> sub_b1; myparams2::joint_matrix_accumulator sub_c1;