diff --git a/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_rowmajorB_load_store.cpp b/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_rowmajorB_load_store.cpp new file mode 100644 index 0000000000000..731ae6a053bec --- /dev/null +++ b/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_rowmajorB_load_store.cpp @@ -0,0 +1,17 @@ +//==---- joint_matrix_bf16_rowmajorB_load_store.cpp - DPC++ joint_matrix ---==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// REQUIRES: aspect-ext_intel_matrix, cpu + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include "../common.hpp" + +#define SG_SZ 32 + +#include "../joint_matrix_bf16_rowmajorB_load_store_impl.hpp" diff --git a/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_rowmajorB_pair_load_store.cpp b/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_rowmajorB_pair_load_store.cpp new file mode 100644 index 0000000000000..890938ae59c43 --- /dev/null +++ b/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_rowmajorB_pair_load_store.cpp @@ -0,0 +1,17 @@ +//==- joint_matrix_bf16_rowmajorB_pair_load_store.cpp - DPC++ joint_matrix--==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// REQUIRES: aspect-ext_intel_matrix, cpu + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include "../common.hpp" + +#define SG_SZ 32 + +#include "../joint_matrix_bf16_rowmajorB_pair_load_store_impl.hpp" diff --git a/sycl/test-e2e/Matrix/joint_matrix_bf16_rowmajorB_load_store.cpp b/sycl/test-e2e/Matrix/joint_matrix_bf16_rowmajorB_load_store.cpp new file mode 100644 index 0000000000000..d41c746f3514f --- /dev/null +++ b/sycl/test-e2e/Matrix/joint_matrix_bf16_rowmajorB_load_store.cpp @@ -0,0 +1,15 @@ +//==---- joint_matrix_bf16_rowmajorB_load_store.cpp - DPC++ joint_matrix ---==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// REQUIRES: aspect-ext_intel_matrix, cpu + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include "common.hpp" + +#include "joint_matrix_bf16_rowmajorB_load_store_impl.hpp" diff --git a/sycl/test-e2e/Matrix/joint_matrix_bf16_rowmajorB_load_store_impl.hpp b/sycl/test-e2e/Matrix/joint_matrix_bf16_rowmajorB_load_store_impl.hpp new file mode 100644 index 0000000000000..b9f474b9758dc --- /dev/null +++ b/sycl/test-e2e/Matrix/joint_matrix_bf16_rowmajorB_load_store_impl.hpp @@ -0,0 +1,64 @@ +//------------------------------------------------------------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-------------------------------------------------------------------------===// + +#include + +template +void joint_B_rowmajor_load_store(Tb *B, Tb *OutB, queue &q) { + + range<1> global{1}; + range<1> local{1}; + + q.submit([&](handler &h) { + h.parallel_for( + nd_range<1>{global, local}, [=](nd_item<1> it) +#ifdef SG_SZ + [[intel::reqd_sub_group_size(SG_SZ)]] +#endif + { + auto pB = + address_space_cast(B); + auto pOutB = + address_space_cast(OutB); + + auto sg = it.get_sub_group(); + + joint_matrix tB; + + joint_matrix_load(sg, tB, pB, cols); + ext::intel::experimental::matrix::joint_matrix_store(sg, tB, pOutB, + cols); + }); // parallel_for + }); // queue.submit + + q.wait(); +} + +template void test(queue &q) { + Tb *B = malloc_shared(ROW_SIZE * COL_SIZE, q); + Tb *outB = malloc_shared(ROW_SIZE * COL_SIZE, q); + + matrix_fill(ROW_SIZE, COL_SIZE, B, [](int i, int j) { return i + j; }); + + joint_B_rowmajor_load_store(B, outB, q); + + assert(matrix_compare(ROW_SIZE, COL_SIZE, outB, B)); + + free(B, q); + free(outB, q); +} + +int main(void) { + queue q; + + test(q); + + return 0; +} diff --git a/sycl/test-e2e/Matrix/joint_matrix_bf16_rowmajorB_pair_load_store.cpp b/sycl/test-e2e/Matrix/joint_matrix_bf16_rowmajorB_pair_load_store.cpp new file mode 100644 index 0000000000000..5fbe0aaf2d102 --- /dev/null +++ b/sycl/test-e2e/Matrix/joint_matrix_bf16_rowmajorB_pair_load_store.cpp @@ -0,0 +1,15 @@ +//==- joint_matrix_bf16_rowmajorB_pair_load_store.cpp - DPC++ joint_matrix--==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// REQUIRES: aspect-ext_intel_matrix, cpu + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include "common.hpp" + +#include "joint_matrix_bf16_rowmajorB_pair_load_store_impl.hpp" diff --git a/sycl/test-e2e/Matrix/joint_matrix_bf16_rowmajorB_pair_load_store_impl.hpp b/sycl/test-e2e/Matrix/joint_matrix_bf16_rowmajorB_pair_load_store_impl.hpp new file mode 100644 index 0000000000000..0cacda21b98e2 --- /dev/null +++ b/sycl/test-e2e/Matrix/joint_matrix_bf16_rowmajorB_pair_load_store_impl.hpp @@ -0,0 +1,70 @@ +//------------------------------------------------------------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-------------------------------------------------------------------------===// + +#include + +template +void joint_B_rowmajor_pair_load_store(Tb *B, Tb *OutB, queue &q) { + + range<1> global{1}; + range<1> local{1}; + + q.submit([&](handler &h) { + h.parallel_for( + nd_range<1>{global, local}, [=](nd_item<1> it) +#ifdef SG_SZ + [[intel::reqd_sub_group_size(SG_SZ)]] +#endif + { + auto pB = + address_space_cast(B); + auto pOutB = + address_space_cast(OutB); + + auto sg = it.get_sub_group(); + + joint_matrix + tB[2]; + + joint_matrix_load(sg, tB[0], pB, cols); + joint_matrix_load(sg, tB[1], pB + HW_MAX_COL_SIZE, cols); + ext::intel::experimental::matrix::joint_matrix_store(sg, tB[0], pOutB, + cols); + ext::intel::experimental::matrix::joint_matrix_store( + sg, tB[1], pOutB + HW_MAX_COL_SIZE, cols); + }); // parallel_for + }); // queue.submit + + q.wait(); +} + +template +void test(queue &q) { + Tb *B = malloc_shared(ROW_SIZE * COL_SIZE, q); + Tb *outB = malloc_shared(ROW_SIZE * COL_SIZE, q); + + matrix_fill(ROW_SIZE, COL_SIZE, B, [](int i, int j) { return i + j; }); + + joint_B_rowmajor_pair_load_store( + B, outB, q); + + assert(matrix_compare(ROW_SIZE, COL_SIZE, outB, B)); + + free(B, q); + free(outB, q); +} + +int main(void) { + queue q; + + test(q); + return 0; +}