Skip to content

Commit

Permalink
[SYCL][E2E] Add tile store test with rowmajor and use.b. (intel#14698)
Browse files Browse the repository at this point in the history
  • Loading branch information
gpei-dev committed Sep 18, 2024
1 parent 0b65c98 commit dae7032
Show file tree
Hide file tree
Showing 6 changed files with 198 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
//==---- joint_matrix_bf16_rowmajorB_load_store.cpp - DPC++ joint_matrix ---==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// REQUIRES: aspect-ext_intel_matrix, cpu

// RUN: %{build} -o %t.out
// RUN: %{run} %t.out

#include "../common.hpp"

#define SG_SZ 32

#include "../joint_matrix_bf16_rowmajorB_load_store_impl.hpp"
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
//==- joint_matrix_bf16_rowmajorB_pair_load_store.cpp - DPC++ joint_matrix--==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// REQUIRES: aspect-ext_intel_matrix, cpu

// RUN: %{build} -o %t.out
// RUN: %{run} %t.out

#include "../common.hpp"

#define SG_SZ 32

#include "../joint_matrix_bf16_rowmajorB_pair_load_store_impl.hpp"
15 changes: 15 additions & 0 deletions sycl/test-e2e/Matrix/joint_matrix_bf16_rowmajorB_load_store.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
//==---- joint_matrix_bf16_rowmajorB_load_store.cpp - DPC++ joint_matrix ---==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// REQUIRES: aspect-ext_intel_matrix, cpu

// RUN: %{build} -o %t.out
// RUN: %{run} %t.out

#include "common.hpp"

#include "joint_matrix_bf16_rowmajorB_load_store_impl.hpp"
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
//------------------------------------------------------------------------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===-------------------------------------------------------------------------===//

#include <sycl/usm.hpp>

template <typename Tb, unsigned int rows, unsigned int cols>
void joint_B_rowmajor_load_store(Tb *B, Tb *OutB, queue &q) {

range<1> global{1};
range<1> local{1};

q.submit([&](handler &h) {
h.parallel_for<class Load>(
nd_range<1>{global, local}, [=](nd_item<1> it)
#ifdef SG_SZ
[[intel::reqd_sub_group_size(SG_SZ)]]
#endif
{
auto pB =
address_space_cast<sycl::access::address_space::global_space,
sycl::access::decorated::no>(B);
auto pOutB =
address_space_cast<sycl::access::address_space::global_space,
sycl::access::decorated::no>(OutB);

auto sg = it.get_sub_group();

joint_matrix<sub_group, Tb, use::b, rows, cols, layout::row_major> tB;

joint_matrix_load(sg, tB, pB, cols);
ext::intel::experimental::matrix::joint_matrix_store(sg, tB, pOutB,
cols);
}); // parallel_for
}); // queue.submit

q.wait();
}

template <typename Tb, size_t ROW_SIZE, size_t COL_SIZE> void test(queue &q) {
Tb *B = malloc_shared<Tb>(ROW_SIZE * COL_SIZE, q);
Tb *outB = malloc_shared<Tb>(ROW_SIZE * COL_SIZE, q);

matrix_fill(ROW_SIZE, COL_SIZE, B, [](int i, int j) { return i + j; });

joint_B_rowmajor_load_store<Tb, ROW_SIZE, COL_SIZE>(B, outB, q);

assert(matrix_compare(ROW_SIZE, COL_SIZE, outB, B));

free(B, q);
free(outB, q);
}

int main(void) {
queue q;

test<bfloat16, 8, 16>(q);

return 0;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
//==- joint_matrix_bf16_rowmajorB_pair_load_store.cpp - DPC++ joint_matrix--==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// REQUIRES: aspect-ext_intel_matrix, cpu

// RUN: %{build} -o %t.out
// RUN: %{run} %t.out

#include "common.hpp"

#include "joint_matrix_bf16_rowmajorB_pair_load_store_impl.hpp"
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
//------------------------------------------------------------------------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===-------------------------------------------------------------------------===//

#include <sycl/usm.hpp>

template <typename Tb, unsigned rows, unsigned cols, unsigned HW_MAX_COL_SIZE>
void joint_B_rowmajor_pair_load_store(Tb *B, Tb *OutB, queue &q) {

range<1> global{1};
range<1> local{1};

q.submit([&](handler &h) {
h.parallel_for<class Load>(
nd_range<1>{global, local}, [=](nd_item<1> it)
#ifdef SG_SZ
[[intel::reqd_sub_group_size(SG_SZ)]]
#endif
{
auto pB =
address_space_cast<sycl::access::address_space::global_space,
sycl::access::decorated::no>(B);
auto pOutB =
address_space_cast<sycl::access::address_space::global_space,
sycl::access::decorated::no>(OutB);

auto sg = it.get_sub_group();

joint_matrix<sub_group, Tb, use::b, rows, HW_MAX_COL_SIZE,
layout::row_major>
tB[2];

joint_matrix_load(sg, tB[0], pB, cols);
joint_matrix_load(sg, tB[1], pB + HW_MAX_COL_SIZE, cols);
ext::intel::experimental::matrix::joint_matrix_store(sg, tB[0], pOutB,
cols);
ext::intel::experimental::matrix::joint_matrix_store(
sg, tB[1], pOutB + HW_MAX_COL_SIZE, cols);
}); // parallel_for
}); // queue.submit

q.wait();
}

template <typename Tb, size_t ROW_SIZE, size_t COL_SIZE, size_t HW_MAX_COL_SIZE>
void test(queue &q) {
Tb *B = malloc_shared<Tb>(ROW_SIZE * COL_SIZE, q);
Tb *outB = malloc_shared<Tb>(ROW_SIZE * COL_SIZE, q);

matrix_fill(ROW_SIZE, COL_SIZE, B, [](int i, int j) { return i + j; });

joint_B_rowmajor_pair_load_store<Tb, ROW_SIZE, COL_SIZE, HW_MAX_COL_SIZE>(
B, outB, q);

assert(matrix_compare(ROW_SIZE, COL_SIZE, outB, B));

free(B, q);
free(outB, q);
}

int main(void) {
queue q;

test<bfloat16, 8, 32, 16>(q);
return 0;
}

0 comments on commit dae7032

Please sign in to comment.