From b49a304daee2db08044bffa65f3894c942b5185e Mon Sep 17 00:00:00 2001 From: mehmet yusufoglu Date: Fri, 19 Jul 2024 13:04:54 +0200 Subject: [PATCH] some refactoring --- example/matrixAddWithMdspan/src/matrixAddMdSpan.cpp | 5 +---- example/matrixMulWithMdspan/src/matrixMulMdSpan.cpp | 13 +++++++------ 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/example/matrixAddWithMdspan/src/matrixAddMdSpan.cpp b/example/matrixAddWithMdspan/src/matrixAddMdSpan.cpp index e80edfb0b22..eb76941bd7e 100644 --- a/example/matrixAddWithMdspan/src/matrixAddMdSpan.cpp +++ b/example/matrixAddWithMdspan/src/matrixAddMdSpan.cpp @@ -130,7 +130,6 @@ auto example(TAccTag const&) -> int auto mdDevB = alpaka::experimental::getMdSpan(bufDevB); auto mdDevC = alpaka::experimental::getMdSpan(bufDevC); - // Let alpaka calculate good block and grid sizes given our full problem extent. auto const workDiv = alpaka::getValidWorkDiv( devAcc, @@ -142,11 +141,9 @@ auto example(TAccTag const&) -> int // Execute the kernel alpaka::exec(queue, workDiv, MatrixAddKernel{}, mdDevA, mdDevB, mdDevC); - // Wait for the kernel to finish - alpaka::wait(queue); - // Copy result back to host alpaka::memcpy(queue, bufHostC, bufDevC); + // This wait is not necessary if the queue is a blocking queue alpaka::wait(queue); // Verify the result diff --git a/example/matrixMulWithMdspan/src/matrixMulMdSpan.cpp b/example/matrixMulWithMdspan/src/matrixMulMdSpan.cpp index 96b5b55333d..19b957ec023 100644 --- a/example/matrixMulWithMdspan/src/matrixMulMdSpan.cpp +++ b/example/matrixMulWithMdspan/src/matrixMulMdSpan.cpp @@ -44,11 +44,14 @@ struct MatrixMulKernel //! \param C Output matrix where the result of A * B will be stored //! \param K The shared dimension between A and B template - ALPAKA_FN_ACC void operator()(TAcc const& acc, MdSpan A, MdSpan B, MdSpan C, Idx K) const + ALPAKA_FN_ACC void operator()(TAcc const& acc, MdSpan A, MdSpan B, MdSpan C) const { - // compile time check + // compile time checks static_assert(isMdspan::value, "The type MdSpan should be an std mdspan"); + // A is MxK and B is KxN + auto const K = static_cast(A.extent(1)); + auto const i = alpaka::getIdx(acc)[0]; auto const j = alpaka::getIdx(acc)[1]; @@ -146,13 +149,11 @@ auto example(TAccTag const&) -> int alpaka::GridBlockExtentSubDivRestrictions::Unrestricted); // Execute the kernel - alpaka::exec(queue, workDiv, MatrixMulKernel{}, mdDevA, mdDevB, mdDevC, K); - - // Wait for the kernel to finish - alpaka::wait(queue); + alpaka::exec(queue, workDiv, MatrixMulKernel{}, mdDevA, mdDevB, mdDevC); // Copy result back to host alpaka::memcpy(queue, bufHostC, bufDevC); + // This wait is not necessary if the queue is a blocking queue alpaka::wait(queue); // Verify the result