Skip to content

Commit

Permalink
Add a matrix addition and multiplication example using mdspan
Browse files Browse the repository at this point in the history
  • Loading branch information
mehmetyusufoglu committed Jul 24, 2024
1 parent 01d123e commit d1d1c95
Show file tree
Hide file tree
Showing 5 changed files with 497 additions and 0 deletions.
3 changes: 3 additions & 0 deletions example/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,6 @@ add_subdirectory("randomCells2D/")
add_subdirectory("reduce/")
add_subdirectory("tagSpecialization/")
add_subdirectory("vectorAdd/")
add_subdirectory("matrixAddWithMdspan/")
add_subdirectory("matrixMulWithMdspan/")

53 changes: 53 additions & 0 deletions example/matrixAddWithMdspan/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#
# Copyright 2023 Simeon Ehrig, Mehmet Yusufoglu
# SPDX-License-Identifier: MPL-2.0
#

################################################################################
# Required CMake version.

cmake_minimum_required(VERSION 3.22)

set_property(GLOBAL PROPERTY USE_FOLDERS ON)

################################################################################
# Project.

set(_TARGET_NAME matrixAddMdSpan)

project(${_TARGET_NAME} LANGUAGES CXX)

#-------------------------------------------------------------------------------
# Find alpaka.

if(NOT TARGET alpaka::alpaka)
option(alpaka_USE_SOURCE_TREE "Use alpaka's source tree instead of an alpaka installation" OFF)

if(alpaka_USE_SOURCE_TREE)
# Don't build the examples recursively
set(alpaka_BUILD_EXAMPLES OFF)
add_subdirectory("${CMAKE_CURRENT_LIST_DIR}/../.." "${CMAKE_BINARY_DIR}/alpaka")
else()
find_package(alpaka REQUIRED)
endif()
endif()


if (alpaka_USE_MDSPAN STREQUAL "OFF")
message(STATUS "The matrixAddMdSpan example requires mdspan. Please set alpaka_USE_MDSPAN accordingly. Example disabled.")
return()
endif ()

#-------------------------------------------------------------------------------
# Add executable.

alpaka_add_executable(
${_TARGET_NAME}
src/matrixAddMdSpan.cpp)
target_link_libraries(
${_TARGET_NAME}
PUBLIC alpaka::alpaka)

set_target_properties(${_TARGET_NAME} PROPERTIES FOLDER example)

add_test(NAME ${_TARGET_NAME} COMMAND ${_TARGET_NAME})
181 changes: 181 additions & 0 deletions example/matrixAddWithMdspan/src/matrixAddMdSpan.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
/* Copyright 2024 Mehmet Yusufoglu, Simeon Ehrig, Andrea Bocci
* SPDX-License-Identifier: MPL-2.0
*/

#include <alpaka/alpaka.hpp>
// Needed for running example for all backends available; one by one
#include <alpaka/example/ExecuteForEachAccTag.hpp>

#include <experimental/mdspan>
#include <iostream>

//! Matrix addition example by using mdspan data structure

//! Some simple type traits for checking the types
//! isMdspan simply checks if a type is of type std::experimental::mdspan or not
//! Primary template for is_mdspan (defaults to false)
template<typename T>
struct IsMdspan : std::false_type
{
};

//! Specialization for mdspan with four template arguments
template<typename ElementType, typename Extents, typename LayoutPolicy, typename AccessorPolicy>
struct IsMdspan<std::experimental::mdspan<ElementType, Extents, LayoutPolicy, AccessorPolicy>> : std::true_type
{
};

template<typename T>
inline constexpr bool is_mdspan = IsMdspan<T>::value;

/**
* @brief Kernel for performing addition of two 2D matrices. Each element is added by a different thread. MdSpan data
* structure is used to pass the data to and from the kernel.
*/
struct MatrixAddKernel
{
//! \tparam TAcc Accelerator type
//! \tparam MdSpan The type of the multidimensional span (mdspan)
//! \param acc Accelerator
//! \param A First input matrix
//! \param B Second input matrix
//! \param C Output matrix where the result of A + B will be stored
template<typename TAcc, typename TMdSpan>
ALPAKA_FN_ACC void operator()(TAcc const& acc, TMdSpan A, TMdSpan B, TMdSpan C) const
{
// compile time checks
static_assert(is_mdspan<TMdSpan>, "The type TMdSpan should be an std mdspan");
static_assert(TMdSpan::rank() == 2);

auto const i = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0];
auto const j = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[1];

if(i < A.extent(0) && j < A.extent(1))
{
C(i, j) = A(i, j) + B(i, j);
}
}
};

// In standard projects, you typically do not execute the code with any available accelerator.
// Instead, a single accelerator is selected once from the active accelerators and the kernels are executed with the
// selected accelerator only. If you use the example as the starting point for your project, you can rename the
// example() function to main() and move the accelerator tag to the function body.
template<typename TAccTag>
auto example(TAccTag const&) -> int
{
// Index type
using Idx = std::size_t;
// Set number of dimensions (i.e 2) as a type
using Dim = alpaka::DimInt<2>;
// Set data type
using DataType = float;

// Define matrix dimensions
const Idx nX = 1024;
const Idx nY = 1024;

// Define device and queue
using Acc = alpaka::AccCpuSerial<Dim, Idx>;
using Queue = alpaka::Queue<Acc, alpaka::Blocking>;
using Vec = alpaka::Vec<Dim, Idx>;

auto const platformHost = alpaka::PlatformCpu{};
auto const devHost = alpaka::getDevByIdx(platformHost, 0);
auto const platformAcc = alpaka::Platform<Acc>{};
auto const devAcc = alpaka::getDevByIdx(platformAcc, 0);

Queue queue(devAcc);

// Define the 2D extent (dimensions)
alpaka::Vec<Dim, Idx> const extent2D(static_cast<Idx>(nX), static_cast<Idx>(nY));

// Allocate host memory
auto bufHostA = alpaka::allocBuf<DataType, Idx>(devHost, extent2D);
auto bufHostB = alpaka::allocBuf<DataType, Idx>(devHost, extent2D);
auto bufHostC = alpaka::allocBuf<DataType, Idx>(devHost, extent2D);

// Create mdspan view for bufHostA and bufHostB using alpaka::experimental::getMdSpan to fill the host buffers
auto mdHostA = alpaka::experimental::getMdSpan(bufHostA);
auto mdHostB = alpaka::experimental::getMdSpan(bufHostB);

// Initialize host matrices
for(Idx i = 0; i < nX; ++i)
{
for(Idx j = 0; j < nY; ++j)
{
// fill with some data
mdHostA(i, j) = static_cast<DataType>(i * nY + j);
mdHostB(i, j) = static_cast<DataType>(i * nY + j);
}
}

// Allocate device memory
auto bufDevA = alpaka::allocBuf<DataType, Idx>(devAcc, extent2D);
auto bufDevB = alpaka::allocBuf<DataType, Idx>(devAcc, extent2D);
auto bufDevC = alpaka::allocBuf<DataType, Idx>(devAcc, extent2D);

// Copy data to device, use directly host buffers (not mdspans used to fill the data)
alpaka::memcpy(queue, bufDevA, bufHostA);
alpaka::memcpy(queue, bufDevB, bufHostB);
alpaka::wait(queue);

// Create mdspan views for device buffers using alpaka::experimental::getMdSpan
auto mdDevA = alpaka::experimental::getMdSpan(bufDevA);
auto mdDevB = alpaka::experimental::getMdSpan(bufDevB);
auto mdDevC = alpaka::experimental::getMdSpan(bufDevC);

MatrixAddKernel kernel;
auto const& bundeledKernel = alpaka::KernelBundle(kernel, mdDevA, mdDevB, mdDevC);

// Let alpaka calculate good block and grid sizes given our full problem extent
auto const workDiv = alpaka::getValidWorkDivForKernel<Acc>(
devAcc,
bundeledKernel,
extent2D,
Vec::ones(),
false,
alpaka::GridBlockExtentSubDivRestrictions::Unrestricted);

// Execute the kernel
alpaka::exec<Acc>(queue, workDiv, MatrixAddKernel{}, mdDevA, mdDevB, mdDevC);

// Copy result back to host
alpaka::memcpy(queue, bufHostC, bufDevC);
alpaka::wait(queue);

// Verify the result
bool success = true;
auto mdHostC = alpaka::experimental::getMdSpan(bufHostC);
for(Idx i = 0; i < nX; ++i)
{
for(Idx j = 0; j < nY; ++j)
{
if(mdHostC(i, j) != 2 * static_cast<DataType>(i * nY + j))
{
success = false;
break;
}
}
}

std::cout << "Addition of matrices of size " << nX << "x" << nY << " using mdspan "
<< (success ? "succeeded" : "failed") << "!" << std::endl;
return EXIT_SUCCESS;
}

auto main() -> int
{
// Execute the example once for each enabled accelerator.
// If you would like to execute it for a single accelerator only you can use the following code.
// \code{.cpp}
// auto tag = TagCpuSerial;
// return example(tag);
// \endcode
//
// valid tags:
// TagCpuSerial, TagGpuHipRt, TagGpuCudaRt, TagCpuOmp2Blocks, TagCpuTbbBlocks,
// TagCpuOmp2Threads, TagCpuSycl, TagCpuTbbBlocks, TagCpuThreads,
// TagFpgaSyclIntel, TagGenericSycl, TagGpuSyclIntel
return alpaka::executeForEachAccTag([=](auto const& tag) { return example(tag); });
}
53 changes: 53 additions & 0 deletions example/matrixMulWithMdspan/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#
# Copyright 2023 Simeon Ehrig, Mehmet Yusufoglu
# SPDX-License-Identifier: MPL-2.0
#

################################################################################
# Required CMake version.

cmake_minimum_required(VERSION 3.22)

set_property(GLOBAL PROPERTY USE_FOLDERS ON)

################################################################################
# Project.

set(_TARGET_NAME matrixMulMdSpan)

project(${_TARGET_NAME} LANGUAGES CXX)

#-------------------------------------------------------------------------------
# Find alpaka.

if(NOT TARGET alpaka::alpaka)
option(alpaka_USE_SOURCE_TREE "Use alpaka's source tree instead of an alpaka installation" OFF)

if(alpaka_USE_SOURCE_TREE)
# Don't build the examples recursively
set(alpaka_BUILD_EXAMPLES OFF)
add_subdirectory("${CMAKE_CURRENT_LIST_DIR}/../.." "${CMAKE_BINARY_DIR}/alpaka")
else()
find_package(alpaka REQUIRED)
endif()
endif()


if (alpaka_USE_MDSPAN STREQUAL "OFF")
message(STATUS "The matrixMulMdSpan example requires mdspan. Please set alpaka_USE_MDSPAN accordingly. Example disabled.")
return()
endif ()

#-------------------------------------------------------------------------------
# Add executable.

alpaka_add_executable(
${_TARGET_NAME}
src/matrixMulMdSpan.cpp)
target_link_libraries(
${_TARGET_NAME}
PUBLIC alpaka::alpaka)

set_target_properties(${_TARGET_NAME} PROPERTIES FOLDER example)

add_test(NAME ${_TARGET_NAME} COMMAND ${_TARGET_NAME})
Loading

0 comments on commit d1d1c95

Please sign in to comment.