Skip to content

Commit

Permalink
[SPARSE] Add support for cuSPARSE backend
Browse files Browse the repository at this point in the history
  • Loading branch information
Rbiessy committed Sep 6, 2024
1 parent c9d0b47 commit fac276d
Show file tree
Hide file tree
Showing 51 changed files with 3,635 additions and 263 deletions.
8 changes: 6 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ option(ENABLE_CUFFT_BACKEND "Enable the cuFFT backend for the DFT interface" OFF
option(ENABLE_ROCFFT_BACKEND "Enable the rocFFT backend for the DFT interface" OFF)
option(ENABLE_PORTFFT_BACKEND "Enable the portFFT DFT backend for the DFT interface. Cannot be used with other DFT backends." OFF)

# sparse
option(ENABLE_CUSPARSE_BACKEND "Enable the cuSPARSE backend for the SPARSE_BLAS interface" OFF)

set(ONEMKL_SYCL_IMPLEMENTATION "dpc++" CACHE STRING "Name of the SYCL compiler")
set(HIP_TARGETS "" CACHE STRING "Target HIP architectures")

Expand Down Expand Up @@ -102,7 +105,8 @@ if(ENABLE_MKLGPU_BACKEND
list(APPEND DOMAINS_LIST "dft")
endif()
if(ENABLE_MKLCPU_BACKEND
OR ENABLE_MKLGPU_BACKEND)
OR ENABLE_MKLGPU_BACKEND
OR ENABLE_CUSPARSE_BACKEND)
list(APPEND DOMAINS_LIST "sparse_blas")
endif()

Expand All @@ -129,7 +133,7 @@ if(CMAKE_CXX_COMPILER OR NOT ONEMKL_SYCL_IMPLEMENTATION STREQUAL "dpc++")
string(REPLACE "\\" "/" CMAKE_CXX_COMPILER ${CMAKE_CXX_COMPILER})
endif()
else()
if(ENABLE_CUBLAS_BACKEND OR ENABLE_CURAND_BACKEND OR ENABLE_CUSOLVER_BACKEND OR ENABLE_CUFFT_BACKEND
if(ENABLE_CUBLAS_BACKEND OR ENABLE_CURAND_BACKEND OR ENABLE_CUSOLVER_BACKEND OR ENABLE_CUFFT_BACKEND OR ENABLE_CUSPARSE_BACKEND
OR ENABLE_ROCBLAS_BACKEND OR ENABLE_ROCRAND_BACKEND OR ENABLE_ROCSOLVER_BACKEND OR ENABLE_ROCFFT_BACKEND)
set(CMAKE_CXX_COMPILER "clang++")
elseif(ENABLE_MKLGPU_BACKEND)
Expand Down
4 changes: 2 additions & 2 deletions cmake/FindCompiler.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ if(is_dpcpp)
# Check if the Nvidia target is supported. PortFFT uses this for choosing default configuration.
check_cxx_compiler_flag("-fsycl -fsycl-targets=nvptx64-nvidia-cuda" dpcpp_supports_nvptx64)

if(ENABLE_CURAND_BACKEND OR ENABLE_CUSOLVER_BACKEND)
if(ENABLE_CURAND_BACKEND OR ENABLE_CUSOLVER_BACKEND OR ENABLE_CUSPARSE_BACKEND)
list(APPEND UNIX_INTERFACE_COMPILE_OPTIONS
-fsycl-targets=nvptx64-nvidia-cuda -fsycl-unnamed-lambda)
list(APPEND UNIX_INTERFACE_LINK_OPTIONS
Expand All @@ -51,7 +51,7 @@ if(is_dpcpp)
-fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend
--offload-arch=${HIP_TARGETS})
endif()
if(ENABLE_CURAND_BACKEND OR ENABLE_CUSOLVER_BACKEND OR ENABLE_ROCBLAS_BACKEND
if(ENABLE_CURAND_BACKEND OR ENABLE_CUSOLVER_BACKEND OR ENABLE_CUSPARSE_BACKEND OR ENABLE_ROCBLAS_BACKEND
OR ENABLE_ROCRAND_BACKEND OR ENABLE_ROCSOLVER_BACKEND)
set_target_properties(ONEMKL::SYCL::SYCL PROPERTIES
INTERFACE_COMPILE_OPTIONS "${UNIX_INTERFACE_COMPILE_OPTIONS}"
Expand Down
8 changes: 6 additions & 2 deletions docs/building_the_project_with_dpcpp.rst
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,9 @@ The most important supported build options are:
* - ENABLE_CURAND_BACKEND
- True, False
- False
* - ENABLE_CUSPARSE_BACKEND
- True, False
- False
* - ENABLE_NETLIB_BACKEND
- True, False
- False
Expand Down Expand Up @@ -183,8 +186,8 @@ Building for CUDA
^^^^^^^^^^^^^^^^^

The CUDA backends can be enabled with ``ENABLE_CUBLAS_BACKEND``,
``ENABLE_CUFFT_BACKEND``, ``ENABLE_CURAND_BACKEND``, and
``ENABLE_CUSOLVER_BACKEND``.
``ENABLE_CUFFT_BACKEND``, ``ENABLE_CURAND_BACKEND``,
``ENABLE_CUSOLVER_BACKEND``, and ``ENABLE_CUSPARSE_BACKEND``.

No additional parameters are required for using CUDA libraries. In most cases,
the CUDA libraries should be found automatically by CMake.
Expand Down Expand Up @@ -356,6 +359,7 @@ disabled using the Ninja build system:
-DENABLE_CUBLAS_BACKEND=True \
-DENABLE_CUSOLVER_BACKEND=True \
-DENABLE_CURAND_BACKEND=True \
-DENABLE_CUSPARSE_BACKEND=True \
-DBUILD_FUNCTIONAL_TESTS=False
``$ONEMKL_DIR`` points at the oneMKL source directory. The x86 CPU (``MKLCPU``)
Expand Down
136 changes: 136 additions & 0 deletions docs/domains/sparse_linear_algebra.rst
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,139 @@ Currently known limitations:
``oneapi::mkl::unimplemented`` exception.
- Scalar parameters ``alpha`` and ``beta`` should be host pointers to prevent
synchronizations and copies to the host.


cuSPARSE backend
----------------

Currently known limitations:

- Using ``spmv`` with a ``type_view`` other than ``matrix_descr::general`` will
throw a ``oneapi::mkl::unimplemented`` exception.
- The COO format requires the indices to be sorted by row. See the `cuSPARSE
documentation
<https://docs.nvidia.com/cuda/cusparse/index.html#coordinate-coo>`_.


Operation algorithms mapping
----------------------------

The following tables describe how a oneMKL SYCL Interface algorithm maps to the
backend's algorithms. Refer to the backend's documentation for a more detailed
explanation of the algorithms.

Backends with no equivalent algorithms will fall back to the backend's default
behavior.


spmm
^^^^

.. list-table::
:header-rows: 1
:widths: 10 30 45

* - Value
- Description
- Backend equivalent
* - ``default_optimize_alg``
- Default algorithm.
- | MKL: none
| cuSPARSE: ``CUSPARSE_SPMM_ALG_DEFAULT``
* - ``no_optimize_alg``
- Default algorithm but may skip some optimizations. Useful only if an
operation with the same configuration is run once.
- | MKL: none
| cuSPARSE: ``CUSPARSE_SPMM_ALG_DEFAULT``
* - ``coo_alg1``
- Should provide best performance for COO format, small ``nnz`` and
column-major layout.
- | MKL: none
| cuSPARSE: ``CUSPARSE_SPMM_COO_ALG1``
* - ``coo_alg2``
- Should provide best performance for COO format and column-major layout.
Produces deterministic results.
- | MKL: none
| cuSPARSE: ``CUSPARSE_SPMM_COO_ALG2``
* - ``coo_alg3``
- Should provide best performance for COO format and large ``nnz``.
- | MKL: none
| cuSPARSE: ``CUSPARSE_SPMM_COO_ALG3``
* - ``coo_alg4``
- Should provide best performance for COO format and row-major layout.
- | MKL: none
| cuSPARSE: ``CUSPARSE_SPMM_COO_ALG4``
* - ``csr_alg1``
- Should provide best performance for CSR format and column-major layout.
- | MKL: none
| cuSPARSE: ``CUSPARSE_SPMM_CSR_ALG1``
* - ``csr_alg2``
- Should provide best performance for CSR format and row-major layout.
- | MKL: none
| cuSPARSE: ``CUSPARSE_SPMM_CSR_ALG2``
* - ``csr_alg3``
- Deterministic algorithm for CSR format.
- | MKL: none
| cuSPARSE: ``CUSPARSE_SPMM_CSR_ALG3``

spmv
^^^^

.. list-table::
:header-rows: 1
:widths: 10 30 45

* - Value
- Description
- Backend equivalent
* - ``default_alg``
- Default algorithm.
- | MKL: none
| cuSPARSE: ``CUSPARSE_SPMV_ALG_DEFAULT``
* - ``no_optimize_alg``
- Default algorithm but may skip some optimizations. Useful only if an
operation with the same configuration is run once.
- | MKL: none
| cuSPARSE: ``CUSPARSE_SPMV_ALG_DEFAULT``
* - ``coo_alg1``
- Default algorithm for COO format.
- | MKL: none
| cuSPARSE: ``CUSPARSE_SPMV_COO_ALG1``
* - ``coo_alg2``
- Deterministic algorithm for COO format.
- | MKL: none
| cuSPARSE: ``CUSPARSE_SPMV_COO_ALG2``
* - ``csr_alg1``
- Default algorithm for CSR format.
- | MKL: none
| cuSPARSE: ``CUSPARSE_SPMV_CSR_ALG1``
* - ``csr_alg2``
- Deterministic algorithm for CSR format.
- | MKL: none
| cuSPARSE: ``CUSPARSE_SPMV_CSR_ALG2``
* - ``csr_alg3``
- LRB variant of the algorithm for CSR format.
- | MKL: none
| cuSPARSE: none

spsv
^^^^

.. list-table::
:header-rows: 1
:widths: 10 30 45

* - Value
- Description
- Backend equivalent
* - ``default_optimize_alg``
- Default algorithm.
- | MKL: none
| cuSPARSE: ``CUSPARSE_SPSV_ALG_DEFAULT``
* - ``no_optimize_alg``
- Default algorithm but may skip some optimizations. Useful only if an
operation with the same configuration is run once.
- | MKL: none
| cuSPARSE: ``CUSPARSE_SPSV_ALG_DEFAULT``
21 changes: 9 additions & 12 deletions examples/sparse_blas/compile_time_dispatching/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,27 +18,24 @@
#===============================================================================

#Build object from all sources
set(SPARSE_BLAS_BACKENDS "")

if(ENABLE_MKLCPU_BACKEND)
list(APPEND SPARSE_BLAS_BACKENDS "mklcpu")
set(SPARSE_CT_SOURCES "")
if(ENABLE_MKLCPU_BACKEND AND ENABLE_CUSPARSE_BACKEND)
list(APPEND SPARSE_CT_SOURCES "sparse_blas_spmv_usm_mklcpu_cusparse")
endif()

include(WarningsUtils)

foreach(backend ${SPARSE_BLAS_BACKENDS})
set(EXAMPLE_NAME example_sparse_blas_spmv_usm_${backend})
add_executable(${EXAMPLE_NAME} sparse_blas_spmv_usm_${backend}.cpp)
target_include_directories(${EXAMPLE_NAME}
foreach(sparse_ct_source ${SPARSE_CT_SOURCES})
add_executable(${sparse_ct_source} ${sparse_ct_source}.cpp)
target_include_directories(${sparse_ct_source}
PUBLIC ${PROJECT_SOURCE_DIR}/examples/include
PUBLIC ${PROJECT_SOURCE_DIR}/include
PUBLIC ${CMAKE_BINARY_DIR}/bin
)

add_dependencies(${EXAMPLE_NAME} onemkl_sparse_blas_${backend})
target_link_libraries(${EXAMPLE_NAME} PRIVATE ONEMKL::SYCL::SYCL onemkl_sparse_blas_${backend})
target_link_libraries(${sparse_ct_source} PRIVATE ONEMKL::SYCL::SYCL onemkl_sparse_blas_mklcpu onemkl_sparse_blas_cusparse)

# Register example as ctest
add_test(NAME sparse_blas/EXAMPLE/CT/sparse_blas_spmv_usm_${backend} COMMAND ${EXAMPLE_NAME})
endforeach(backend)
add_test(NAME sparse_blas/EXAMPLE/CT/${sparse_ct_source} COMMAND ${sparse_ct_source})
endforeach(sparse_ct_source)

Loading

0 comments on commit fac276d

Please sign in to comment.