Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rocSPARSE general functions (part III) #114

Merged
merged 10 commits into from
Jun 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion Libraries/rocSPARSE/level_2/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# MIT License
#
# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
# Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
Expand All @@ -27,10 +27,12 @@ add_subdirectory(bsrmv)
# Add each listed example directory to the build, in the order given.
foreach(rocsparse_level_2_example IN ITEMS
    bsrsv
    bsrxmv
    coomv
    csritsv
    csrmv
    csrsv
    ellmv
    gebsrmv
    gemvi
    spitsv
    spmv
    spsv
)
    add_subdirectory(${rocsparse_level_2_example})
endforeach()
4 changes: 3 additions & 1 deletion Libraries/rocSPARSE/level_2/Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# MIT License
#
# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
# Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
Expand All @@ -25,11 +25,13 @@ EXAMPLES := \
bsrsv \
bsrxmv \
coomv \
csritsv \
csrmv \
csrsv \
ellmv \
gebsrmv \
gemvi \
spitsv \
spmv \
spsv

Expand Down
1 change: 1 addition & 0 deletions Libraries/rocSPARSE/level_2/csritsv/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
rocsparse_csritsv
62 changes: 62 additions & 0 deletions Libraries/rocSPARSE/level_2/csritsv/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# MIT License
#
# Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# cmake_minimum_required must be the first command so that policy defaults are
# established before anything else is evaluated.
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)

# Single name shared by the project, the executable target and the CTest entry.
set(example_name rocsparse_csritsv)
project(${example_name} LANGUAGES CXX)

# Stop processing this directory early when the CUDA runtime was requested.
# The expansion is quoted so that an unset GPU_RUNTIME compares as an empty string.
if("${GPU_RUNTIME}" STREQUAL "CUDA")
    message(STATUS "rocSPARSE examples do not support the CUDA runtime")
    return()
endif()

# This example does not contain device code, thereby it can be compiled with any conforming C++ compiler.

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Default location of the ROCm installation; users may override ROCM_ROOT in the cache.
if(WIN32)
    set(ROCM_ROOT "$ENV{HIP_PATH}" CACHE PATH "Root directory of the ROCm installation")
else()
    set(ROCM_ROOT "/opt/rocm" CACHE PATH "Root directory of the ROCm installation")
endif()

# Let find_package() search the ROCm installation.
list(APPEND CMAKE_PREFIX_PATH "${ROCM_ROOT}")

find_package(rocsparse REQUIRED)

add_executable(${example_name} main.cpp)
# Make example runnable using ctest. The NAME/COMMAND signature resolves the
# target name to the built executable's location for every generator and
# configuration, which the legacy positional signature does not do.
add_test(NAME ${example_name} COMMAND ${example_name})

# Link to example library
target_link_libraries(${example_name} PRIVATE roc::rocsparse hip::host)

# Shared helper headers of the examples live in the repository-level Common directory.
target_include_directories(
    ${example_name}
    PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/../../../../Common"
)

install(TARGETS ${example_name})

# On Windows, also install the rocSPARSE runtime library next to the example.
if(WIN32)
    install(IMPORTED_RUNTIME_ARTIFACTS roc::rocsparse)
endif()
58 changes: 58 additions & 0 deletions Libraries/rocSPARSE/level_2/csritsv/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# MIT License
#
# Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# Name of the produced executable and location of the shared example headers.
EXAMPLE := rocsparse_csritsv
COMMON_INCLUDE_DIR := ../../../../Common
GPU_RUNTIME := HIP

# Only the HIP runtime is accepted; abort with an error for any other value.
ifneq ($(GPU_RUNTIME), HIP)
  $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be HIP.)
endif

# Root of the ROCm installation; may be overridden on the make command line.
ROCM_INSTALL_DIR := /opt/rocm

HIP_INCLUDE_DIR       := $(ROCM_INSTALL_DIR)/include
ROCSPARSE_INCLUDE_DIR := $(HIP_INCLUDE_DIR)

CXX ?= g++

# Common variables and flags
CXX_STD   := c++17
ICXXFLAGS := -std=$(CXX_STD)
ICPPFLAGS := -isystem $(ROCSPARSE_INCLUDE_DIR) -isystem $(HIP_INCLUDE_DIR) -I $(COMMON_INCLUDE_DIR) -D__HIP_PLATFORM_AMD__
ILDFLAGS  := -L $(ROCM_INSTALL_DIR)/lib
ILDLIBS   := -lrocsparse -lamdhip64

CXXFLAGS ?= -Wall -Wextra

# Append the user-supplied flags after the internal ones so that they can
# extend the defaults set above.
ICXXFLAGS += $(CXXFLAGS)
ICPPFLAGS += $(CPPFLAGS)
ILDFLAGS  += $(LDFLAGS)
ILDLIBS   += $(LDLIBS)

# Build the example. Only main.cpp ($<) is compiled; the two headers are listed
# as prerequisites so that editing them triggers a rebuild.
# Recipe lines must start with a tab character.
$(EXAMPLE): main.cpp $(COMMON_INCLUDE_DIR)/example_utils.hpp $(COMMON_INCLUDE_DIR)/rocsparse_utils.hpp
	$(CXX) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS)

# Remove the built executable.
clean:
	$(RM) $(EXAMPLE)

.PHONY: clean
156 changes: 156 additions & 0 deletions Libraries/rocSPARSE/level_2/csritsv/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
# rocSPARSE Level 2 CSR Iterative Triangular Solver

## Description

This example illustrates the use of the `rocSPARSE` level 2 iterative triangular solver using the CSR storage format.

This triangular solver is used to find an iterative solution with the Jacobi method for a linear system of the form

$$
A' y \approx \alpha x,
$$

within a given `tolerance` and using at most `max_iter` iterations, where

- $A$ is a sparse triangular matrix of order $n$ whose elements are the coefficients of the equations,
- $A'$ is one of the following:
- $A' = A$ (identity)
- $A' = A^T$ (transpose $A$: $A_{ij}^T = A_{ji}$)
- $A' = A^H$ (conjugate transpose/Hermitian $A$: $A_{ij}^H = \bar A_{ji}$),
- $\alpha$ is a scalar,
- $x$ is a dense vector of size $n$ containing the constant terms of the equations, and
- $y$ is a dense vector of size $n$ which contains the unknowns of the system.

Obtaining a solution for such a system consists of finding concrete values of all the unknowns such that the above equality holds.

### Application flow

1. Setup input data.
2. Allocate device memory and offload input data to device.
3. Initialize rocSPARSE by creating a handle.
4. Prepare utility variables for rocSPARSE csritsv invocation.
5. Perform analysis step.
6. Perform triangular solve $A' y = \alpha x$.
7. Check results obtained.
8. Copy solution vector $y$ from device to host and compare with expected result.
9. Free rocSPARSE resources and device memory.
10. Print validation result.

## Key APIs and Concepts

### CSR Matrix Storage Format

The [Compressed Sparse Row (CSR) storage format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#csr-storage-format) describes an $m \times n$ sparse matrix with three arrays.

Defining

- `m`: number of rows
- `n`: number of columns
- `nnz`: number of non-zero elements

we can describe a sparse matrix using the following arrays:

- `csr_val`: array storing the non-zero elements of the matrix.
- `csr_row_ptr`: given $i \in [0, m]$
- if $` 0 \leq i < m `$, `csr_row_ptr[i]` stores the index of the first non-zero element in row $i$ of the matrix
- if $i = m$, `csr_row_ptr[i]` stores `nnz`.

This way, row $j \in [0, m)$ contains the non-zero elements of indices from `csr_row_ptr[j]` to `csr_row_ptr[j+1]-1`. Therefore, the corresponding values in `csr_val` can be accessed from `csr_row_ptr[j]` to `csr_row_ptr[j+1]-1`.
- `csr_col_ind`: given $i \in [0, nnz-1]$, `csr_col_ind[i]` stores the column of the $i^{th}$ non-zero element in the matrix.

The CSR matrix is sorted by column indices in the same row, and each pair of indices appears only once.

For instance, consider a sparse matrix as

$$
A=
\left(
\begin{array}{ccccc}
1 & 2 & 0 & 3 & 0 \\
0 & 4 & 5 & 0 & 0 \\
6 & 0 & 0 & 7 & 8
\end{array}
\right)
$$

Therefore, the CSR representation of $A$ is:

```c++
m = 3

n = 5

nnz = 8

csr_val = { 1, 2, 3, 4, 5, 6, 7, 8 }

csr_row_ptr = { 0, 3, 5, 8 }

csr_col_ind = { 0, 1, 3, 1, 2, 0, 3, 4 }
```

### rocSPARSE

- rocSPARSE is initialized by calling `rocsparse_create_handle(rocsparse_handle*)` and is terminated by calling `rocsparse_destroy_handle(rocsparse_handle)`.
- `rocsparse_pointer_mode` controls whether scalar parameters must be allocated on the host (`rocsparse_pointer_mode_host`) or on the device (`rocsparse_pointer_mode_device`). It is controlled by `rocsparse_set_pointer_mode`.
- `rocsparse_operation trans`: matrix operation applied to the given input matrix. The following values are accepted:
- `rocsparse_operation_none`: identity operation $A' = A$.
- `rocsparse_operation_transpose`: transpose operation $A' = A^\mathrm{T}$.
- `rocsparse_operation_conjugate_transpose`: conjugate transpose operation (Hermitian matrix) $A' = A^\mathrm{H}$. This operation is not yet supported.
- `rocsparse_mat_descr descr`: holds all properties of a matrix. The properties set in this example are the following:
- `rocsparse_diag_type`: indicates whether the diagonal entries of a matrix are unit elements (`rocsparse_diag_type_unit`) or not (`rocsparse_diag_type_non_unit`).
- `rocsparse_fill_mode`: indicates whether a (triangular) matrix is lower (`rocsparse_fill_mode_lower`) or upper (`rocsparse_fill_mode_upper`) triangular.
- `rocsparse_[sdcz]csritsv_buffer_size` returns the size (in bytes) of the temporary storage buffer required for the `rocsparse_[sdcz]csritsv_analysis` and `rocsparse_[sdcz]csritsv_solve` functions. The character matched in `[sdcz]` coincides with the one matched in any of the mentioned functions.
- `rocsparse_solve_policy policy`: specifies the policy to follow for triangular solvers and factorizations. The only value accepted is `rocsparse_solve_policy_auto`.
- `rocsparse_[sdcz]csritsv_solve` solves a sparse triangular linear system $A' y = \alpha x$. The correct function signature should be chosen based on the datatype of the input matrix:
- `s` single-precision real (`float`)
- `d` double-precision real (`double`)
- `c` single-precision complex (`rocsparse_float_complex`)
- `z` double-precision complex (`rocsparse_double_complex`)
- `rocsparse_analysis_policy analysis`: specifies the policy to follow for analysis data. The following values are accepted:
- `rocsparse_analysis_policy_reuse`: the analysis data gathered is re-used.
- `rocsparse_analysis_policy_force`: the analysis data will be re-built.
- `rocsparse_[sdcz]csritsv_analysis` performs the analysis step for `rocsparse_[sdcz]csritsv_solve`. The character matched in `[sdcz]` coincides with the one matched in `rocsparse_[sdcz]csritsv_solve`.
- `rocsparse_csritsv_zero_pivot(rocsparse_handle, rocsparse_mat_info, rocsparse_int *position)` returns `rocsparse_status_zero_pivot` if either a structural or numerical zero has been found during the execution of `rocsparse_[sdcz]csritsv_solve` and stores in `position` the index $i$ of the first zero pivot $A_{ii}$ found. If no zero pivot is found it returns `rocsparse_status_success`.

## Demonstrated API Calls

### rocSPARSE

- `rocsparse_analysis_policy`
- `rocsparse_analysis_policy_reuse`
- `rocsparse_create_handle`
- `rocsparse_create_mat_descr`
- `rocsparse_create_mat_info`
- `rocsparse_csritsv_zero_pivot`
- `rocsparse_dcsritsv_analysis`
- `rocsparse_dcsritsv_buffer_size`
- `rocsparse_dcsritsv_solve`
- `rocsparse_destroy_handle`
- `rocsparse_destroy_mat_descr`
- `rocsparse_destroy_mat_info`
- `rocsparse_diag_type_non_unit`
- `rocsparse_fill_mode_lower`
- `rocsparse_handle`
- `rocsparse_int`
- `rocsparse_mat_descr`
- `rocsparse_mat_info`
- `rocsparse_operation`
- `rocsparse_operation_none`
- `rocsparse_pointer_mode_host`
- `rocsparse_set_mat_diag_type`
- `rocsparse_set_mat_fill_mode`
- `rocsparse_set_pointer_mode`
- `rocsparse_solve_policy`
- `rocsparse_solve_policy_auto`
- `rocsparse_status`
- `rocsparse_status_zero_pivot`

### HIP runtime

- `hipDeviceSynchronize`
- `hipFree`
- `hipMalloc`
- `hipMemcpy`
- `hipMemcpyDeviceToHost`
- `hipMemcpyHostToDevice`
24 changes: 24 additions & 0 deletions Libraries/rocSPARSE/level_2/csritsv/csritsv_vs2017.sln
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 15
VisualStudioVersion = 15.0.33026.149
MinimumVisualStudioVersion = 10.0.40219.1
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "csritsv_vs2017", "csritsv_vs2017.vcxproj", "{F0AF1DEB-4B07-4FDC-8566-FB53F60D10B7}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x64 = Debug|x64
Release|x64 = Release|x64
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{F0AF1DEB-4B07-4FDC-8566-FB53F60D10B7}.Debug|x64.ActiveCfg = Debug|x64
{F0AF1DEB-4B07-4FDC-8566-FB53F60D10B7}.Debug|x64.Build.0 = Debug|x64
{F0AF1DEB-4B07-4FDC-8566-FB53F60D10B7}.Release|x64.ActiveCfg = Release|x64
{F0AF1DEB-4B07-4FDC-8566-FB53F60D10B7}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {E11DC4C1-CA8A-46CA-93BB-3CB480169DA5}
EndGlobalSection
EndGlobal
Loading
Loading