-
Notifications
You must be signed in to change notification settings - Fork 0
/
spmm.cu
106 lines (76 loc) · 3.67 KB
/
spmm.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
/********************************************************************************
* Main
* Sparse Matrix Multiplication Example
* Author : Janghyun Son
* Email : jhson989@gmail.com
*******************************************************************************/
#include <algorithm>
#include <time.h>
#include "include/config.cuh" // Program configuration
#include "include/debug.cuh" // Debug code
#include "include/data.cuh" // Sparse matrix generator
#include "include/convert.cuh" // CSR convertor
#include "include/matmul_sparse.cuh" // SpMM implementation
#include "include/matmul_dense.cuh" // DeMM implementation
/*******************************************************************
  * Program entry point.
  *
  * Builds the host matrices A (via make_sparse_matrix) and B (via
  * std::generate + get_random_number), uploads them to the GPU, runs
  * convert_to_CSR on the device copy of A, then calls spmm_cpu,
  * spmm_gpu_1 and demm_gpu_1 in that order, and finally frees every
  * device buffer.
  *
  * M, N, K, DTYPE, SPARSITY and cudaErrChk are not defined in this file;
  * presumably they come from the project headers included above.
  *
  * Returns 0 on completion.
  *******************************************************************/
int main(void) {

    /*******************************************************************
      * Log
      *******************************************************************/

    srand(time(NULL));

    // Element counts are computed once in size_t so that large M/N/K values
    // cannot overflow a narrower integer type before being used as sizes.
    const size_t size_A = static_cast<size_t>(M) * static_cast<size_t>(K);
    const size_t size_B = static_cast<size_t>(K) * static_cast<size_t>(N);
    const size_t size_C = static_cast<size_t>(M) * static_cast<size_t>(N);
    const double total_gb =
        static_cast<double>(sizeof(DTYPE)) * static_cast<double>(size_A + size_B + size_C) * 1e-9;

    std::cout << "" << std::endl;
    std::cout << "==========================================================" << std::endl;
    std::cout << "Sparse Matrix Multipliation Example" << std::endl;
    std::cout << " -- Mutiplication of a sparse matrix and a dense matrix" << std::endl;
    std::cout << " -- C["<<M<<","<<N<<"] = A["<<M<<","<<K<<"] * B["<<K<<","<<N<<"]" << std::endl;
    std::cout << " -- Total memory size: " << total_gb << " GB" << std::endl;
    std::cout << " -- Sparsity of matrix : " << SPARSITY << std::endl;
    std::cout << "==========================================================" << std::endl;
    std::cout << "" << std::endl;

    /*******************************************************************
      * Data initialization
      *******************************************************************/

    /* Host data generation */
    std::vector<DTYPE> A(size_A);
    make_sparse_matrix(A);
    std::vector<DTYPE> B(size_B);
    std::generate(B.begin(), B.end(), get_random_number);
    std::vector<DTYPE> C(size_C, 0);

    /* Alloc GPU memory */
    // Start from nullptr so the pointers never hold indeterminate values.
    DTYPE *d_A = nullptr, *d_B = nullptr, *d_C = nullptr;
    cudaErrChk( cudaMalloc((void**)&d_A, sizeof(DTYPE)*size_A) );
    cudaErrChk( cudaMalloc((void**)&d_B, sizeof(DTYPE)*size_B) );
    cudaErrChk( cudaMalloc((void**)&d_C, sizeof(DTYPE)*size_C) );

    /* Memcpy from host to device */
    cudaErrChk( cudaMemcpy(d_A, A.data(), sizeof(DTYPE)*size_A, cudaMemcpyHostToDevice) );
    cudaErrChk( cudaMemcpy(d_B, B.data(), sizeof(DTYPE)*size_B, cudaMemcpyHostToDevice) );
    // Checkpoint: surface any pending device error before the conversion step.
    cudaErrChk( cudaDeviceSynchronize() );
    cudaErrChk( cudaGetLastError() );

    /*******************************************************************
      * Conversion
      *******************************************************************/

    /* Device memory for CSR format array : rowPtr, col, value */
    // Initialized to nullptr: if convert_to_CSR leaves any of them unset,
    // the cudaFree calls below are then harmless no-ops instead of freeing
    // an indeterminate pointer.
    int *d_row_ptr = nullptr, *d_col = nullptr;
    DTYPE *d_value = nullptr;

    /* Run CSR convertor */
    // Presumably allocates the three CSR arrays on the device and returns
    // them through the out-pointers (they are released with cudaFree below)
    // -- confirm against include/convert.cuh.
    convert_to_CSR(d_A, (void**)&d_row_ptr, (void**)&d_col, (void**)&d_value);

    /*******************************************************************
      * Sparse - Dense Matrix Multiplication
      *******************************************************************/

    /* Run SpMM CPU implementation */
    // NOTE(review): the CSR pointers passed here are the ones later released
    // with cudaFree, i.e. device pointers -- confirm that spmm_cpu copies
    // them back to the host rather than dereferencing them directly.
    spmm_cpu(d_row_ptr, d_col, d_value, A, B, C);

    /* Run SpMM GPU implementation - 1 */
    spmm_gpu_1(d_row_ptr, d_col, d_value, d_A, d_B, d_C, A, B, C);

    /* Run DeMM (dense) GPU implementation - 1 */
    demm_gpu_1(d_A, d_B, d_C, A, B, C);

    /*******************************************************************
      * Finalize
      *******************************************************************/

    /* Dealloc memory */
    cudaErrChk( cudaFree(d_A) );
    cudaErrChk( cudaFree(d_B) );
    cudaErrChk( cudaFree(d_C) );
    cudaErrChk( cudaFree(d_row_ptr) );
    cudaErrChk( cudaFree(d_col) );
    cudaErrChk( cudaFree(d_value) );

    return 0;
}