forked from thu-pacman/HyQuas
-
Notifications
You must be signed in to change notification settings - Fork 2
/
CMakeLists.txt
197 lines (172 loc) · 7 KB
/
CMakeLists.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
# Top-level build script: declares the project, locates OpenMP and MPI, and
# establishes the baseline C++ compiler flags used by every target.
cmake_minimum_required(VERSION 3.1)
project(QCSimulatorRoot)

# Let include() and find_package() also search the project's own cmake/ folder.
set(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake;${CMAKE_MODULE_PATH}")

find_package(OpenMP REQUIRED)
find_package(MPI REQUIRED)

include_directories(${PROJECT_SOURCE_DIR}/third-party/dbg-macro)

# Baseline flags. Whatever the user supplied through -DCMAKE_CXX_FLAGS or the
# CXXFLAGS environment variable is kept and placed last, so it is no longer
# silently discarded and can override the project defaults.
set(CMAKE_CXX_FLAGS "-std=c++14 -Ofast -g -Wall ${OpenMP_CXX_FLAGS} ${CMAKE_CXX_FLAGS}")
# ---------------------------------------------------------------------------
# User-configurable settings (override on the command line with -D<NAME>=...)
# ---------------------------------------------------------------------------

# Application mode and target hardware; both are dispatched on later in this
# file to select preprocessor definitions and dependencies.
set(MODE "statevec" CACHE STRING "Application, one of [statevec, densitypure, densityerr]")
set(HARDWARE "gpu" CACHE STRING "Hardware, one of [cpu, gpu]")
message(STATUS "Mode: ${MODE}")
message(STATUS "Hardware: ${HARDWARE}")

# Diagnostics / reporting.
option(SHOW_SCHEDULE "Print the schedule" ON)
option(SHOW_SUMMARY "Show the running details" ON)
option(MEASURE_STAGE "Measure time of each stage" OFF)

# Extra executables built from micro-benchmark/.
option(MICRO_BENCH "Compile micro-benchmarks" OFF)

# NOTE: when ON this file adds -DNDEBUG; when OFF it adds -DDEBUG instead.
option(DISABLE_ASSERT "Use assert in cuda runtime" ON)

# Each of the following is forwarded as a preprocessor definition later on.
option(USE_DOUBLE "double or float" ON)
option(ENABLE_OVERLAP "overlap" ON)
option(USE_MPI "use mpi" OFF)
option(USE_ALL_TO_ALL "use all to all for communication" OFF)
option(ENABLE_TRANSFORM "use transformations" ON)
# Translate the MODE cache string into the integer MODE preprocessor macro:
#   statevec -> 0, densitypure -> 1, densityerr -> 2
# Any other value aborts configuration.
if(MODE STREQUAL "statevec")
  add_definitions(-DMODE=0)
elseif(MODE STREQUAL "densitypure")
  add_definitions(-DMODE=1)
elseif(MODE STREQUAL "densityerr")
  add_definitions(-DMODE=2)
else()
  # "ERROR" is not a message() mode keyword: it would be printed as part of an
  # ordinary notice and configuration would continue with MODE undefined.
  # FATAL_ERROR actually stops processing.
  message(FATAL_ERROR "invalid mode '${MODE}', expected one of [statevec, densitypure, densityerr]")
endif()
# ---------------------------------------------------------------------------
# Numeric tuning parameters.
# Each one is a cache string that is echoed at configure time and then passed
# to the compiler as a <NAME>_DEFINED-style preprocessor macro.
# ---------------------------------------------------------------------------

# MAT -> BLAS_MAT_LIMIT_DEFINED
set(MAT "6" CACHE STRING "mat size")
message(STATUS "mat size = ${MAT}")
add_definitions(-DBLAS_MAT_LIMIT_DEFINED=${MAT})

# MIN_MAT -> MIN_MAT_SIZE_DEFINED
set(MIN_MAT "4" CACHE STRING "min mat size")
message(STATUS "min mat size = ${MIN_MAT}")
add_definitions(-DMIN_MAT_SIZE_DEFINED=${MIN_MAT})

# LOCAL_QUBIT_SIZE -> LOCAL_QUBIT_SIZE_DEFINED
set(LOCAL_QUBIT_SIZE "10" CACHE STRING "local qubit size")
message(STATUS "local qubit size = ${LOCAL_QUBIT_SIZE}")
add_definitions(-DLOCAL_QUBIT_SIZE_DEFINED=${LOCAL_QUBIT_SIZE})

# MAX_ERROR_LEN -> MAX_ERROR_LEN_DEFINED
set(MAX_ERROR_LEN "3" CACHE STRING "maximum error channels for each gate")
message(STATUS "max error len = ${MAX_ERROR_LEN}")
add_definitions(-DMAX_ERROR_LEN_DEFINED=${MAX_ERROR_LEN})
# Select the compute backend and expose it as the integer GPU_BACKEND macro:
#   serial -> 0, group -> 1, group-serial -> 2, blas -> 3, mix -> 4,
#   blas-advance -> 5
# Any other value aborts configuration.
set(GPU_BACKEND "group" CACHE STRING "Backend mode, one of [serial, group, group-serial, blas, mix, blas-advance]")
if(GPU_BACKEND STREQUAL "serial")
  add_definitions(-DGPU_BACKEND=0)
elseif(GPU_BACKEND STREQUAL "group")
  add_definitions(-DGPU_BACKEND=1)
elseif(GPU_BACKEND STREQUAL "group-serial")
  add_definitions(-DGPU_BACKEND=2)
elseif(GPU_BACKEND STREQUAL "blas")
  add_definitions(-DGPU_BACKEND=3)
elseif(GPU_BACKEND STREQUAL "mix")
  add_definitions(-DGPU_BACKEND=4)
elseif(GPU_BACKEND STREQUAL "blas-advance")
  add_definitions(-DGPU_BACKEND=5)
else()
  # "ERROR" is not a message() mode keyword, so the original call only printed
  # a notice and carried on without defining GPU_BACKEND. Stop for real.
  message(FATAL_ERROR "invalid backend '${GPU_BACKEND}', expected one of [serial, group, group-serial, blas, mix, blas-advance]")
endif()
# Hardware-specific configuration.
#   cpu: defines USE_CPU, locates HPTT, restricts the backend to group /
#        group-serial, and selects the AVX2 or AVX512 code path.
#   gpu: defines USE_GPU, locates CUDA, NCCL and cuTT, sets the nvcc flags and
#        declares the GPU-only options.
# Any other HARDWARE value aborts configuration.
if(HARDWARE STREQUAL "cpu")
  add_definitions(-DUSE_CPU)

  # The vendored third-party directory is passed as an extra search path.
  find_library(HPTT hptt "${PROJECT_SOURCE_DIR}/third-party/hptt/lib")
  if(NOT HPTT)
    # Fail here with a clear message instead of printing
    # "Found HPTT: HPTT-NOTFOUND" and failing later when it is linked.
    message(FATAL_ERROR "HPTT library not found; expected it under ${PROJECT_SOURCE_DIR}/third-party/hptt/lib")
  endif()
  include_directories(${PROJECT_SOURCE_DIR}/third-party/hptt/include)
  message(STATUS "Found HPTT: ${HPTT}")

  # Variable names (not ${...} expansions) are used inside if() so an empty
  # value cannot break parsing and the value is never dereferenced twice.
  if(NOT GPU_BACKEND STREQUAL "group" AND NOT GPU_BACKEND STREQUAL "group-serial")
    message(FATAL_ERROR "Only support group backend on CPU")
  endif()

  option(USE_AVX512 "enable avx512" ON)
  option(USE_AVX2 "enable avx2" OFF)
  if(USE_AVX512 AND USE_AVX2)
    message(FATAL_ERROR "Cannot enable AVX2 and AVX512 at the same time")
  endif()
  if(USE_AVX512)
    message(STATUS "AVX512 enabled")
    add_definitions(-DUSE_AVX512)
  endif()
  if(USE_AVX2)
    message(STATUS "AVX2 enabled")
    add_definitions(-DUSE_AVX2)
  endif()

  # Tune for the build machine's CPU.
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native -Ofast")
elseif(HARDWARE STREQUAL "gpu")
  find_package(CUDA REQUIRED)
  find_package(Nccl REQUIRED)

  find_library(CUTT cutt "${PROJECT_SOURCE_DIR}/third-party/cutt/cutt/lib")
  if(NOT CUTT)
    # Same early, explicit failure as for HPTT in the cpu branch.
    message(FATAL_ERROR "cuTT library not found; expected it under ${PROJECT_SOURCE_DIR}/third-party/cutt/cutt/lib")
  endif()
  include_directories(${PROJECT_SOURCE_DIR}/third-party/cutt/cutt/include)
  message(STATUS "Found CUTT: ${CUTT}")

  add_definitions(-DUSE_GPU)

  # nvcc flags; note the architecture is hard-coded to compute_70 / sm_70.
  set(CUDA_NVCC_FLAGS "-Xcompiler -fopenmp -std=c++14 -O2 -g -arch=compute_70 -code=sm_70 --ptxas-options=-v -lineinfo -keep")
  message(STATUS "GPU Backend: ${GPU_BACKEND}")

  # Options that exist only for GPU builds; they are consumed later in this
  # file and are simply undefined (false) in cpu builds.
  option(EVALUATOR_PREPROCESS "compile evaluator preprocess" OFF)
  option(OVERLAP_MAT "overlap initMatirx" ON)
  option(LOG_EVALUATOR "show logging of evaluator" OFF)

  set(THREAD_DEP "7" CACHE STRING "thread dep")
  message(STATUS "thread_dep = ${THREAD_DEP}")
  add_definitions(-DTHREAD_DEP_DEFINED=${THREAD_DEP})
else()
  # "ERROR" is not a message() mode keyword; the original call printed a
  # notice and continued with neither USE_CPU nor USE_GPU defined.
  message(FATAL_ERROR "invalid hardware '${HARDWARE}', expected one of [cpu, gpu]")
endif()
# ---------------------------------------------------------------------------
# Forward the boolean build options to the compiler as preprocessor macros.
# ---------------------------------------------------------------------------

if(SHOW_SCHEDULE)
  add_definitions(-DSHOW_SCHEDULE)
endif()

if(SHOW_SUMMARY)
  add_definitions(-DSHOW_SUMMARY)
endif()

if(MEASURE_STAGE)
  add_definitions(-DMEASURE_STAGE)
endif()

# Exactly one of DEBUG / NDEBUG is always defined.
if(NOT DISABLE_ASSERT)
  add_definitions(-DDEBUG)
else()
  add_definitions(-DNDEBUG)
endif()

if(ENABLE_OVERLAP)
  add_definitions(-DENABLE_OVERLAP)
endif()

# Single precision is the absence of USE_DOUBLE; no macro is set for it.
if(NOT USE_DOUBLE)
  message(STATUS "Float type: Float")
else()
  message(STATUS "Float type: Double")
  add_definitions(-DUSE_DOUBLE)
endif()

# OVERLAP_MAT is declared only in the gpu hardware branch, so this is never
# taken in a cpu build unless the variable is set by other means.
if(OVERLAP_MAT)
  add_definitions(-DOVERLAP_MAT)
endif()

# USE_MPI is always defined, with value 1 or 0, rather than defined/undefined.
if(NOT USE_MPI)
  add_definitions(-DUSE_MPI=0)
else()
  add_definitions(-DUSE_MPI=1)
endif()

# The option is USE_ALL_TO_ALL but the macro seen by the sources is ALL_TO_ALL.
if(USE_ALL_TO_ALL)
  message(STATUS "Enable all to all")
  add_definitions(-DALL_TO_ALL)
endif()

if(ENABLE_TRANSFORM)
  message(STATUS "Enable transform")
  add_definitions(-DENABLE_TRANSFORM)
endif()
# Communication / memory-layout parameters. Each is echoed at configure time
# and forwarded to the compiler as a preprocessor macro.
set(COALESCE "3" CACHE STRING "coalescing size")
message(STATUS "coalesce = ${COALESCE}")
add_definitions(-DCOALESCE_GLOBAL_DEFINED=${COALESCE})

set(INPLACE "0" CACHE STRING "fixed local size")
message(STATUS "INPLACE = ${INPLACE}")
add_definitions(-DINPLACE=${INPLACE})

set(MAX_SLICE "20" CACHE STRING "max slice in inplace mode")
message(STATUS "MAX_SLICE = ${MAX_SLICE}")
add_definitions(-DMAX_SLICE=${MAX_SLICE})

# A non-zero INPLACE turns in-place mode on, which is incompatible with
# ENABLE_OVERLAP and with every backend other than "group".
# Variable names are used instead of ${...} expansions so that an empty value
# cannot produce an if() parse error and no value is dereferenced twice.
if(NOT INPLACE EQUAL "0")
  if(ENABLE_OVERLAP)
    message(FATAL_ERROR "Do not support INPLACE and ENABLE_OVERLAP simultaneously")
  endif()
  if(NOT GPU_BACKEND STREQUAL "group")
    message(FATAL_ERROR "Only allow group GPU_BACKEND when INPLACE is enabled. INPLACE communication cannot save memory when using blas backend or mix backend")
  endif()
endif()
# Optional evaluator tooling. Both switches are declared as options only in the
# gpu hardware branch of this file.

# Builds the stand-alone "process" executable and tells the rest of the code
# base (via USE_EVALUATOR_PREPROCESS) that it is available.
if(EVALUATOR_PREPROCESS)
  set(PROCESS process)
  add_executable(process evaluator-preprocess/process.cpp)
  # QCSimulator is presumably the library target created under src/ -- confirm.
  target_link_libraries(process
    QCSimulator
    ${CUTT}
    ${OpenMP_CXX_FLAGS}
    ${CUDA_CUBLAS_LIBRARIES}
    ${MPI_CXX_LIBRARIES}
    ${NCCL_LIBRARY}
    ${HPTT}
  )
  add_definitions(-DUSE_EVALUATOR_PREPROCESS)
endif()

# Enables evaluator logging through the LOG_EVALUATOR macro.
if(LOG_EVALUATOR)
  add_definitions(-DLOG_EVALUATOR)
endif()
# Core sources live in src/; its headers are made visible to every target in
# this directory and below.
include_directories("${PROJECT_SOURCE_DIR}/src")
add_subdirectory("src")

# Main executable. Dependency variables that belong to the hardware branch not
# selected (e.g. CUTT in a cpu build, HPTT in a gpu build) are never set there
# and expand to nothing.
add_executable(main main.cpp)
target_link_libraries(main
  QCSimulator
  ${CUTT}
  ${OpenMP_CXX_FLAGS}
  ${CUDA_CUBLAS_LIBRARIES}
  ${MPI_CXX_LIBRARIES}
  ${NCCL_LIBRARY}
  ${HPTT}
)
# Optional micro-benchmarks: one executable per entry in BENCHMARKS, each built
# from micro-benchmark/<name>.cpp and linked like the main executable.
if(MICRO_BENCH)
  set(BENCHMARKS
    local-single
    local-ctr
    two-group-h
    bench-blas
  )
  foreach(BENCHMARK IN LISTS BENCHMARKS)
    add_executable(${BENCHMARK} micro-benchmark/${BENCHMARK}.cpp)
    target_link_libraries(${BENCHMARK}
      QCSimulator
      ${CUTT}
      ${OpenMP_CXX_FLAGS}
      ${CUDA_CUBLAS_LIBRARIES}
      ${MPI_CXX_LIBRARIES}
      ${NCCL_LIBRARY}
      ${HPTT}
    )
  endforeach()
endif()