Merge pull request #46 from rozukke/fix/qc-pass-prefinal

Prefinal QC pass
kachi-group · Jul 8, 2024 · 16eaf0d · 16eaf0d
2 parents e63a2cc + e716eb5
commit 16eaf0d
Show file tree

Hide file tree

Showing 9 changed files with 188 additions and 158 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -1,34 +1,42 @@
 cmake_minimum_required(VERSION 3.16)
 
-project(ichida-algo LANGUAGES C CXX)
+project(ichida-algo LANGUAGES C)
 
-set(CMAKE_C_FLAGS "-O3 -march=native -ffast-math -funroll-loops -fopenmp -Wall -Wextra")
+set(CMAKE_C_FLAGS "-O3 -march=native -ffast-math -funroll-loops -fopenmp -Wall -Wextra -Wpedantic")
 set(CMAKE_C_STANDARD 11)
 set(CMAKE_C_STANDARD_REQUIRED True)
-set(CMAKE_VERBOSE_MAKEFILE ON)
 
-set(INC_DIR include)
-set(SRC_DIR src)
+set(SOURCE_DIR src)
 set(CUDA_SRC_DIR cudasrc)
 
-include_directories(${INC_DIR})
-
-file(GLOB_RECURSE SOURCE_FILES ${SRC_DIR}/*.c)
+file(GLOB SOURCE_FILES ${SOURCE_DIR}/*.c)
 
 add_executable(speed_cpu ${SOURCE_FILES})
 target_link_libraries(speed_cpu m pthread gomp)
 
 find_package(CUDA)
-
 if(CUDA_FOUND)
  enable_language(CUDA)
+ set(CMAKE_CUDA_ARCHITECTURES "80")
  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xptxas -O3 --use_fast_math -Xcompiler -march=native -unroll-aggressive -arch=sm_80")
- find_package(MPI REQUIRED)
- include_directories(${MPI_INCLUDE_PATH})
  file(GLOB_RECURSE CUDA_SOURCE_FILES ${CUDA_SRC_DIR}/*.cu)
  add_executable(speed_gpu ${CUDA_SOURCE_FILES})
  set_target_properties(speed_gpu PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
- target_link_libraries(speed_gpu m ${MPI_LIBRARIES})
+
+ # libm is linked unconditionally so speed_gpu still gets it when MPI is requested but missing.
+ target_link_libraries(speed_gpu m)
+ # MPI is opt-in via -DCOMPILE_MPI=True; USE_MPI is defined only when MPI is actually found.
+ if(COMPILE_MPI)
+ find_package(MPI)
+ if(MPI_FOUND)
+ include_directories(${MPI_INCLUDE_PATH})
+ target_link_libraries(speed_gpu ${MPI_LIBRARIES})
+ target_compile_definitions(speed_gpu PRIVATE USE_MPI)
+ else()
+ message(STATUS "MPI not found. Please install library to compile with MPI enabled.")
+ endif()
+ endif()
+
 else()
  message(STATUS "CUDA not found, only CPU version will be built.")
 endif()

diff --git a/Makefile b/Makefile
@@ -1,7 +1,7 @@
-.PHONY: all clean build run_cpu run_gpu test_cpu test_gpu bench stat
+.PHONY: all clean build build_mpi build_gpu run_cpu run_gpu run_mpi test_cpu test_gpu bench stat
 
 # Default iterations
-iterations ?= 1000
+iterations ?= 100000
 
 all: build
 
@@ -11,26 +11,34 @@ clean:
  rm -rf build
  rm -f speed_cpu speed_gpu
 
-build: clean
+build_gpu: clean
  cmake -S . -B build -DCMAKE_BUILD_TYPE=Release
  $(MAKE) -C build
  cp -u build/speed_cpu ./
  if [ -f build/speed_gpu ]; then cp -u build/speed_gpu ./; fi
 
-run_cpu: build
+build_mpi: clean
+ cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DCOMPILE_MPI=True
+ $(MAKE) -C build
+ cp -u build/speed_cpu ./
+ if [ -f build/speed_gpu ]; then cp -u build/speed_gpu ./; fi
+
+build: build_mpi
+
+run_cpu:
  ./speed_cpu ./weights_and_biases.txt ./tensors $(iterations)
 
-run_gpu: build
+run_gpu:
+ ./speed_gpu ./weights_and_biases.txt ./tensors $(iterations)
+
+run_mpi:
  n_gpus=$(shell nvidia-smi --query-gpu=name --format=csv,noheader | wc -l); \
  mpirun -np $$n_gpus ./speed_gpu ./weights_and_biases.txt ./tensors $(iterations)
 
-test_cpu: build
- ./speed_cpu ./weights_and_biases.txt ./tensors $(iterations)
+test_cpu: build run_cpu
  mv ./results.csv ./test
  python3 ./test/verify_csv.py
 
-test_gpu: build
- n_gpus=$(shell nvidia-smi --query-gpu=name --format=csv,noheader | wc -l); \
- mpirun -np $$n_gpus ./speed_gpu ./weights_and_biases.txt ./tensors $(iterations)
+test_gpu: build run_mpi
  mv ./results.csv ./test
  python3 ./test/verify_csv.py