Skip to content

Commit

Permalink
Merge pull request #46 from rozukke/fix/qc-pass-prefinal
Browse files (browse the repository at this point in the history)
Prefinal QC pass
  • Loading branch information
nhatdongdang committed Jul 8, 2024
2 parents e63a2cc + e716eb5 commit 16eaf0d
Show file tree
Hide file tree
Showing 9 changed files with 188 additions and 158 deletions.
32 changes: 20 additions & 12 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,34 +1,42 @@
# NOTE(review): this region is a rendered diff of CMakeLists.txt with no +/-
# markers, so a replaced line and its replacement appear back to back (e.g. the
# two project() calls below). Which line of each pair is current is not marked
# here -- TODO confirm against the repository before treating this as a script.
cmake_minimum_required(VERSION 3.16)

# Project declaration. The two variants differ only in whether CXX is enabled
# in addition to C.
project(ichida-algo LANGUAGES C CXX)
project(ichida-algo LANGUAGES C)

# C compiler flags: -O3, tuning for the build host (-march=native), fast-math,
# loop unrolling, OpenMP and warnings. The second variant adds -Wpedantic.
# These set() calls do not append to an existing CMAKE_C_FLAGS value; the
# variable is assigned the literal string.
set(CMAKE_C_FLAGS "-O3 -march=native -ffast-math -funroll-loops -fopenmp -Wall -Wextra")
set(CMAKE_C_FLAGS "-O3 -march=native -ffast-math -funroll-loops -fopenmp -Wall -Wextra -Wpedantic")
# Request C11 and make it a hard requirement rather than a preference.
set(CMAKE_C_STANDARD 11)
set(CMAKE_C_STANDARD_REQUIRED True)
# Have generated Makefiles print the full command lines they run.
set(CMAKE_VERBOSE_MAKEFILE ON)

# Directory names, relative to this file. SRC_DIR and SOURCE_DIR hold the same
# value; each is referenced by one of the two file() variants below.
set(INC_DIR include)
set(SRC_DIR src)
set(SOURCE_DIR src)
set(CUDA_SRC_DIR cudasrc)

# Directory-scoped include path: applies to every target defined after it,
# including speed_gpu further down.
include_directories(${INC_DIR})

# Collect the C sources by glob. GLOB_RECURSE also descends into
# subdirectories of src; plain GLOB only matches files directly inside it.
# No CONFIGURE_DEPENDS is given, so newly added files are only picked up when
# CMake is re-run.
file(GLOB_RECURSE SOURCE_FILES ${SRC_DIR}/*.c)
file(GLOB SOURCE_FILES ${SOURCE_DIR}/*.c)

# CPU executable, linked against libm, pthread and the GNU OpenMP runtime
# (gomp) by plain library name.
add_executable(speed_cpu ${SOURCE_FILES})
target_link_libraries(speed_cpu m pthread gomp)

# Optional GPU build. find_package(CUDA) is not REQUIRED, so a missing toolkit
# is not an error; the else() branch at the bottom just reports it.
# NOTE(review): this region is a rendered diff without +/- markers. The
# unconditional `find_package(MPI REQUIRED)` lines and the `if(COMPILE_MPI)`
# section look like the old and new sides of the same change -- TODO confirm.
find_package(CUDA)

if(CUDA_FOUND)
enable_language(CUDA)
# Target compute capability 8.0. The same architecture is also passed
# explicitly through -arch=sm_80 in CMAKE_CUDA_FLAGS below.
set(CMAKE_CUDA_ARCHITECTURES "80")
# Appends to any existing CMAKE_CUDA_FLAGS (unlike CMAKE_C_FLAGS, which is
# assigned outright earlier in the file).
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xptxas -O3 --use_fast_math -Xcompiler -march=native -unroll-aggressive -arch=sm_80")
# Variant with MPI as a hard requirement for the GPU build.
find_package(MPI REQUIRED)
include_directories(${MPI_INCLUDE_PATH})
# GPU executable built from every .cu file found recursively under cudasrc.
file(GLOB_RECURSE CUDA_SOURCE_FILES ${CUDA_SRC_DIR}/*.cu)
add_executable(speed_gpu ${CUDA_SOURCE_FILES})
set_target_properties(speed_gpu PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
target_link_libraries(speed_gpu m ${MPI_LIBRARIES})

# Variant with MPI as an opt-in, controlled by the COMPILE_MPI variable.
if(COMPILE_MPI)
find_package(MPI)
if(MPI_FOUND)
include_directories(${MPI_INCLUDE_PATH})
target_link_libraries(speed_gpu m ${MPI_LIBRARIES})
# Sets the target's COMPILE_DEFINITIONS property to USE_MPI (set_property
# without APPEND replaces the property value).
set_property(TARGET speed_gpu PROPERTY COMPILE_DEFINITIONS USE_MPI)
else()
# NOTE(review): in this branch no target_link_libraries() call is made for
# speed_gpu, so `m` is not linked explicitly -- confirm this is intended.
message(STATUS "MPI not found. Please install library to compile with MPI enabled.")
endif(MPI_FOUND)

else()
# COMPILE_MPI is off or unset: link libm only.
target_link_libraries(speed_gpu m)
endif()

else()
message(STATUS "CUDA not found, only CPU version will be built.")
endif()
Expand Down
28 changes: 18 additions & 10 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# NOTE(review): this region is a rendered diff of the Makefile with no +/-
# markers; a replaced line and its replacement appear back to back (e.g. the
# two .PHONY lines). TODO confirm which line of each pair is current.
# NOTE(review): the second .PHONY list names neither run_cpu nor run_gpu, and
# neither list names run_mpi, although all three are defined further down.
.PHONY: all clean build run_cpu run_gpu test_cpu test_gpu bench stat
.PHONY: all clean build build_mpi build_gpu test_cpu test_gpu bench stat

# Default iteration count passed to the speed_* binaries. `?=` only assigns
# when `iterations` is not already set, so `make iterations=N ...` or the
# environment overrides it.
iterations ?= 1000
iterations ?= 100000

# Default goal: delegates to `build`.
all: build

# NOTE(review): the next line is a diff hunk header fused with the `clean:`
# target name. Makefile lines between the previous hunk and this one are not
# shown, and recipe indentation (tabs) has been lost in this rendering.
Expand All @@ -11,26 +11,34 @@ clean:
rm -rf build
rm -f speed_cpu speed_gpu

# Configure into ./build as a Release build, run the generated build there,
# then copy the binaries next to this Makefile. speed_gpu is copied only when
# it exists. `build: clean` / `build_gpu: clean` appear to be the old and new
# headers for the same recipe -- TODO confirm.
build: clean
build_gpu: clean
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release
$(MAKE) -C build
cp -u build/speed_cpu ./
if [ -f build/speed_gpu ]; then cp -u build/speed_gpu ./; fi

# Same steps as above, but configures with -DCOMPILE_MPI=True. The
# `run_cpu: build` line looks like a leftover from the old side of the diff.
run_cpu: build
build_mpi: clean
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DCOMPILE_MPI=True
$(MAKE) -C build
cp -u build/speed_cpu ./
if [ -f build/speed_gpu ]; then cp -u build/speed_gpu ./; fi

# `build` is an alias for the MPI-enabled build.
build: build_mpi

# Run the CPU binary with the weights file, the tensors directory and the
# iteration count. This variant has no prerequisites, so it does not rebuild.
run_cpu:
./speed_cpu ./weights_and_biases.txt ./tensors $(iterations)

# Run the GPU binary directly (single process, no mpirun).
run_gpu: build
run_gpu:
./speed_gpu ./weights_and_biases.txt ./tensors $(iterations)

# Launch the GPU binary under mpirun with one process per line printed by
# `nvidia-smi --query-gpu=name`. The count comes from $(shell ...), which make
# expands before the recipe's shell command runs.
run_mpi:
n_gpus=$(shell nvidia-smi --query-gpu=name --format=csv,noheader | wc -l); \
mpirun -np $$n_gpus ./speed_gpu ./weights_and_biases.txt ./tensors $(iterations)

# Build, run the CPU binary, move results.csv into ./test and run the
# verification script. results.csv is presumably written by the binary --
# confirm against the sources.
test_cpu: build
./speed_cpu ./weights_and_biases.txt ./tensors $(iterations)
test_cpu: build run_cpu
mv ./results.csv ./test
python3 ./test/verify_csv.py

# GPU counterpart of test_cpu. One variant inlines the mpirun invocation, the
# other depends on run_mpi instead.
test_gpu: build
n_gpus=$(shell nvidia-smi --query-gpu=name --format=csv,noheader | wc -l); \
mpirun -np $$n_gpus ./speed_gpu ./weights_and_biases.txt ./tensors $(iterations)
test_gpu: build run_mpi
mv ./results.csv ./test
python3 ./test/verify_csv.py
Loading

0 comments on commit 16eaf0d

Please sign in to comment.