-
Notifications
You must be signed in to change notification settings - Fork 67
OpenACC and Hackathons Summit Performance Data Documentation
Matt Norman edited this page Jul 13, 2022
·
2 revisions
RTX 5000, single precision only, using CUDA release 11.7 (V11.7.64) with driver 515.43.04
# GNU Fortran configuration (via the mpif90 wrapper) that sets the OpenMP,
# OpenACC, and OpenMP 4.5 flag groups, built with -DSINGLE_PREC.
# NOTE(review): the module name suggests GCC 12.1.0 -- presumably a
# site-generated module; confirm it exists on the target system.
# -foffload=nvptx-none="..." passes the quoted options on to GCC's NVPTX
# offload compiler, which is why the inner quotes are backslash-escaped.
# Problem size: NX=2048, NZ=1024, SIM_TIME=10, OUT_FREQ=20.
# The trailing ".." is the source directory handed to cmake.
module load gcc-12.1.0-gcc-11.1.0-g2ai6t2
cmake -DCMAKE_Fortran_COMPILER=mpif90 \
-DOPENMP_FLAGS="-fopenmp" \
-DOPENACC_FLAGS="-fopenacc -ffast-math -foffload=nvptx-none=\"-lm -O3 -ffast-math -DSINGLE_PREC -march=sm_80 -moptimize\" -fopenacc-dim=16384:1:128 -DSINGLE_PREC -fopt-info-omp" \
-DOPENMP45_FLAGS="-fopenmp -ffast-math -foffload=nvptx-none=\"-lm -O3 -latomic -ffast-math -DSINGLE_PREC -march=sm_80 -moptimize\" -DSINGLE_PREC -fopt-info-omp" \
-DFFLAGS="-O3 -march=native -mtune=native -ffree-line-length-none -DNO_INFORM -DSINGLE_PREC -I/usr/lib/x86_64-linux-gnu/fortran/gfortran-mod-15" \
-DLDFLAGS="-L/usr/lib/x86_64-linux-gnu -lpnetcdf" \
-DNX=2048 \
-DNZ=1024 \
-DSIM_TIME=10 \
-DOUT_FREQ=20 \
..
# nvfortran configuration: OMPI_FC tells the Open MPI compiler wrapper
# (mpif90 below) to invoke nvfortran as the underlying Fortran compiler.
# Sets OpenMP (-mp), OpenACC (-acc), OpenMP 4.5 offload (-mp=gpu), and
# do-concurrent (-stdpar=gpu) flag groups, all with -gpu=cc86.
# The do-concurrent group uses loadcache:L2 and omits "pinned", unlike the
# OpenACC and OpenMP 4.5 groups, which use loadcache:L1 with "pinned".
# Problem size: NX=2048, NZ=1024, SIM_TIME=10, OUT_FREQ=20.
# The trailing ".." is the source directory handed to cmake.
export OMPI_FC=nvfortran
# Version 22.5.0
cmake -DCMAKE_Fortran_COMPILER=mpif90 \
-DFFLAGS="-O3 -march=native -mtune=native -Mextend -DNO_INFORM -DSINGLE_PREC -I/opt/parallel-netcdf-1.12.0_nvhpc/include" \
-DLDFLAGS="-L/opt/parallel-netcdf-1.12.0_nvhpc/lib -lpnetcdf" \
-DOPENMP_FLAGS="-mp -Minfo=mp" \
-DOPENACC_FLAGS:STRING="-acc -gpu=cc86,fastmath,loadcache:L1,pinned,unroll,fma,ptxinfo -Minfo=accel" \
-DOPENMP45_FLAGS:STRING="-Minfo=mp -mp=gpu -gpu=cc86,fastmath,loadcache:L1,pinned,unroll,fma,ptxinfo" \
-DDO_CONCURRENT_FLAGS:STRING="-stdpar=gpu -Minfo=stdpar -gpu=cc86,fastmath,loadcache:L2,unroll,fma,ptxinfo" \
-DNX=2048 \
-DNZ=1024 \
-DSIM_TIME=10 \
-DOUT_FREQ=20 \
..
# YAKL build with the CUDA backend (YAKL_ARCH="CUDA").
# The OMPI_* variables point the Open MPI wrappers (mpic++, mpif90, mpicc)
# at the GNU 11 compilers; -ccbin mpic++ in YAKL_CUDA_FLAGS then names that
# wrapper as the host compiler for the CUDA compile, targeting -arch sm_86.
# ./cmake_clean.sh runs before configuring.
# NOTE(review): presumably it clears earlier CMake output -- confirm.
# Problem size: NX=2048, NZ=1024, SIM_TIME=10.
# NOTE(review): OUT_FREQ=-1 differs from the other configurations;
# presumably it turns file output off -- confirm against the model source.
export OMPI_CXX=g++-11
export OMPI_FC=gfortran-11
export OMPI_F90=gfortran-11
export OMPI_CC=gcc-11
./cmake_clean.sh
cmake -DCMAKE_CXX_COMPILER=mpic++ \
-DCMAKE_Fortran_COMPILER=mpif90 \
-DCMAKE_C_COMPILER=mpicc \
-DYAKL_CUDA_FLAGS="-O3 -DHAVE_MPI -DNO_INFORM --use_fast_math -arch sm_86 -ccbin mpic++ -DSINGLE_PREC -I/usr/lib/x86_64-linux-gnu/fortran/gfortran-mod-15" \
-DLDFLAGS="-L/usr/lib/x86_64-linux-gnu -lpnetcdf" \
-DNX=2048 \
-DNZ=1024 \
-DSIM_TIME=10 \
-DOUT_FREQ=-1 \
-DYAKL_ARCH="CUDA" \
..
# GNU Fortran configuration with only FFLAGS/LDFLAGS set (no OpenMP, OpenACC,
# or offload flag groups are passed), built with -DSINGLE_PREC.
# Uses the smaller problem size: NX=256, NZ=128, SIM_TIME=250, OUT_FREQ=2000.
# The trailing ".." is the source directory handed to cmake.
module load gcc-12.1.0-gcc-11.1.0-g2ai6t2
cmake -DCMAKE_Fortran_COMPILER=mpif90 \
-DFFLAGS="-O3 -march=native -mtune=native -ffree-line-length-none -DNO_INFORM -DSINGLE_PREC -I/usr/lib/x86_64-linux-gnu/fortran/gfortran-mod-15" \
-DLDFLAGS="-L/usr/lib/x86_64-linux-gnu -lpnetcdf" \
-DNX=256 \
-DNZ=128 \
-DSIM_TIME=250 \
-DOUT_FREQ=2000 \
..
# Configuration linked against the "_intel" parallel-netcdf install, with
# -xHost / -fp-model=fast=2 / -qopenmp style flags.
# NOTE(review): presumably mpif90 must already resolve to an Intel Fortran
# compiler here; nothing in this snippet selects it -- confirm the environment.
# Problem size: NX=256, NZ=128, SIM_TIME=250, OUT_FREQ=500.
# The trailing ".." is the source directory handed to cmake.
cmake -DCMAKE_Fortran_COMPILER=mpif90 \
-DFFLAGS="-O3 -xHost -DNO_INFORM -DSINGLE_PREC -fp-model=fast=2 -fast-transcendentals -I/opt/parallel-netcdf-1.12.0_intel/include" \
-DLDFLAGS="-L/opt/parallel-netcdf-1.12.0_intel/lib -lpnetcdf" \
-DOPENMP_FLAGS="-qopenmp" \
-DNX=256 \
-DNZ=128 \
-DSIM_TIME=250 \
-DOUT_FREQ=500 \
..
# Configuration linked against the "_nvhpc" parallel-netcdf install, setting
# the OpenMP, OpenACC, OpenMP 4.5 offload, and do-concurrent flag groups
# (all GPU groups use -gpu=cc86).
# Problem size: NX=256, NZ=128, SIM_TIME=250, OUT_FREQ=500.
# The trailing ".." is the source directory handed to cmake.
# Each continuation backslash must be preceded by a space: the shell deletes
# backslash-newline outright, so "250\" followed by "-DOUT_FREQ" would fuse
# into a single argument.
# NOTE(review): unlike the other configurations, FFLAGS here does not define
# SINGLE_PREC -- confirm whether that is intentional.
cmake -DCMAKE_Fortran_COMPILER=mpif90 \
-DFFLAGS="-O3 -Mfprelaxed -march=native -mtune=native -Mextend -DNO_INFORM -I/opt/parallel-netcdf-1.12.0_nvhpc/include" \
-DLDFLAGS="-L/opt/parallel-netcdf-1.12.0_nvhpc/lib -lpnetcdf" \
-DOPENMP_FLAGS="-mp -Minfo=mp" \
-DOPENACC_FLAGS:STRING="-acc -gpu=cc86,fastmath,loadcache:L1,pinned,unroll,fma,ptxinfo -Minfo=accel" \
-DOPENMP45_FLAGS:STRING="-Minfo=mp -mp=gpu -gpu=cc86,fastmath,loadcache:L1,pinned,unroll,fma,ptxinfo" \
-DDO_CONCURRENT_FLAGS:STRING="-stdpar=gpu -Minfo=stdpar -gpu=cc86,fastmath,loadcache:L2,unroll,fma,ptxinfo" \
-DNX=256 \
-DNZ=128 \
-DSIM_TIME=250 \
-DOUT_FREQ=500 \
..