Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into 1623_add_training
Browse files Browse the repository at this point in the history
  • Loading branch information
hiker committed Jan 6, 2025
2 parents 2cbf3d0 + d6dc2bd commit 6b93d3c
Show file tree
Hide file tree
Showing 527 changed files with 16,682 additions and 8,551 deletions.
14 changes: 7 additions & 7 deletions .github/workflows/compilation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,14 +51,14 @@ on:
push

env:
CUDA_VERSION: 12.6.0
CUDA_VERSION: 12.6.3
GFORTRAN_VERSION: 14.2.0
HDF5_VERSION: 1.14.4.3
HDF5_VERSION: 1.14.5
NETCDF_C_VERSION: 4.9.2
NETCDF_FORTRAN_VERSION: 4.6.1
NVFORTRAN_VERSION: 24.7
OPENMPI_VERSION: 5.0.5
PYTHON_VERSION: 3.12.5
NVFORTRAN_VERSION: 24.11
OPENMPI_VERSION: 5.0.6
PYTHON_VERSION: 3.13.0

jobs:
run_if_on_mirror:
Expand Down Expand Up @@ -131,5 +131,5 @@ jobs:
module load nvidia-hpcsdk/${NVFORTRAN_VERSION}
module load hdf5/${HDF5_VERSION} netcdf_c/${NETCDF_C_VERSION} netcdf_fortran/${NETCDF_FORTRAN_VERSION}
F90=nvfortran F90FLAGS="-acc -Minfo=all" make -C tutorial/practicals/LFRic compile
F90=nvfortran F90FLAGS="-acc -Minfo=all -Mnofma -O2" make -C tutorial/practicals/nemo run
make -C tutorial/practicals/nemo/4_nemo_openacc acc_test
F90=nvfortran F90FLAGS="-acc -Minfo=all -Mnofma -O2" make -C tutorial/practicals/generic run
make -C tutorial/practicals/generic/4_openacc acc_test
131 changes: 110 additions & 21 deletions .github/workflows/lfric_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ jobs:
runs-on: self-hosted
env:
LFRIC_APPS_REV: 3269
PYTHON_VERSION: 3.12.5
PYTHON_VERSION: 3.13.0

steps:
- uses: actions/checkout@v3
Expand All @@ -73,7 +73,110 @@ jobs:
# than the latest release from pypi.
# pip install external/fparser
pip install .[test]
pip install jinja2
# PSyclone, compile and run MetOffice gungho_model on GPU
- name: LFRic GungHo with OpenMP offload
run: |
# Set up environment
source /apps/spack/psyclone-spack/spack-repo/share/spack/setup-env.sh
spack load lfric-build-environment%nvhpc
source .runner_venv/bin/activate
export PSYCLONE_LFRIC_DIR=${GITHUB_WORKSPACE}/examples/lfric/scripts
export PSYCLONE_CONFIG_FILE=${PSYCLONE_LFRIC_DIR}/KGOs/lfric_psyclone.cfg
# The LFRic source must be patched to workaround bugs in the NVIDIA
# compiler's namelist handling.
rm -rf ${HOME}/LFRic/gpu_build
mkdir -p ${HOME}/LFRic/gpu_build
cp -r ${HOME}/LFRic/lfric_apps_${LFRIC_APPS_REV} ${HOME}/LFRic/gpu_build/lfric_apps
cp -r ${HOME}/LFRic/lfric_core_50869 ${HOME}/LFRic/gpu_build/lfric
cd ${HOME}/LFRic/gpu_build
patch -p1 < ${PSYCLONE_LFRIC_DIR}/KGOs/lfric_${LFRIC_APPS_REV}_nvidia.patch
# Update the compiler definitions to build for GPU
cp ${PSYCLONE_LFRIC_DIR}/KGOs/nvfortran_acc.mk lfric/infrastructure/build/fortran/nvfortran.mk
cp ${PSYCLONE_LFRIC_DIR}/KGOs/nvc++.mk lfric/infrastructure/build/cxx/.
# Update the PSyclone commands to ensure transformed kernels are written
# to working directory.
cp ${PSYCLONE_LFRIC_DIR}/KGOs/psyclone.mk lfric/infrastructure/build/psyclone/.
# Update dependencies.sh to point to our patched lfric core.
sed -i -e 's/export lfric_core_sources=.*$/export lfric_core_sources\=\/home\/gh_runner\/LFRic\/gpu_build\/lfric/' lfric_apps/dependencies.sh
export LFRIC_DIR=${HOME}/LFRic/gpu_build/lfric_apps
export OPT_DIR=${LFRIC_DIR}/applications/gungho_model/optimisation/psyclone-test
cd ${LFRIC_DIR}
# PSyclone scripts must now be under 'optimisation' and be called 'global.py'
mkdir -p ${OPT_DIR}
cp ${PSYCLONE_LFRIC_DIR}/gpu_offloading.py ${OPT_DIR}/global.py
# Clean previous version and compile again
rm -rf applications/gungho_model/working
LFRIC_OFFLOAD_DIRECTIVES=omp ./build/local_build.py -a gungho_model -p psyclone-test
cd applications/gungho_model/example
cp ${PSYCLONE_LFRIC_DIR}/KGOs/lfric_gungho_configuration_4its.nml configuration.nml
mpirun -n 1 ../bin/gungho_model configuration.nml |& tee output.txt
python ${PSYCLONE_LFRIC_DIR}/compare_ouput.py ${PSYCLONE_LFRIC_DIR}/KGOs/lfric_gungho_configuration_4its_checksums.txt gungho_model-checksums.txt
cat timer.txt
export VAR_TIME=$(grep "gungho_model" timer.txt | cut -d'|' -f5)
export VAR_HALOS=$(grep "gungho_model" halo_calls_counter.txt | cut -d'|' -f5)
echo $GITHUB_REF_NAME $GITHUB_SHA $VAR_TIME $VAR_HALOS >> ${HOME}/store_results/lfric_omp_performance_history
${HOME}/mongosh-2.1.1-linux-x64/bin/mongosh \
"mongodb+srv://cluster0.x8ncpxi.mongodb.net/PerformanceMonitoring" \
--quiet --apiVersion 1 --username ${{ secrets.MONGODB_USERNAME }} \
--password ${{ secrets.MONGODB_PASSWORD }} \
--eval 'db.GitHub_CI.insertOne({branch_name: "'"$GITHUB_REF_NAME"'", commit: "'"$GITHUB_SHA"'",
github_job: "'"$GITHUB_RUN_ID"'"-"'"$GITHUB_RUN_ATTEMPT"'",
ci_test: "LFRic OpenMP offloading", lfric_apps_version: '"$LFRIC_APPS_REV"', system: "GlaDos",
compiler:"spack-nvhpc-24.5", date: new Date(), elapsed_time: '"$VAR_TIME"',
num_of_halo_exchanges: '"$VAR_HALOS"'})'
# PSyclone, compile and run MetOffice gungho_model on GPU
- name: LFRic GungHo with OpenACC offload
run: |
# Set up environment
source /apps/spack/psyclone-spack/spack-repo/share/spack/setup-env.sh
spack load lfric-build-environment%nvhpc
source .runner_venv/bin/activate
export PSYCLONE_LFRIC_DIR=${GITHUB_WORKSPACE}/examples/lfric/scripts
export PSYCLONE_CONFIG_FILE=${PSYCLONE_LFRIC_DIR}/KGOs/lfric_psyclone.cfg
# The LFRic source must be patched to workaround bugs in the NVIDIA
# compiler's namelist handling.
rm -rf ${HOME}/LFRic/gpu_build
mkdir -p ${HOME}/LFRic/gpu_build
cp -r ${HOME}/LFRic/lfric_apps_${LFRIC_APPS_REV} ${HOME}/LFRic/gpu_build/lfric_apps
cp -r ${HOME}/LFRic/lfric_core_50869 ${HOME}/LFRic/gpu_build/lfric
cd ${HOME}/LFRic/gpu_build
patch -p1 < ${PSYCLONE_LFRIC_DIR}/KGOs/lfric_${LFRIC_APPS_REV}_nvidia.patch
# Update the compiler definitions to build for GPU
cp ${PSYCLONE_LFRIC_DIR}/KGOs/nvfortran_acc.mk lfric/infrastructure/build/fortran/nvfortran.mk
cp ${PSYCLONE_LFRIC_DIR}/KGOs/nvc++.mk lfric/infrastructure/build/cxx/.
# Update the PSyclone commands to ensure transformed kernels are written
# to working directory.
cp ${PSYCLONE_LFRIC_DIR}/KGOs/psyclone.mk lfric/infrastructure/build/psyclone/.
# Update dependencies.sh to point to our patched lfric core.
sed -i -e 's/export lfric_core_sources=.*$/export lfric_core_sources\=\/home\/gh_runner\/LFRic\/gpu_build\/lfric/' lfric_apps/dependencies.sh
export LFRIC_DIR=${HOME}/LFRic/gpu_build/lfric_apps
export OPT_DIR=${LFRIC_DIR}/applications/gungho_model/optimisation/psyclone-test
cd ${LFRIC_DIR}
# PSyclone scripts must now be under 'optimisation' and be called 'global.py'
mkdir -p ${OPT_DIR}
cp ${PSYCLONE_LFRIC_DIR}/gpu_offloading.py ${OPT_DIR}/global.py
# Clean previous version and compile again
rm -rf applications/gungho_model/working
LFRIC_OFFLOAD_DIRECTIVES=acc ./build/local_build.py -a gungho_model -p psyclone-test
cd applications/gungho_model/example
cp ${PSYCLONE_LFRIC_DIR}/KGOs/lfric_gungho_configuration_4its.nml configuration.nml
mpirun -n 1 ../bin/gungho_model configuration.nml |& tee output.txt
python ${PSYCLONE_LFRIC_DIR}/compare_ouput.py ${PSYCLONE_LFRIC_DIR}/KGOs/lfric_gungho_configuration_4its_checksums.txt gungho_model-checksums.txt
cat timer.txt
export VAR_TIME=$(grep "gungho_model" timer.txt | cut -d'|' -f5)
export VAR_HALOS=$(grep "gungho_model" halo_calls_counter.txt | cut -d'|' -f5)
echo $GITHUB_REF_NAME $GITHUB_SHA $VAR_TIME $VAR_HALOS >> ${HOME}/store_results/lfric_acc_performance_history
${HOME}/mongosh-2.1.1-linux-x64/bin/mongosh \
"mongodb+srv://cluster0.x8ncpxi.mongodb.net/PerformanceMonitoring" \
--quiet --apiVersion 1 --username ${{ secrets.MONGODB_USERNAME }} \
--password ${{ secrets.MONGODB_PASSWORD }} \
--eval 'db.GitHub_CI.insertOne({branch_name: "'"$GITHUB_REF_NAME"'", commit: "'"$GITHUB_SHA"'",
github_job: "'"$GITHUB_RUN_ID"'"-"'"$GITHUB_RUN_ATTEMPT"'",
ci_test: "LFRic OpenACC", lfric_apps_version: '"$LFRIC_APPS_REV"', system: "GlaDos",
compiler:"spack-nvhpc-24.5", date: new Date(), elapsed_time: '"$VAR_TIME"',
num_of_halo_exchanges: '"$VAR_HALOS"'})'
# PSyclone, compile and run MetOffice LFRic with 6 MPI ranks
- name: LFRic passthrough (with DistributedMemory)
Expand All @@ -91,9 +194,9 @@ jobs:
./build/local_build.py -a gungho_model -v
# Run
cd applications/gungho_model/example
cp ${PSYCLONE_LFRIC_DIR}/KGOs/lfric_gunho_configuration_4its.nml configuration.nml
cp ${PSYCLONE_LFRIC_DIR}/KGOs/lfric_gungho_configuration_4its.nml configuration.nml
mpirun -n 6 ../bin/gungho_model configuration.nml |& tee output.txt
python ${PSYCLONE_LFRIC_DIR}/compare_ouput.py ${PSYCLONE_LFRIC_DIR}/KGOs/lfric_gunho_configuration_4its_checksums.txt gungho_model-checksums.txt
python ${PSYCLONE_LFRIC_DIR}/compare_ouput.py ${PSYCLONE_LFRIC_DIR}/KGOs/lfric_gungho_configuration_4its_checksums.txt gungho_model-checksums.txt
cat timer.txt
export VAR_TIME=$(grep "gungho_model" timer.txt | cut -d'|' -f5)
export VAR_HALOS=$(grep "gungho_model" halo_calls_counter.txt | cut -d'|' -f5)
Expand All @@ -108,13 +211,6 @@ jobs:
compiler:"spack-gfortran-14", date: new Date(), elapsed_time: '"$VAR_TIME"',
num_of_halo_exchanges: '"$VAR_HALOS"'})'
- name: Upload LFRic passthrough results
uses: exuanbo/actions-deploy-gist@v1
with:
token: ${{ secrets.GIST_TOKEN }}
gist_id: a4049a0fc0a0a11651a5ce6a04d76160
file_path: ../../../../store_results/lfric_passthrough_performance_history

# PSyclone, compile and run MetOffice LFRic with all optimisations and 6 OpenMP threads
- name: LFRic with all transformations
run: |
Expand All @@ -127,18 +223,18 @@ jobs:
export LFRIC_DIR=${HOME}/LFRic/lfric_apps_${LFRIC_APPS_REV}
export OPT_DIR=${LFRIC_DIR}/applications/gungho_model/optimisation/psyclone-test
cd ${LFRIC_DIR}
# Psyclone scripts must now be under 'optimisation' and be called 'global.py'
# PSyclone scripts must now be under 'optimisation' and be called 'global.py'
mkdir -p applications/gungho_model/optimisation/psyclone-test
cp ${PSYCLONE_LFRIC_DIR}/everything_everywhere_all_at_once.py ${OPT_DIR}/global.py
# Clean previous version and compile again
rm -rf applications/gungho_model/working
./build/local_build.py -a gungho_model -p psyclone-test -v
# Run
cd applications/gungho_model/example
cp ${PSYCLONE_LFRIC_DIR}/KGOs/lfric_gunho_configuration_4its.nml configuration.nml
cp ${PSYCLONE_LFRIC_DIR}/KGOs/lfric_gungho_configuration_4its.nml configuration.nml
export OMP_NUM_THREADS=6
mpirun -n 1 ../bin/gungho_model configuration.nml |& tee output.txt
python ${PSYCLONE_LFRIC_DIR}/compare_ouput.py ${PSYCLONE_LFRIC_DIR}/KGOs/lfric_gunho_configuration_4its_checksums.txt gungho_model-checksums.txt
python ${PSYCLONE_LFRIC_DIR}/compare_ouput.py ${PSYCLONE_LFRIC_DIR}/KGOs/lfric_gungho_configuration_4its_checksums.txt gungho_model-checksums.txt
cat timer.txt
export VAR_TIME=$(grep "gungho_model" timer.txt | cut -d'|' -f5)
export VAR_HALOS=$(grep "gungho_model" halo_calls_counter.txt | cut -d'|' -f5)
Expand All @@ -152,10 +248,3 @@ jobs:
ci_test: "LFRic all transformations", lfric_version: '"$LFRIC_APPS_REV"', omp_threads: '"$OMP_NUM_THREADS"',
system: "GlaDos", compiler:"spack-gfortran-14", date: new Date(), elapsed_time: '"$VAR_TIME"',
num_of_halo_exchanges: '"$VAR_HALOS"'})'
- name: Upload LFRic optimised results
uses: exuanbo/actions-deploy-gist@v1
with:
token: ${{ secrets.GIST_TOKEN }}
gist_id: a4049a0fc0a0a11651a5ce6a04d76160
file_path: ../../../../store_results/lfric_optimised_performance_history
17 changes: 8 additions & 9 deletions .github/workflows/nemo_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,13 @@ jobs:
if: ${{ github.repository == 'stfc/PSyclone-mirror' }}
runs-on: self-hosted
env:
HDF5_VERSION: 1.14.4.3
HDF5_VERSION: 1.14.5
NETCDF_C_VERSION: 4.9.2
NETCDF_FORTRAN_VERSION: 4.6.1
NVFORTRAN_VERSION: 23.7
ONEAPI_VERSION: 2024.2.1
PERL_VERSION: 5.40.0
PYTHON_VERSION: 3.12.5
PYTHON_VERSION: 3.13.0

steps:
- uses: actions/checkout@v3
Expand Down Expand Up @@ -202,20 +202,19 @@ jobs:
export COMPILER_ARCH=linux_intel
export ADD_KEYS="IEEE_IS_NAN=ieee_is_nan key_nosignedzero"
export DEL_KEYS="key_iomput"
export MODEL_DIR=/archive/ssiso/ecmwf_eORCA1_GO8/
export NAMELISTS_DIR=${NEMO_DIR}/testscripts_V40/output/openmp_outer_V40_eORCA1_GO8_Z75_20170906_cray_dp_1x1/
export INPUT_DIR=/archive/NEMO_INPUTS/NEMOv4/ECMWF/eORCA1_GO8/
export MPI_INC_DIR=${I_MPI_ROOT}/include
cd $PSYCLONE_NEMO_DIR
make -j 4 openmp_cpu
make -j 4 compile-openmp_cpu
export OMP_NUM_THREADS=4
make run-openmp_cpu |& tee output.txt
# Check the output is as expected for the first 6 digits
# Check the output is as expected (TODO #2787: improve numerical reproducibility)
tail -n 1 output.txt | grep -q " it : 10" || (echo "Error: 'it : 10' not found!" & false)
tail -n 1 output.txt | grep -q "|ssh|_max: 0.199714" || (echo "Error: '|ssh|_max: 0.199714' not found!" & false)
tail -n 1 output.txt | grep -q "|U|_max: 0.148409" || (echo "Error: '|U|_max: 0.148409' not found!" & false)
tail -n 1 output.txt | grep -q "S_min: 0.108530" || (echo "Error: 'S_min: 0.108530' not found!" & false)
tail -n 1 output.txt | grep -q "S_max: 0.404045" || (echo "Error: 'S_max: 0.404045' not found!" & false)
tail -n 1 output.txt | grep -q "|ssh|_max: 0.199" || (echo "Error: '|ssh|_max: 0.199' not found!" & false)
tail -n 1 output.txt | grep -q "|U|_max: 0.148" || (echo "Error: '|U|_max: 0.148' not found!" & false)
tail -n 1 output.txt | grep -q "S_min: 0.10" || (echo "Error: 'S_min: 0.10' not found!" & false)
tail -n 1 output.txt | grep -q "S_max: 0.404" || (echo "Error: 'S_max: 0.404' not found!" & false)
export VAR_TIME=$(grep -A 1 "Elapsed Time" output.txt | head -n 2 | tail -n 1 | awk '{print $1}')
echo $GITHUB_REF_NAME $GITHUB_SHA $VAR_TIME >> ${HOME}/store_results/performance_history_openmp_cpu
${HOME}/mongosh-2.1.1-linux-x64/bin/mongosh \
Expand Down
Loading

0 comments on commit 6b93d3c

Please sign in to comment.