Merge pull request #2647 from stfc/1992_mpi_support_for_extraction

1992 mpi support for extraction
stfc · Jul 11, 2024 · 9a94641 · 9a94641
2 parents eeda0d0 + 7a011e4
commit 9a94641
Show file tree

Hide file tree

Showing 47 changed files with 714 additions and 164 deletions.
diff --git a/changelog b/changelog
@@ -159,6 +159,10 @@
 	56) PR #2646 for #2641. Fixes incorrect module names within the
 	PSyData API (e.g. when profiling).
 
+	57) PR #2647 for #1992. Adds MPI support to PSyKE. Each rank will write
+        its own output file(s). The generated driver has been extended so that
+        the name of the file to use can be specified on the command line.
+
 release 2.5.0 14th of February 2024
 
 	1) PR #2199 for #2189. Fix bugs with missing maps in enter data

diff --git a/doc/user_guide/psyke.rst b/doc/user_guide/psyke.rst
@@ -153,11 +153,16 @@ are used or not.
 Distributed memory
 ##################
 
-As noted in the :ref:`PSyKAl Introduction <introduction_to_psykal>` section,
-PSyKAl can support distributed memory. However, since the generated PSy-layer
-code with DM enabled contains infrastructure calls (e.g. checks for runtime
-status of field halos, halo exchanges etc.), code extraction is not allowed
-when distributed memory is enabled.
+Kernel extraction for distributed memory is supported in as much as each
+process will write its own output file by adding its rank to the output
+file name. So each kernel and each rank will produce one file. It is possible
+to extract several consecutive kernels, but there must be no halo exchange
+calls between the kernels. The extraction transformation will test for this
+and raise an exception if this should happen.
+The compiled driver program accepts the name of the extracted kernel file as
+a command line parameter. If this is not specified, it will use the default
+name (``module-region`` without a rank).
+
 
 .. _psyke-intro-restrictions-shared:
 
@@ -176,6 +181,8 @@ The ``ExtractTrans`` transformation cannot be applied to:
 
 * An inner Loop without its parent outer Loop in the GOcean API.
 
+* Kernels that have a halo exchange call between them.
+
 .. _psyke-use:
 
 Use
@@ -381,6 +388,19 @@ and in
 `lib/extract/netcdf
 <https://github.com/stfc/PSyclone/tree/master/lib/extract/netcdf>`_.
 
+All versions of the extraction libraries can be compiled with MPI
+support by setting the variable ``MPI=yes``:
+
+.. code-block:: shell
+
+  make MPI=yes ...
+
+The only difference is that the output files will now have the process
+rank in the name. The compiled driver program accepts the name of the
+extracted kernel file as a command line parameter. If this is not specified,
+it will use the default name (``module-region`` without a rank).
+
+
 .. _extraction_for_gocean:
 
 Extraction for GOcean

diff --git a/examples/gocean/eg5/extract/.gitignore b/examples/gocean/eg5/extract/.gitignore
@@ -1,5 +1,6 @@
 alg.f90
 psy.f90
+test.x90
 extract_test.standalone
 extract_test.netcdf
 *.nc

diff --git a/examples/gocean/eg5/extract/Makefile b/examples/gocean/eg5/extract/Makefile
@@ -43,6 +43,17 @@
 # The dl_esm_inf extract wrapper library:
 #     export EXTRACT_DIR = ../../../../lib/extract/netcdf/dl_esm_inf
 
+MPI?=no
+
+ifeq ($(MPI), yes)
+  # -P suppresses linemarkers
+  FPP?=cpp -P -D_MPI
+  # We need to compile with mpif90 if MPI is enabled
+  F90?=mpif90
+else
+  FPP?=cpp -P
+endif
+
 PSYROOT=../../../..
 include $(PSYROOT)/examples/common.mk
 
@@ -51,7 +62,8 @@ TYPE?=standalone
 
 GENERATED_FILES += *.o *.mod $(NAME)  alg.f90 psy.f90   \
 		  $(DRIVER_INIT).$(TYPE)   $(DRIVER_INIT).f90   \
-		  $(DRIVER_UPDATE).$(TYPE) $(DRIVER_UPDATE).f90
+		  $(DRIVER_UPDATE).$(TYPE) $(DRIVER_UPDATE).f90 \
+		  main.x90
 
 # Location of the infrastucture code (which is a submodule of the
 # PSyclone git repo).
@@ -142,6 +154,9 @@ $(KERNELS):  $(INF_LIB)
 %.o: %.f90
 	$(F90) $(F90FLAGS) -c $<
 
+%.x90: %.X90
+	$(FPP) $< >$@
+
 # This target requires that the netcdf (Fortran) development package be
 # installed
 $(EXTRACT_DIR)/lib_kernel_data_netcdf.a:

diff --git a/examples/gocean/eg5/extract/test.x90 → examples/gocean/eg5/extract/test.X90 b/examples/gocean/eg5/extract/test.x90 → examples/gocean/eg5/extract/test.X90
@@ -1,4 +1,16 @@
 Program test
+    !> This program is a simple gocean program that contains two invokes,
+    !> the first to initialise a few fields, the second to update a field.
+    !> It can be used with kernel extraction for both invokes. After
+    !> instrumenting the code using PSyclone with the extract_transform.py
+    !> script, it can be compiled, and when executed will create two kernel data
+    !> files. The instrumentation will also create two stand-alone driver
+    !> programs, which can be compiled. They will each read the corresponding
+    !> kernel data file, execute the kernel, and compare the results.
+
+#ifdef _MPI
+    use mpi
+#endif
     USE field_mod
     USE grid_mod
     use decomposition_mod, only    : decomposition_type
@@ -17,6 +29,11 @@ Program test
     ! interface does not provide a method for single precision).
     real(kind=kind(1.0d0))        :: z
     TYPE(grid_type), target       :: grid
+#ifdef _MPI
+    integer                       :: ierr
+
+    call MPI_Init(ierr)
+#endif
 
     call parallel_init()
     call extract_PSyDataInit()
@@ -42,4 +59,9 @@ Program test
 
     print *,a_fld%data(1:5, 1:5)
     call extract_PSyDataShutdown()
+
+#ifdef _MPI
+    call MPI_Finalize(ierr)
+#endif
+
 end program test
diff --git a/examples/lfric/eg17/full_example_extract/.gitignore b/examples/lfric/eg17/full_example_extract/.gitignore
@@ -1,5 +1,6 @@
 extract.standalone
 extract.netcdf
+main.x90
 main_alg.f90
 main_psy.f90
 main-*.nc

diff --git a/examples/lfric/eg17/full_example_extract/Makefile b/examples/lfric/eg17/full_example_extract/Makefile
@@ -39,6 +39,17 @@
 # export F90=gfortran
 # export F90FLAGS="-Wall -g -fcheck=bound"
 
+MPI?=no
+
+ifeq ($(MPI), yes)
+  # -P suppresses linemarkers
+  FPP?=cpp -P -D_MPI
+  # We need to compile with mpif90 if MPI is enabled
+  F90?=mpif90
+else
+  FPP?=cpp -P
+endif
+
 PSYROOT=../../../..
 
 # Set it to 'netcdf' to use the NetCDF writer
@@ -53,7 +64,8 @@ else
 endif
 GENERATED_FILES += driver-main-init driver-main-init.F90       \
 				   driver-main-update driver-main-update.F90   \
-				   *.o *.mod $(EXEC) main_alg.f90 main_psy.f90
+				   *.o *.mod $(EXEC) main_alg.f90 main_psy.f90 \
+				   main.x90
 
 F90 ?= gfortran
 F90FLAGS ?= -Wall -g -ffree-line-length-none
@@ -118,13 +130,16 @@ driver-main-init:	driver-main-init.o
 	$(F90) $(F90FLAGS) $(LFRIC_INCLUDE_FLAGS) -c $<
 
 # Keep the generated psy and alg files
-.precious: main_psy.f90 main_alg.f90
+.PRECIOUS: main.x90 main_psy.f90 main_alg.f90
 
 # This dependency will make sure that read_kernel_data_mod was created
 # (which will be inlined in the driver).
 main_psy.f90: $(EXTRACT_LIB)
 main_alg.f90: main_psy.f90
 
+%.x90:	%.X90
+	$(FPP) $< >$@
+
 %_psy.f90:	%.x90
 	${PSYCLONE} -api lfric -s ./extract_transform.py       \
 	-d . -d $(EXTRACT_DIR)                                 \

diff --git a/examples/lfric/eg17/full_example_extract/README.md b/examples/lfric/eg17/full_example_extract/README.md
@@ -40,6 +40,14 @@ export F90=gfortran
 export F90FLAGS="-Wall -g -fcheck=bound"
 ```
 
+This example can also be used to showcase the extraction if MPI is enabled.
+Note that the code is *not* set up to run in parallel with MPI, but it can
+be compiled with MPI and run as a single process job.
+Extraction in this case means that the single process will write
+its output data by appending its rank to the output filename. To enable
+this, set the environment variable ``MPI=yes``.
+
+
 The location of the PSyData wrapper library can be set with
 the environment variable ``EXTRACT_DIR`` specifying the location of the
 extraction library. The location of the LFRic infrastructure files is set

diff --git a/.../lfric/eg17/full_example_extract/main.x90 → .../lfric/eg17/full_example_extract/main.X90 b/.../lfric/eg17/full_example_extract/main.x90 → .../lfric/eg17/full_example_extract/main.X90
@@ -32,6 +32,18 @@
 ! Modifications: A. R. Porter, STFC Daresbury Laboratory
 
 program main
+    !> This program is a simple LFRic program that contains two invokes,
+    !> the first to initialise a few fields, the second to update a field.
+    !> It can be used with kernel extraction for both invokes. After
+    !> instrumenting the code using PSyclone with the extract_transform.py
+    !> script, it can be compiled, and when executed will create two kernel data
+    !> files. The instrumentation will also create two stand-alone driver
+    !> programs, which can be compiled. They will each read the corresponding
+    !> kernel data file, execute the kernel, and compare the results.
+
+#ifdef _MPI
+    use mpi
+#endif
 
     use global_mesh_base_mod,   only: global_mesh_base_type
     use mesh_mod,               only: mesh_type, PLANE
@@ -62,9 +74,13 @@ program main
     integer(kind=i_def)                        :: ndata_sz
     real(kind=r_def)                           :: one
     logical(kind=l_def)                        :: some_logical
-    integer                                    :: i
+    integer                                    :: i, ierr
 
     ! Use the unit-testing constructor:
+
+#ifdef _MPI
+    call MPI_Init(ierr)
+#endif
     global_mesh = global_mesh_base_type()
     global_mesh_ptr => global_mesh
 
@@ -107,4 +123,8 @@ program main
 
     call field1%log_minmax(LOG_LEVEL_ALWAYS, "minmax of field1")
 
+#ifdef _MPI
+    call MPI_Finalize(ierr)
+#endif
+
 end program main
diff --git a/lib/extract/netcdf/.gitignore b/lib/extract/netcdf/.gitignore
@@ -1,2 +1,2 @@
-extract_netcdf_base.f90
+extract_netcdf_base.F90
 
diff --git a/lib/extract/netcdf/Makefile b/lib/extract/netcdf/Makefile
@@ -42,8 +42,14 @@
 
 # ----------- Default "make" values, can be overwritten by the user -----------
 # Compiler and compiler flags
-F90 ?= gfortran
 F90FLAGS ?=
+ifeq ($(MPI), yes)
+       F90 ?= mpif90
+else
+       F90 ?= gfortran
+       F90FLAGS += -DNO_MPI
+endif
+
 # Path to the PSyclone wrapper libraries. It defaults to the relative path to
 # the top-level 'lib' directory. Overwrite for a custom location.
 PSYDATA_LIB_DIR ?= ./../..
@@ -59,7 +65,7 @@ PROCESS = $$($(PSYDATA_LIB_DIR)/get_python.sh) $(PSYDATA_LIB_DIR)/process.py
 
 default: extract_netcdf_base.o psy_data_base.o read_kernel_data_mod.o
 
-process: extract_netcdf_base.f90
+process: extract_netcdf_base.F90
 
 .PHONY: default process all clean allclean
 
@@ -78,8 +84,8 @@ extract_netcdf_base.o: psy_data_base.o
 psy_data_base.f90:	$(PSYDATA_LIB_DIR)/psy_data_base.jinja Makefile
 	$(PROCESS) $(PROCESS_ARGS) -generic-declare $< > psy_data_base.f90
 
-extract_netcdf_base.f90:	extract_netcdf_base.jinja Makefile
-	$(PROCESS) $(PROCESS_ARGS) -generic-provide $< > extract_netcdf_base.f90
+extract_netcdf_base.F90:	extract_netcdf_base.jinja Makefile
+	$(PROCESS) $(PROCESS_ARGS) -generic-provide $< > extract_netcdf_base.F90
 
 read_kernel_data_mod.f90:	read_kernel_data_mod.jinja Makefile
 	$(PROCESS) $(PROCESS_ARGS) -generic-provide $< > read_kernel_data_mod.f90
@@ -89,8 +95,11 @@ read_kernel_data_mod.f90:	read_kernel_data_mod.jinja Makefile
 %.o: %.f90
 	$(F90) $(F90FLAGS) -c $<
 
+%.o: %.F90
+	$(F90) $(F90FLAGS) -c $<
+
 clean:
-	rm -f extract_netcdf_base.f90 psy_data_base.f90
+	rm -f extract_netcdf_base.F90 psy_data_base.f90
 	rm -f *.o *.mod
 
 allclean:

diff --git a/lib/extract/netcdf/README.md b/lib/extract/netcdf/README.md
@@ -27,6 +27,10 @@ it in their own directory (to allow for the required data types to be
 supported). The API-specific implementations do not link with the compiled
 version in this directory.
 
+In order to support MPI in extraction (which means each process will write
+its own output data by appending its rank to the filename), set the environment
+variable ``MPI=yes``.
+
 ## [``dl_esm_inf``](./dl_esm_inf) directory
 
 Contains the NetCDF-extract, PSyData-API-based, wrapper library for the

diff --git a/lib/extract/netcdf/dl_esm_inf/Makefile b/lib/extract/netcdf/dl_esm_inf/Makefile
@@ -39,8 +39,14 @@
 
 # ----------- Default "make" values, can be overwritten by the user -----------
 # Compiler and compiler flags
-F90 ?= gfortran
 F90FLAGS ?=
+ifeq ($(MPI), yes)
+       F90 ?= mpif90
+else
+       F90 ?= gfortran
+       F90FLAGS += -DNO_MPI
+endif
+
 # Path to the 'dl_esm_inf' infrastructure library. It defaults to the version
 # distributed with PSyclone. Overwrite for a different infrastructure version.
 GOCEAN_INF_DIR ?= ./../../../../external/dl_esm_inf/finite_difference
@@ -82,8 +88,8 @@ psy_data_base.f90: $(PSYDATA_LIB_DIR)/psy_data_base.jinja Makefile
 read_kernel_data_mod.f90: $(LIB_TMPLT_DIR)/read_kernel_data_mod.jinja Makefile
 	$(PROCESS) $(PROCESS_ARGS) $< > read_kernel_data_mod.f90
 
-extract_netcdf_base.f90: $(LIB_TMPLT_DIR)/extract_netcdf_base.jinja Makefile
-	$(PROCESS) $(PROCESS_ARGS) -generic-declare -generic-provide $< > extract_netcdf_base.f90
+extract_netcdf_base.F90: $(LIB_TMPLT_DIR)/extract_netcdf_base.jinja Makefile
+	$(PROCESS) $(PROCESS_ARGS) -generic-declare -generic-provide $< > extract_netcdf_base.F90
 
 compare_variables_mod.F90: $(PSYDATA_LIB_DIR)/extract/compare_variables_mod.jinja Makefile
 	$(PROCESS) $(PROCESS_ARGS) -generic-declare -generic-provide $< > compare_variables_mod.F90

diff --git a/lib/extract/netcdf/dl_esm_inf/README.md b/lib/extract/netcdf/dl_esm_inf/README.md
@@ -63,6 +63,17 @@ so the exact path **must be specified** using the environment variable
 GOCEAN_INF_DIR=<path/to/dl_esm_inf/finite_difference> make
 ```
 
+The library can be compiled with MPI support, which will add the process rank
+to each output filename, by setting ``MPI=yes``:
+
+```shell
+MPI=yes make
+```
+
+The build environment will provide ``NO_MPI`` as a pre-processor definition when
+MPI is not enabled (to be compatible with LFRic) so that MPI specific code can
+be disabled.
+
 The locations of the ExtractNetcdf and PSyData base classes are
 specified using the environment variables ``$LIB_TMPLT_DIR`` and
 ``$PSYDATA_LIB_DIR``, respectively. They default to the relative paths to
Original file line number	Diff line number	Diff line change
		@@ -1,2 +1,2 @@
		extract_netcdf_base.f90
		extract_netcdf_base.F90