GPU project with Gray-Scott configurations (#496)

* Structure for GPU project and configurations for Gray-Scott * Fixed GS initial conditions in spectral mode * Added Game-of-Life adjacent configuration for Gray-Scott * Blobs evolving in different directions * Added USkate world configuration for Gray-Scott * Added script for scaling tests * Changed configurations a bit * Changed configurations #!!!!!! WARNING: FLAKEHEAVEN FAILED !!!!!!: #: * Changed configs * Added readme for GPU project * Implemented node limit in scaling #!!!!!! WARNING: FLAKEHEAVEN FAILED !!!!!!: #: --------- Co-authored-by: Thomas <t.baumann@fz-juelich.de>
Parallel-in-Time · Oct 21, 2024 · b2030e4 · b2030e4
1 parent 5fccc9f
commit b2030e4
Show file tree

Hide file tree

Showing 27 changed files with 1,446 additions and 35 deletions.
diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -53,6 +53,7 @@ Projects
    projects/compression.rst
    projects/second_order.rst
    projects/monodomain.rst
+   projects/GPU.rst
 
 
 API documentation

diff --git a/docs/source/projects/GPU.rst b/docs/source/projects/GPU.rst
@@ -0,0 +1 @@
+.. include:: /../../pySDC/projects/GPU/README.rst
diff --git a/pySDC/implementations/datatype_classes/cupy_mesh.py b/pySDC/implementations/datatype_classes/cupy_mesh.py
@@ -5,6 +5,11 @@
 except ImportError:
     MPI = None
 
+try:
+    from pySDC.helpers.NCCL_communicator import NCCLComm
+except ImportError:
+    NCCLComm = None
+
 
 class cupy_mesh(cp.ndarray):
     """
@@ -31,7 +36,7 @@ def __new__(cls, init, val=0.0, **kwargs):
             obj[:] = init[:]
         elif (
             isinstance(init, tuple)
-            and (init[1] is None or isinstance(init[1], MPI.Intracomm))
+            and (init[1] is None or isinstance(init[1], MPI.Intracomm) or isinstance(init[1], NCCLComm))
             and isinstance(init[2], cp.dtype)
         ):
             obj = cp.ndarray.__new__(cls, init[0], dtype=init[2], **kwargs)
@@ -62,12 +67,15 @@ def __abs__(self):
             float: absolute maximum of all mesh values
         """
         # take absolute values of the mesh values
-        local_absval = float(cp.amax(cp.ndarray.__abs__(self)))
+        local_absval = cp.max(cp.ndarray.__abs__(self))
 
         if self.comm is not None:
             if self.comm.Get_size() > 1:
-                global_absval = 0.0
-                global_absval = max(self.comm.allreduce(sendobj=local_absval, op=MPI.MAX), global_absval)
+                global_absval = local_absval * 0
+                if isinstance(self.comm, NCCLComm):
+                    self.comm.Allreduce(sendbuf=local_absval, recvbuf=global_absval, op=MPI.MAX)
+                else:
+                    global_absval = self.comm.allreduce(sendobj=float(local_absval), op=MPI.MAX)
             else:
                 global_absval = local_absval
         else:

diff --git a/pySDC/implementations/problem_classes/GrayScott_MPIFFT.py b/pySDC/implementations/problem_classes/GrayScott_MPIFFT.py
@@ -1,10 +1,8 @@
 import scipy.sparse as sp
-from mpi4py import MPI
-from mpi4py_fft import PFFT
 
 from pySDC.core.errors import ProblemError
-from pySDC.core.problem import Problem, WorkCounter
-from pySDC.implementations.datatype_classes.mesh import mesh, imex_mesh, comp2_mesh
+from pySDC.core.problem import WorkCounter
+from pySDC.implementations.datatype_classes.mesh import comp2_mesh
 from pySDC.implementations.problem_classes.generic_MPIFFT_Laplacian import IMEX_Laplacian_MPIFFT
 
 from mpi4py_fft import newDistArray
@@ -48,6 +46,8 @@ class grayscott_imex_diffusion(IMEX_Laplacian_MPIFFT):
         Denotes the period of the function to be approximated for the Fourier transform.
     comm : COMM_WORLD, optional
         Communicator for ``mpi4py-fft``.
+    num_blobs : int, optional
+        Number of blobs in the initial conditions. Negative values give rectangles.
 
     Attributes
     ----------
@@ -71,18 +71,35 @@ class grayscott_imex_diffusion(IMEX_Laplacian_MPIFFT):
     .. [3] https://www.chebfun.org/examples/pde/GrayScott.html
     """
 
-    def __init__(self, Du=1.0, Dv=0.01, A=0.09, B=0.086, **kwargs):
-        kwargs['L'] = 2.0
-        super().__init__(dtype='d', alpha=1.0, x0=-kwargs['L'] / 2.0, **kwargs)
+    def __init__(
+        self,
+        Du=1.0,
+        Dv=0.01,
+        A=0.09,
+        B=0.086,
+        L=2.0,
+        num_blobs=1,
+        **kwargs,
+    ):
+        super().__init__(dtype='d', alpha=1.0, x0=-L / 2.0, L=L, **kwargs)
 
         # prepare the array with two components
         shape = (2,) + (self.init[0])
         self.iU = 0
         self.iV = 1
         self.ncomp = 2  # needed for transfer class
-        self.init = (shape, self.comm, self.xp.dtype('float'))
 
-        self._makeAttributeAndRegister('Du', 'Dv', 'A', 'B', localVars=locals(), readOnly=True)
+        self.init = (shape, self.comm, self.xp.dtype('complex') if self.spectral else self.xp.dtype('float'))
+
+        self._makeAttributeAndRegister(
+            'Du',
+            'Dv',
+            'A',
+            'B',
+            'num_blobs',
+            localVars=locals(),
+            readOnly=True,
+        )
 
         # prepare "Laplacians"
         self.Ku = -self.Du * self.K2
@@ -168,7 +185,7 @@ def solve_system(self, rhs, factor, u0, t):
 
         return me
 
-    def u_exact(self, t):
+    def u_exact(self, t, seed=10700000):
         r"""
         Routine to compute the exact solution at time :math:`t = 0`, see [3]_.
 
@@ -185,19 +202,135 @@ def u_exact(self, t):
         assert t == 0.0, 'Exact solution only valid as initial condition'
         assert self.ndim == 2, 'The initial conditions are 2D for now..'
 
-        me = self.dtype_u(self.init, val=0.0)
+        xp = self.xp
+
+        _u = xp.zeros_like(self.X[0])
+        _v = xp.zeros_like(self.X[0])
+
+        rng = xp.random.default_rng(seed)
+
+        if self.num_blobs < 0:
+            """
+            Rectangles with stationary background, see arXiv:1501.01990
+            """
+            F, k = self.A, self.B - self.A
+            A = xp.sqrt(F) / (F + k)
+
+            # set stable background state from Equation 2
+            assert 2 * k < xp.sqrt(F) - 2 * F, 'Kill rate is too large to facilitate stable background'
+            _u[...] = (A - xp.sqrt(A**2 - 4)) / (2 * A)
+            _v[...] = xp.sqrt(F) * (A + xp.sqrt(A**2 - 4)) / 2
+
+            for _ in range(-self.num_blobs):
+                x0, y0 = rng.random(size=2) * self.L[0] - self.L[0] / 2
+                lx, ly = rng.random(size=2) * self.L[0] / self.nvars[0] * 30
+
+                mask_x = xp.logical_and(self.X[0] > x0, self.X[0] < x0 + lx)
+                mask_y = xp.logical_and(self.X[1] > y0, self.X[1] < y0 + ly)
+                mask = xp.logical_and(mask_x, mask_y)
+
+                _u[mask] = rng.random()
+                _v[mask] = rng.random()
+
+        elif self.num_blobs > 0:
+            """
+            Blobs as in https://www.chebfun.org/examples/pde/GrayScott.html
+            """
+
+            inc = self.L[0] / (self.num_blobs + 1)
+
+            for i in range(1, self.num_blobs + 1):
+                for j in range(1, self.num_blobs + 1):
+                    signs = (-1) ** rng.integers(low=0, high=2, size=2)
+
+                    # This assumes that the box is [-L/2, L/2]^2
+                    _u[...] += -xp.exp(
+                        -80.0
+                        * (
+                            (self.X[0] + self.x0 + inc * i + signs[0] * 0.05) ** 2
+                            + (self.X[1] + self.x0 + inc * j + signs[1] * 0.02) ** 2
+                        )
+                    )
+                    _v[...] += xp.exp(
+                        -80.0
+                        * (
+                            (self.X[0] + self.x0 + inc * i - signs[0] * 0.05) ** 2
+                            + (self.X[1] + self.x0 + inc * j - signs[1] * 0.02) ** 2
+                        )
+                    )
+
+            _u += 1
+        else:
+            raise NotImplementedError
 
-        # This assumes that the box is [-L/2, L/2]^2
+        u = self.u_init
         if self.spectral:
-            tmp = 1.0 - self.xp.exp(-80.0 * ((self.X[0] + 0.05) ** 2 + (self.X[1] + 0.02) ** 2))
-            me[0, ...] = self.fft.forward(tmp)
-            tmp = self.xp.exp(-80.0 * ((self.X[0] - 0.05) ** 2 + (self.X[1] - 0.02) ** 2))
-            me[1, ...] = self.fft.forward(tmp)
+            u[0, ...] = self.fft.forward(_u)
+            u[1, ...] = self.fft.forward(_v)
         else:
-            me[0, ...] = 1.0 - self.xp.exp(-80.0 * ((self.X[0] + 0.05) ** 2 + (self.X[1] + 0.02) ** 2))
-            me[1, ...] = self.xp.exp(-80.0 * ((self.X[0] - 0.05) ** 2 + (self.X[1] - 0.02) ** 2))
+            u[0, ...] = _u
+            u[1, ...] = _v
 
-        return me
+        return u
+
+    def get_fig(self, n_comps=2):  # pragma: no cover
+        """
+        Get a figure suitable to plot the solution of this problem
+
+        Args:
+        n_comps (int): Number of components that fit in the solution
+
+        Returns
+        -------
+        self.fig : matplotlib.pyplot.figure.Figure
+        """
+        import matplotlib.pyplot as plt
+        from mpl_toolkits.axes_grid1 import make_axes_locatable
+
+        plt.rcParams['figure.constrained_layout.use'] = True
+
+        if n_comps == 2:
+            self.fig, axs = plt.subplots(1, 2, sharex=True, sharey=True, figsize=((6, 3)))
+            divider = make_axes_locatable(axs[1])
+            self.cax = divider.append_axes('right', size='3%', pad=0.03)
+        else:
+            self.fig, ax = plt.subplots(1, 1, figsize=((6, 5)))
+            divider = make_axes_locatable(ax)
+            self.cax = divider.append_axes('right', size='3%', pad=0.03)
+        return self.fig
+
+    def plot(self, u, t=None, fig=None):  # pragma: no cover
+        r"""
+        Plot the solution. Please supply a figure with the same structure as returned by ``self.get_fig``.
+
+        Parameters
+        ----------
+        u : dtype_u
+            Solution to be plotted
+        t : float
+            Time to display at the top of the figure
+        fig : matplotlib.pyplot.figure.Figure
+            Figure with the correct structure
+
+        Returns
+        -------
+        None
+        """
+        fig = self.get_fig(n_comps=2) if fig is None else fig
+        axs = fig.axes
+
+        vmin = u.min()
+        vmax = u.max()
+        for i, label in zip([self.iU, self.iV], [r'$u$', r'$v$']):
+            im = axs[i].pcolormesh(self.X[0], self.X[1], u[i], vmin=vmin, vmax=vmax)
+            axs[i].set_aspect(1)
+            axs[i].set_title(label)
+
+        if t is not None:
+            fig.suptitle(f't = {t:.2e}')
+        axs[0].set_xlabel(r'$x$')
+        axs[0].set_ylabel(r'$y$')
+        fig.colorbar(im, self.cax)
 
 
 class grayscott_imex_linear(grayscott_imex_diffusion):

diff --git a/pySDC/implementations/problem_classes/generic_MPIFFT_Laplacian.py b/pySDC/implementations/problem_classes/generic_MPIFFT_Laplacian.py
@@ -66,8 +66,6 @@ def setup_GPU(cls):
     def __init__(
         self, nvars=None, spectral=False, L=2 * np.pi, alpha=1.0, comm=MPI.COMM_WORLD, dtype='d', useGPU=False, x0=0.0
     ):
-        """Initialization routine"""
-
         if useGPU:
             self.setup_GPU()
 
@@ -98,7 +96,7 @@ def __init__(
         # invoke super init, passing the communicator and the local dimensions as init
         super().__init__(init=(tmp_u.shape, comm, tmp_u.dtype))
         self._makeAttributeAndRegister(
-            'nvars', 'spectral', 'L', 'alpha', 'comm', 'x0', localVars=locals(), readOnly=True
+            'nvars', 'spectral', 'L', 'alpha', 'comm', 'x0', 'useGPU', localVars=locals(), readOnly=True
         )
 
         # get local mesh

diff --git a/pySDC/projects/GPU/README.md → pySDC/projects/GPU/README.rst b/pySDC/projects/GPU/README.md → pySDC/projects/GPU/README.rst
@@ -1,8 +1,9 @@
 pySDC using GPUs
-===================
+================
+
 Installation
 ------------
-In order to start playing on GPU, install `pySDC` and it's dependencies, ideally in developer mode.
+In order to start playing on GPU, install `pySDC` and its dependencies, ideally in developer mode.
 First start by setting up a virtual environment, e.g. by using [Miniconda](https://docs.conda.io/en/latest/miniconda.html).
 Then also add the CuPy Package (the cuda-toolkit will be installed automatically):
 
@@ -13,28 +14,49 @@ Then also add the CuPy Package (the cuda-toolkit will be installed automatically
 When this is done (and it can take a while), you have your setup to run `pySDC` on the GPU.
 
 Changes in the problem_classes
-------------
+------------------------------
 Now you have to change a little bit in the problem_classes. The first and easy step is to change the datatype.
 To use pySDC on the GPU with CuPy you must use the [cupy-datatype](../../implementations/datatype_classes/cupy_mesh.py).
 The next step is to import cupy in the problem_class. In the following you have to exchange the NumPy/SciPy functions with the CuPy functions.
 A [Comparison Table](https://docs.cupy.dev/en/latest/reference/comparison.html) is given from CuPy to do that.
-For Exmaple: The above steps can be traced using the files 
+For example: The above steps can be traced using the files 
 [HeatEquation_ND_FD_forced_periodic.py](../../implementations/problem_classes/HeatEquation_ND_FD_forced_periodic.py) 
 and [HeatEquation_ND_FD_forced_periodic_gpu.py](../../implementations/problem_classes/HeatEquation_ND_FD_forced_periodic.py)
 Now you are ready to run `pySDC` on the GPU. 
 
 Run pySDC on the GPU
-------------
-You have to configure a Script to run it. You can see at the file [heat.py](heat.py) that the parameters are the 
+--------------------
+You have to configure a script to run it. You can see in the file [heat.py](heat.py) that the parameters are the 
 same for GPU and CPU. Only the import for the problem_class changed.  
 
-
-
 More examples
-----------
+-------------
 Further examples can be found with Allen-Cahn:
 * problem: [AllenCahn_2D_FD.py](../../implementations/problem_classes/AllenCahn_2D_FD.py) and [AllenCahn_2D_FD_gpu.py](../../implementations/problem_classes/AllenCahn_2D_FD_gpu.py)
 * problem: [AllenCahn_2D_FFT.py](../../implementations/problem_classes/AllenCahn_2D_FFT.py) and [AllenCahn_2D_FFT_gpu.py](../../implementations/problem_classes/AllenCahn_2D_FFT_gpu.py)
   * Script to run pySDC: [ac-fft.py](ac-fft.py)
 
 
+Running large problems on GPU
+-----------------------------
+This project contains some infrastructure for running and plotting specific problems.
+The main file is `run_experiment` and can be configured using command line arguments.
+For instance, use
+
+.. code-block:: bash
+ 
+    srun -n 4 python work_precision.py --config=GS_USkate --procs=1/1/4 --useGPU=True --mode=run
+    mpirun -np 8 python work_precision.py --config=GS_USkate --procs=1/1/4 --useGPU=True --mode=plot
+    python work_precision.py --config=GS_USkate --procs=1/1/4 --useGPU=True --mode=video
+
+to first run the problem, then make plots and then make a video for Gray-Scott with the U-Skate configuration (see arXiv:1501.01990).
+
+To do a parallel scaling test, you can go to JUWELS Booster and use, for instance,
+
+.. code-block:: bash
+
+   python analysis_scripts/parallel_scaling.py --mode=run --scaling=strong --space_time=True --XPU=GPU --problem=GS
+   srun python analysis_scripts/parallel_scaling.py --mode=plot --scaling=strong --space_time=True --XPU=GPU --problem=GS
+
+This will generate jobscripts and submit the jobs. Notice that you have to wait for the jobs to complete before you can plot them.
+
+To learn more about the options for the scripts, run them with `--help`.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		.. include:: /../../pySDC/projects/GPU/README.rst