From 4ac5962ef4fce6e4f812ab64bed2f584ea412e91 Mon Sep 17 00:00:00 2001 From: Olga Shapoval <30510597+oshapoval@users.noreply.github.com> Date: Mon, 29 Jul 2024 17:07:18 -0700 Subject: [PATCH] Fix bug with ES solver and MR: `E_aux=E_fp` in `UpdateAuxilaryData` (#4922) * Removal of asserttion which prevented from usung the averaged PSATD algorithms with PML BC * Clean-up * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fixed to arr_aux(j,k,l) = fine when ES solve is used * Removed temporary print statements * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Clean-up * Added CI test ElectrostaticSphereEB_RZ_MR_lev_1 to check the fields on the level=1 * Updated becnmarks for ElectrostaticSphereLabFrame_MR_emass_10 * Imported regular expression (re) in the analysis script. * Fixed typo * United two CI tests for different levels of MR in one test. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Updated CI test ElectrostaticSphereEB_RZ_MR and the corresponding analysis script with smaller MR patch. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Clean-up * Do deepcopy for lev>0 and collocated grid * Fix bugs to resolve failure of CI tests * Preserve plotfile output, update benchmark file * Working on CI test * Update benchmark of `ElectrostaticSphereEB_RZ_MR` * Initialize with value all `aux`, `cax` fields * Remove changes related to averaged Galilean PSATD with PML * Remove style changes (e.g., changes to empty lines) * Replace `MFIter`/`ParallelFor` loop with simple copy * Apply suggestions from code review * Revert part of the code to its previous, equivalent state * Removed DeepCopy & no need for ghost cells in temp phi_cp. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update Source/ablastr/fields/PoissonSolver.H * Add inline comments * Update analysis script * Use `TilingIfNotGPU`, `growntilebox` in E loop --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Edoardo Zoni Co-authored-by: Edoardo Zoni <59625522+EZoni@users.noreply.github.com> Co-authored-by: Remi Lehe Co-authored-by: Weiqun Zhang --- .../electrostatic_sphere_eb/analysis_rz_mr.py | 99 ++++ .../electrostatic_sphere_eb/inputs_rz_mr | 1 + .../ElectrostaticSphereEB_RZ_MR.json | 8 +- ...ectrostaticSphereLabFrame_MR_emass_10.json | 12 +- Regression/WarpX-tests.ini | 5 +- Source/Parallelization/WarpXComm.cpp | 510 ++++++++++-------- Source/Parallelization/WarpXComm_K.H | 90 +++- Source/ablastr/fields/PoissonSolver.H | 21 +- 8 files changed, 508 insertions(+), 238 deletions(-) create mode 100755 Examples/Tests/electrostatic_sphere_eb/analysis_rz_mr.py diff --git a/Examples/Tests/electrostatic_sphere_eb/analysis_rz_mr.py b/Examples/Tests/electrostatic_sphere_eb/analysis_rz_mr.py new file mode 100755 index 00000000000..0b01b128362 --- /dev/null +++ b/Examples/Tests/electrostatic_sphere_eb/analysis_rz_mr.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python + +# Copyright 2024 Olga Shapoval, Edoardo Zoni +# +# This file is part of WarpX. +# +# License: BSD-3-Clause-LBNL + +# This script tests the embedded boundary in RZ. +# A cylindrical surface (r=0.1) has a fixed potential 1 V. +# The outer surface has 0 V fixed. +# Thus the analytical solution has the form: +# phi(r) = A+B*log(r), Er(r) = -B/r. + +import os +import sys + +import numpy as np +from openpmd_viewer import OpenPMDTimeSeries + +sys.path.insert(1, '../../../../warpx/Regression/Checksum/') +import checksumAPI + +tolerance = 0.004 +print(f'tolerance = {tolerance}') + +fn = sys.argv[1] + +def find_first_non_zero_from_bottom_left(matrix): + for i in range(matrix.shape[0]): + for j in range(matrix.shape[1]): + if (matrix[i][j] != 0) and (matrix[i][j] != np.nan): + return (i, j) + return i, j + +def find_first_non_zero_from_upper_right(matrix): + for i in range(matrix.shape[0]-1, -1, -1): + for j in range(matrix.shape[1]-1, -1, -1): + if (matrix[i][j] != 0) and (matrix[i][j] != np.nan): + return (i, j) + return i,j + +def get_fields(ts, level): + if level == 0: + Er, info = ts.get_field('E', 'r', iteration=0) + phi, info = ts.get_field('phi', iteration=0) + else: + Er, info = ts.get_field(f'E_lvl{level}', 'r', iteration=0) + phi, info = ts.get_field(f'phi_lvl{level}', iteration=0) + return Er, phi, info + +def get_error_per_lev(ts,level): + Er, phi, info = get_fields(ts, level) + + nr_half = info.r.shape[0] // 2 + dr = info.dr + + Er_patch = Er[:,nr_half:] + phi_patch = phi[:,nr_half:] + r1 = info.r[nr_half:] + patch_left_lower_i, patch_left_lower_j = find_first_non_zero_from_bottom_left(Er_patch) + patch_right_upper_i, patch_right_upper_j = find_first_non_zero_from_upper_right(Er_patch) + + # phi and Er field on the MR patch + phi_sim = phi_patch[patch_left_lower_i:patch_right_upper_i+1, patch_left_lower_j:patch_right_upper_j+1] + Er_sim = Er_patch[patch_left_lower_i:patch_right_upper_i+1, patch_left_lower_j:patch_right_upper_j+1] + r = r1[patch_left_lower_j:patch_right_upper_j+1] + + B = 1.0/np.log(0.1/0.5) + A = -B*np.log(0.5) + + # outside EB and last cutcell + rmin = np.min(np.argwhere(r >= (0.1+dr))) + rmax = -1 + r = r[rmin:rmax] + phi_sim = phi_sim[:,rmin:rmax] + Er_sim = Er_sim[:,rmin:rmax] + + phi_theory = A + B*np.log(r) + phi_theory = np.tile(phi_theory, (phi_sim.shape[0],1)) + phi_error = np.max(np.abs(phi_theory-phi_sim) / np.abs(phi_theory)) + + Er_theory = -B/r + Er_theory = np.tile(Er_theory, (Er_sim.shape[0],1)) + Er_error = np.max(np.abs(Er_theory-Er_sim) / np.abs(Er_theory)) + + print(f'max error of phi[lev={level}]: {phi_error}') + print(f'max error of Er[lev={level}]: {Er_error}') + assert(phi_error < tolerance) + assert(Er_error < tolerance) + +ts = OpenPMDTimeSeries(fn) +level_fields = [field for field in ts.avail_fields if 'lvl' in field] +nlevels = 0 if level_fields == [] else int(level_fields[-1][-1]) +for level in range(nlevels+1): + get_error_per_lev(ts,level) + +test_name = os.path.split(os.getcwd())[1] +checksumAPI.evaluate_checksum(test_name, fn, output_format="openpmd") diff --git a/Examples/Tests/electrostatic_sphere_eb/inputs_rz_mr b/Examples/Tests/electrostatic_sphere_eb/inputs_rz_mr index 3bea63d76fb..722fc916416 100644 --- a/Examples/Tests/electrostatic_sphere_eb/inputs_rz_mr +++ b/Examples/Tests/electrostatic_sphere_eb/inputs_rz_mr @@ -30,3 +30,4 @@ diagnostics.diags_names = diag1 diag1.intervals = 1 diag1.diag_type = Full diag1.fields_to_plot = Er phi +diag1.format = openpmd diff --git a/Regression/Checksum/benchmarks_json/ElectrostaticSphereEB_RZ_MR.json b/Regression/Checksum/benchmarks_json/ElectrostaticSphereEB_RZ_MR.json index ffa8d68f9d9..6bbfce0e3b3 100644 --- a/Regression/Checksum/benchmarks_json/ElectrostaticSphereEB_RZ_MR.json +++ b/Regression/Checksum/benchmarks_json/ElectrostaticSphereEB_RZ_MR.json @@ -1,10 +1,10 @@ { "lev=0": { - "Er": 8487.661571739109, - "phi": 2036.0428085225362 + "Er": 16975.32314347822, + "phi": 4072.085617045073 }, "lev=1": { - "Er": 19519.172334977942, - "phi": 3291.0262856782897 + "Er": 26818.189739547757, + "phi_lvl1": 8731.176548788893 } } diff --git a/Regression/Checksum/benchmarks_json/ElectrostaticSphereLabFrame_MR_emass_10.json b/Regression/Checksum/benchmarks_json/ElectrostaticSphereLabFrame_MR_emass_10.json index 024127a1bd2..21d5208c59a 100644 --- a/Regression/Checksum/benchmarks_json/ElectrostaticSphereLabFrame_MR_emass_10.json +++ b/Regression/Checksum/benchmarks_json/ElectrostaticSphereLabFrame_MR_emass_10.json @@ -6,15 +6,15 @@ "rho": 0.0 }, "lev=1": { - "Ex": 14.281015560380963, - "Ey": 14.281015560380965, - "Ez": 14.281015560380965, + "Ex": 7.170105936287823, + "Ey": 7.17010593628782, + "Ez": 7.170105936287821, "rho": 2.6092568008333786e-10 }, "electron": { - "particle_momentum_x": 1.80842228672388e-24, - "particle_momentum_y": 1.8084222867238806e-24, - "particle_momentum_z": 1.7598771525647628e-24, + "particle_momentum_x": 9.257577597262615e-25, + "particle_momentum_y": 9.257577597262618e-25, + "particle_momentum_z": 9.257577597262624e-25, "particle_position_x": 327.46875, "particle_position_y": 327.46875, "particle_position_z": 327.46875, diff --git a/Regression/WarpX-tests.ini b/Regression/WarpX-tests.ini index a1dc0a168f7..8048318de7a 100644 --- a/Regression/WarpX-tests.ini +++ b/Regression/WarpX-tests.ini @@ -483,7 +483,7 @@ analysisRoutine = Examples/Tests/electrostatic_sphere_eb/analysis_rz.py [ElectrostaticSphereEB_RZ_MR] buildDir = . inputFile = Examples/Tests/electrostatic_sphere_eb/inputs_rz_mr -runtime_params = warpx.abort_on_warning_threshold = medium +runtime_params = warpx.abort_on_warning_threshold = medium amr.ref_ratio_vect = 2 2 2 dim = 2 addToCompileString = USE_EB=TRUE USE_RZ=TRUE cmakeSetupOpts = -DWarpX_DIMS=RZ -DWarpX_EB=ON @@ -492,7 +492,8 @@ useMPI = 1 numprocs = 2 useOMP = 1 numthreads = 1 -analysisRoutine = Examples/Tests/electrostatic_sphere_eb/analysis_rz.py +outputFile = ElectrostaticSphereEB_RZ_MR_plt +analysisRoutine = Examples/Tests/electrostatic_sphere_eb/analysis_rz_mr.py [ElectrostaticSphereLabFrame] buildDir = . diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp index e7df489236e..2887bd4d056 100644 --- a/Source/Parallelization/WarpXComm.cpp +++ b/Source/Parallelization/WarpXComm.cpp @@ -171,136 +171,191 @@ WarpX::UpdateAuxilaryDataStagToNodal () // Bfield { - Array,3> Btmp; - if (Bfield_cax[lev][0]) { - for (int i = 0; i < 3; ++i) { - Btmp[i] = std::make_unique( - *Bfield_cax[lev][i], amrex::make_alias, 0, 1); + if (electromagnetic_solver_id != ElectromagneticSolverAlgo::None) { + Array,3> Btmp; + if (Bfield_cax[lev][0]) { + for (int i = 0; i < 3; ++i) { + Btmp[i] = std::make_unique( + *Bfield_cax[lev][i], amrex::make_alias, 0, 1); + } + } else { + const IntVect ngtmp = Bfield_aux[lev-1][0]->nGrowVect(); + for (int i = 0; i < 3; ++i) { + Btmp[i] = std::make_unique(cnba, dm, 1, ngtmp); + } } - } else { - const IntVect ngtmp = Bfield_aux[lev-1][0]->nGrowVect(); + Btmp[0]->setVal(0.0); + Btmp[1]->setVal(0.0); + Btmp[2]->setVal(0.0); + // ParallelCopy from coarse level for (int i = 0; i < 3; ++i) { - Btmp[i] = std::make_unique(cnba, dm, 1, ngtmp); + const IntVect ng = Btmp[i]->nGrowVect(); + // Guard cells may not be up to date beyond ng_FieldGather + const amrex::IntVect& ng_src = guard_cells.ng_FieldGather; + // Copy Bfield_aux to Btmp, using up to ng_src (=ng_FieldGather) guard cells from + // Bfield_aux and filling up to ng (=nGrow) guard cells in Btmp + ablastr::utils::communication::ParallelCopy(*Btmp[i], *Bfield_aux[lev - 1][i], 0, 0, 1, + ng_src, ng, WarpX::do_single_precision_comms, cperiod); } - } - Btmp[0]->setVal(0.0); - Btmp[1]->setVal(0.0); - Btmp[2]->setVal(0.0); - // ParallelCopy from coarse level - for (int i = 0; i < 3; ++i) { - const IntVect ng = Btmp[i]->nGrowVect(); - // Guard cells may not be up to date beyond ng_FieldGather - const amrex::IntVect& ng_src = guard_cells.ng_FieldGather; - // Copy Bfield_aux to Btmp, using up to ng_src (=ng_FieldGather) guard cells from - // Bfield_aux and filling up to ng (=nGrow) guard cells in Btmp - ablastr::utils::communication::ParallelCopy(*Btmp[i], *Bfield_aux[lev - 1][i], 0, 0, 1, - ng_src, ng, WarpX::do_single_precision_comms, cperiod); - } - const amrex::IntVect& refinement_ratio = refRatio(lev-1); + const amrex::IntVect& refinement_ratio = refRatio(lev-1); - const amrex::IntVect& Bx_fp_stag = Bfield_fp[lev][0]->ixType().toIntVect(); - const amrex::IntVect& By_fp_stag = Bfield_fp[lev][1]->ixType().toIntVect(); - const amrex::IntVect& Bz_fp_stag = Bfield_fp[lev][2]->ixType().toIntVect(); + const amrex::IntVect& Bx_fp_stag = Bfield_fp[lev][0]->ixType().toIntVect(); + const amrex::IntVect& By_fp_stag = Bfield_fp[lev][1]->ixType().toIntVect(); + const amrex::IntVect& Bz_fp_stag = Bfield_fp[lev][2]->ixType().toIntVect(); - const amrex::IntVect& Bx_cp_stag = Bfield_cp[lev][0]->ixType().toIntVect(); - const amrex::IntVect& By_cp_stag = Bfield_cp[lev][1]->ixType().toIntVect(); - const amrex::IntVect& Bz_cp_stag = Bfield_cp[lev][2]->ixType().toIntVect(); + const amrex::IntVect& Bx_cp_stag = Bfield_cp[lev][0]->ixType().toIntVect(); + const amrex::IntVect& By_cp_stag = Bfield_cp[lev][1]->ixType().toIntVect(); + const amrex::IntVect& Bz_cp_stag = Bfield_cp[lev][2]->ixType().toIntVect(); #ifdef AMREX_USE_OMP #pragma omp parallel if (Gpu::notInLaunchRegion()) #endif - for (MFIter mfi(*Bfield_aux[lev][0], TilingIfNotGPU()); mfi.isValid(); ++mfi) - { - Array4 const& bx_aux = Bfield_aux[lev][0]->array(mfi); - Array4 const& by_aux = Bfield_aux[lev][1]->array(mfi); - Array4 const& bz_aux = Bfield_aux[lev][2]->array(mfi); - Array4 const& bx_fp = Bfield_fp[lev][0]->const_array(mfi); - Array4 const& by_fp = Bfield_fp[lev][1]->const_array(mfi); - Array4 const& bz_fp = Bfield_fp[lev][2]->const_array(mfi); - Array4 const& bx_cp = Bfield_cp[lev][0]->const_array(mfi); - Array4 const& by_cp = Bfield_cp[lev][1]->const_array(mfi); - Array4 const& bz_cp = Bfield_cp[lev][2]->const_array(mfi); - Array4 const& bx_c = Btmp[0]->const_array(mfi); - Array4 const& by_c = Btmp[1]->const_array(mfi); - Array4 const& bz_c = Btmp[2]->const_array(mfi); - - const Box& bx = mfi.growntilebox(); - amrex::ParallelFor(bx, - [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept + for (MFIter mfi(*Bfield_aux[lev][0], TilingIfNotGPU()); mfi.isValid(); ++mfi) { - warpx_interp(j, k, l, bx_aux, bx_fp, bx_cp, bx_c, Bx_fp_stag, Bx_cp_stag, refinement_ratio); - warpx_interp(j, k, l, by_aux, by_fp, by_cp, by_c, By_fp_stag, By_cp_stag, refinement_ratio); - warpx_interp(j, k, l, bz_aux, bz_fp, bz_cp, bz_c, Bz_fp_stag, Bz_cp_stag, refinement_ratio); - }); + Array4 const& bx_aux = Bfield_aux[lev][0]->array(mfi); + Array4 const& by_aux = Bfield_aux[lev][1]->array(mfi); + Array4 const& bz_aux = Bfield_aux[lev][2]->array(mfi); + Array4 const& bx_fp = Bfield_fp[lev][0]->const_array(mfi); + Array4 const& by_fp = Bfield_fp[lev][1]->const_array(mfi); + Array4 const& bz_fp = Bfield_fp[lev][2]->const_array(mfi); + Array4 const& bx_cp = Bfield_cp[lev][0]->const_array(mfi); + Array4 const& by_cp = Bfield_cp[lev][1]->const_array(mfi); + Array4 const& bz_cp = Bfield_cp[lev][2]->const_array(mfi); + Array4 const& bx_c = Btmp[0]->const_array(mfi); + Array4 const& by_c = Btmp[1]->const_array(mfi); + Array4 const& bz_c = Btmp[2]->const_array(mfi); + + const Box& bx = mfi.growntilebox(); + amrex::ParallelFor(bx, + [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept + { + warpx_interp(j, k, l, bx_aux, bx_fp, bx_cp, bx_c, Bx_fp_stag, Bx_cp_stag, refinement_ratio); + warpx_interp(j, k, l, by_aux, by_fp, by_cp, by_c, By_fp_stag, By_cp_stag, refinement_ratio); + warpx_interp(j, k, l, bz_aux, bz_fp, bz_cp, bz_c, Bz_fp_stag, Bz_cp_stag, refinement_ratio); + }); + } + } + else { // electrostatic + const amrex::IntVect& Bx_fp_stag = Bfield_fp[lev][0]->ixType().toIntVect(); + const amrex::IntVect& By_fp_stag = Bfield_fp[lev][1]->ixType().toIntVect(); + const amrex::IntVect& Bz_fp_stag = Bfield_fp[lev][2]->ixType().toIntVect(); +#ifdef AMREX_USE_OMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(*Bfield_aux[lev][0], TilingIfNotGPU()); mfi.isValid(); ++mfi) + { + Array4 const& bx_aux = Bfield_aux[lev][0]->array(mfi); + Array4 const& by_aux = Bfield_aux[lev][1]->array(mfi); + Array4 const& bz_aux = Bfield_aux[lev][2]->array(mfi); + Array4 const& bx_fp = Bfield_fp[lev][0]->const_array(mfi); + Array4 const& by_fp = Bfield_fp[lev][1]->const_array(mfi); + Array4 const& bz_fp = Bfield_fp[lev][2]->const_array(mfi); + + const Box& bx = mfi.growntilebox(); + amrex::ParallelFor(bx, + [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept + { + warpx_interp(j, k, l, bx_aux, bx_fp, Bx_fp_stag); + warpx_interp(j, k, l, by_aux, by_fp, By_fp_stag); + warpx_interp(j, k, l, bz_aux, bz_fp, Bz_fp_stag); + }); + } } } - // Efield { - Array,3> Etmp; - if (Efield_cax[lev][0]) { - for (int i = 0; i < 3; ++i) { - Etmp[i] = std::make_unique( - *Efield_cax[lev][i], amrex::make_alias, 0, 1); + if (electromagnetic_solver_id != ElectromagneticSolverAlgo::None) { + Array,3> Etmp; + if (Efield_cax[lev][0]) { + for (int i = 0; i < 3; ++i) { + Etmp[i] = std::make_unique( + *Efield_cax[lev][i], amrex::make_alias, 0, 1); + } + } else { + const IntVect ngtmp = Efield_aux[lev-1][0]->nGrowVect(); + for (int i = 0; i < 3; ++i) { + Etmp[i] = std::make_unique( + cnba, dm, 1, ngtmp); + } } - } else { - const IntVect ngtmp = Efield_aux[lev-1][0]->nGrowVect(); + Etmp[0]->setVal(0.0); + Etmp[1]->setVal(0.0); + Etmp[2]->setVal(0.0); + // ParallelCopy from coarse level for (int i = 0; i < 3; ++i) { - Etmp[i] = std::make_unique( - cnba, dm, 1, ngtmp); + const IntVect ng = Etmp[i]->nGrowVect(); + // Guard cells may not be up to date beyond ng_FieldGather + const amrex::IntVect& ng_src = guard_cells.ng_FieldGather; + // Copy Efield_aux to Etmp, using up to ng_src (=ng_FieldGather) guard cells from + // Efield_aux and filling up to ng (=nGrow) guard cells in Etmp + ablastr::utils::communication::ParallelCopy(*Etmp[i], *Efield_aux[lev - 1][i], 0, 0, 1, + ng_src, ng, WarpX::do_single_precision_comms, cperiod); } - } - Etmp[0]->setVal(0.0); - Etmp[1]->setVal(0.0); - Etmp[2]->setVal(0.0); - // ParallelCopy from coarse level - for (int i = 0; i < 3; ++i) { - const IntVect ng = Etmp[i]->nGrowVect(); - // Guard cells may not be up to date beyond ng_FieldGather - const amrex::IntVect& ng_src = guard_cells.ng_FieldGather; - // Copy Efield_aux to Etmp, using up to ng_src (=ng_FieldGather) guard cells from - // Efield_aux and filling up to ng (=nGrow) guard cells in Etmp - ablastr::utils::communication::ParallelCopy(*Etmp[i], *Efield_aux[lev - 1][i], 0, 0, 1, - ng_src, ng, WarpX::do_single_precision_comms, cperiod); - } - const amrex::IntVect& refinement_ratio = refRatio(lev-1); + const amrex::IntVect& refinement_ratio = refRatio(lev-1); - const amrex::IntVect& Ex_fp_stag = Efield_fp[lev][0]->ixType().toIntVect(); - const amrex::IntVect& Ey_fp_stag = Efield_fp[lev][1]->ixType().toIntVect(); - const amrex::IntVect& Ez_fp_stag = Efield_fp[lev][2]->ixType().toIntVect(); + const amrex::IntVect& Ex_fp_stag = Efield_fp[lev][0]->ixType().toIntVect(); + const amrex::IntVect& Ey_fp_stag = Efield_fp[lev][1]->ixType().toIntVect(); + const amrex::IntVect& Ez_fp_stag = Efield_fp[lev][2]->ixType().toIntVect(); - const amrex::IntVect& Ex_cp_stag = Efield_cp[lev][0]->ixType().toIntVect(); - const amrex::IntVect& Ey_cp_stag = Efield_cp[lev][1]->ixType().toIntVect(); - const amrex::IntVect& Ez_cp_stag = Efield_cp[lev][2]->ixType().toIntVect(); + const amrex::IntVect& Ex_cp_stag = Efield_cp[lev][0]->ixType().toIntVect(); + const amrex::IntVect& Ey_cp_stag = Efield_cp[lev][1]->ixType().toIntVect(); + const amrex::IntVect& Ez_cp_stag = Efield_cp[lev][2]->ixType().toIntVect(); #ifdef AMREX_USE_OMP #pragma omp parallel if (Gpu::notInLaunchRegion()) #endif - for (MFIter mfi(*Efield_aux[lev][0]); mfi.isValid(); ++mfi) - { - Array4 const& ex_aux = Efield_aux[lev][0]->array(mfi); - Array4 const& ey_aux = Efield_aux[lev][1]->array(mfi); - Array4 const& ez_aux = Efield_aux[lev][2]->array(mfi); - Array4 const& ex_fp = Efield_fp[lev][0]->const_array(mfi); - Array4 const& ey_fp = Efield_fp[lev][1]->const_array(mfi); - Array4 const& ez_fp = Efield_fp[lev][2]->const_array(mfi); - Array4 const& ex_cp = Efield_cp[lev][0]->const_array(mfi); - Array4 const& ey_cp = Efield_cp[lev][1]->const_array(mfi); - Array4 const& ez_cp = Efield_cp[lev][2]->const_array(mfi); - Array4 const& ex_c = Etmp[0]->const_array(mfi); - Array4 const& ey_c = Etmp[1]->const_array(mfi); - Array4 const& ez_c = Etmp[2]->const_array(mfi); - - const Box& bx = mfi.fabbox(); - amrex::ParallelFor(bx, - [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept + for (MFIter mfi(*Efield_aux[lev][0]); mfi.isValid(); ++mfi) { - warpx_interp(j, k, l, ex_aux, ex_fp, ex_cp, ex_c, Ex_fp_stag, Ex_cp_stag, refinement_ratio); - warpx_interp(j, k, l, ey_aux, ey_fp, ey_cp, ey_c, Ey_fp_stag, Ey_cp_stag, refinement_ratio); - warpx_interp(j, k, l, ez_aux, ez_fp, ez_cp, ez_c, Ez_fp_stag, Ez_cp_stag, refinement_ratio); - }); + Array4 const& ex_aux = Efield_aux[lev][0]->array(mfi); + Array4 const& ey_aux = Efield_aux[lev][1]->array(mfi); + Array4 const& ez_aux = Efield_aux[lev][2]->array(mfi); + Array4 const& ex_fp = Efield_fp[lev][0]->const_array(mfi); + Array4 const& ey_fp = Efield_fp[lev][1]->const_array(mfi); + Array4 const& ez_fp = Efield_fp[lev][2]->const_array(mfi); + Array4 const& ex_cp = Efield_cp[lev][0]->const_array(mfi); + Array4 const& ey_cp = Efield_cp[lev][1]->const_array(mfi); + Array4 const& ez_cp = Efield_cp[lev][2]->const_array(mfi); + Array4 const& ex_c = Etmp[0]->const_array(mfi); + Array4 const& ey_c = Etmp[1]->const_array(mfi); + Array4 const& ez_c = Etmp[2]->const_array(mfi); + + const Box& bx = mfi.fabbox(); + amrex::ParallelFor(bx, + [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept + { + warpx_interp(j, k, l, ex_aux, ex_fp, ex_cp, ex_c, Ex_fp_stag, Ex_cp_stag, refinement_ratio); + warpx_interp(j, k, l, ey_aux, ey_fp, ey_cp, ey_c, Ey_fp_stag, Ey_cp_stag, refinement_ratio); + warpx_interp(j, k, l, ez_aux, ez_fp, ez_cp, ez_c, Ez_fp_stag, Ez_cp_stag, refinement_ratio); + }); + } + } + else { // electrostatic + const amrex::IntVect& Ex_fp_stag = Efield_fp[lev][0]->ixType().toIntVect(); + const amrex::IntVect& Ey_fp_stag = Efield_fp[lev][1]->ixType().toIntVect(); + const amrex::IntVect& Ez_fp_stag = Efield_fp[lev][2]->ixType().toIntVect(); +#ifdef AMREX_USE_OMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(*Efield_aux[lev][0], TilingIfNotGPU()); mfi.isValid(); ++mfi) + { + Array4 const& ex_aux = Efield_aux[lev][0]->array(mfi); + Array4 const& ey_aux = Efield_aux[lev][1]->array(mfi); + Array4 const& ez_aux = Efield_aux[lev][2]->array(mfi); + Array4 const& ex_fp = Efield_fp[lev][0]->const_array(mfi); + Array4 const& ey_fp = Efield_fp[lev][1]->const_array(mfi); + Array4 const& ez_fp = Efield_fp[lev][2]->const_array(mfi); + + const Box& bx = mfi.growntilebox(); + amrex::ParallelFor(bx, + [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept + { + warpx_interp(j, k, l, ex_aux, ex_fp, Ex_fp_stag); + warpx_interp(j, k, l, ey_aux, ey_fp, Ey_fp_stag); + warpx_interp(j, k, l, ez_aux, ez_fp, Ez_fp_stag); + }); + } } } } @@ -341,141 +396,158 @@ WarpX::UpdateAuxilaryDataSameType () // B field { - MultiFab dBx(Bfield_cp[lev][0]->boxArray(), dm, Bfield_cp[lev][0]->nComp(), ng); - MultiFab dBy(Bfield_cp[lev][1]->boxArray(), dm, Bfield_cp[lev][1]->nComp(), ng); - MultiFab dBz(Bfield_cp[lev][2]->boxArray(), dm, Bfield_cp[lev][2]->nComp(), ng); - dBx.setVal(0.0); - dBy.setVal(0.0); - dBz.setVal(0.0); - - // Copy Bfield_aux to the dB MultiFabs, using up to ng_src (=ng_FieldGather) guard - // cells from Bfield_aux and filling up to ng (=nGrow) guard cells in the dB MultiFabs - - ablastr::utils::communication::ParallelCopy(dBx, *Bfield_aux[lev - 1][0], 0, 0, - Bfield_aux[lev - 1][0]->nComp(), ng_src, ng, WarpX::do_single_precision_comms, - crse_period); - ablastr::utils::communication::ParallelCopy(dBy, *Bfield_aux[lev - 1][1], 0, 0, - Bfield_aux[lev - 1][1]->nComp(), ng_src, ng, WarpX::do_single_precision_comms, - crse_period); - ablastr::utils::communication::ParallelCopy(dBz, *Bfield_aux[lev - 1][2], 0, 0, - Bfield_aux[lev - 1][2]->nComp(), ng_src, ng, WarpX::do_single_precision_comms, - crse_period); - - if (Bfield_cax[lev][0]) + if (electromagnetic_solver_id != ElectromagneticSolverAlgo::None) { - MultiFab::Copy(*Bfield_cax[lev][0], dBx, 0, 0, Bfield_cax[lev][0]->nComp(), ng); - MultiFab::Copy(*Bfield_cax[lev][1], dBy, 0, 0, Bfield_cax[lev][1]->nComp(), ng); - MultiFab::Copy(*Bfield_cax[lev][2], dBz, 0, 0, Bfield_cax[lev][2]->nComp(), ng); - } - MultiFab::Subtract(dBx, *Bfield_cp[lev][0], 0, 0, Bfield_cp[lev][0]->nComp(), ng); - MultiFab::Subtract(dBy, *Bfield_cp[lev][1], 0, 0, Bfield_cp[lev][1]->nComp(), ng); - MultiFab::Subtract(dBz, *Bfield_cp[lev][2], 0, 0, Bfield_cp[lev][2]->nComp(), ng); + MultiFab dBx(Bfield_cp[lev][0]->boxArray(), dm, Bfield_cp[lev][0]->nComp(), ng); + MultiFab dBy(Bfield_cp[lev][1]->boxArray(), dm, Bfield_cp[lev][1]->nComp(), ng); + MultiFab dBz(Bfield_cp[lev][2]->boxArray(), dm, Bfield_cp[lev][2]->nComp(), ng); + dBx.setVal(0.0); + dBy.setVal(0.0); + dBz.setVal(0.0); + + // Copy Bfield_aux to the dB MultiFabs, using up to ng_src (=ng_FieldGather) guard + // cells from Bfield_aux and filling up to ng (=nGrow) guard cells in the dB MultiFabs + + ablastr::utils::communication::ParallelCopy(dBx, *Bfield_aux[lev - 1][0], 0, 0, + Bfield_aux[lev - 1][0]->nComp(), ng_src, ng, WarpX::do_single_precision_comms, + crse_period); + ablastr::utils::communication::ParallelCopy(dBy, *Bfield_aux[lev - 1][1], 0, 0, + Bfield_aux[lev - 1][1]->nComp(), ng_src, ng, WarpX::do_single_precision_comms, + crse_period); + ablastr::utils::communication::ParallelCopy(dBz, *Bfield_aux[lev - 1][2], 0, 0, + Bfield_aux[lev - 1][2]->nComp(), ng_src, ng, WarpX::do_single_precision_comms, + crse_period); + + if (Bfield_cax[lev][0]) + { + MultiFab::Copy(*Bfield_cax[lev][0], dBx, 0, 0, Bfield_cax[lev][0]->nComp(), ng); + MultiFab::Copy(*Bfield_cax[lev][1], dBy, 0, 0, Bfield_cax[lev][1]->nComp(), ng); + MultiFab::Copy(*Bfield_cax[lev][2], dBz, 0, 0, Bfield_cax[lev][2]->nComp(), ng); + } + MultiFab::Subtract(dBx, *Bfield_cp[lev][0], 0, 0, Bfield_cp[lev][0]->nComp(), ng); + MultiFab::Subtract(dBy, *Bfield_cp[lev][1], 0, 0, Bfield_cp[lev][1]->nComp(), ng); + MultiFab::Subtract(dBz, *Bfield_cp[lev][2], 0, 0, Bfield_cp[lev][2]->nComp(), ng); - const amrex::IntVect& refinement_ratio = refRatio(lev-1); + const amrex::IntVect& refinement_ratio = refRatio(lev-1); - const amrex::IntVect& Bx_stag = Bfield_aux[lev-1][0]->ixType().toIntVect(); - const amrex::IntVect& By_stag = Bfield_aux[lev-1][1]->ixType().toIntVect(); - const amrex::IntVect& Bz_stag = Bfield_aux[lev-1][2]->ixType().toIntVect(); + const amrex::IntVect& Bx_stag = Bfield_aux[lev-1][0]->ixType().toIntVect(); + const amrex::IntVect& By_stag = Bfield_aux[lev-1][1]->ixType().toIntVect(); + const amrex::IntVect& Bz_stag = Bfield_aux[lev-1][2]->ixType().toIntVect(); #ifdef AMREX_USE_OMP #pragma omp parallel if (Gpu::notInLaunchRegion()) #endif - for (MFIter mfi(*Bfield_aux[lev][0]); mfi.isValid(); ++mfi) - { - Array4 const& bx_aux = Bfield_aux[lev][0]->array(mfi); - Array4 const& by_aux = Bfield_aux[lev][1]->array(mfi); - Array4 const& bz_aux = Bfield_aux[lev][2]->array(mfi); - Array4 const& bx_fp = Bfield_fp[lev][0]->const_array(mfi); - Array4 const& by_fp = Bfield_fp[lev][1]->const_array(mfi); - Array4 const& bz_fp = Bfield_fp[lev][2]->const_array(mfi); - Array4 const& bx_c = dBx.const_array(mfi); - Array4 const& by_c = dBy.const_array(mfi); - Array4 const& bz_c = dBz.const_array(mfi); - - amrex::ParallelFor(Box(bx_aux), Box(by_aux), Box(bz_aux), - [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept - { - warpx_interp(j, k, l, bx_aux, bx_fp, bx_c, Bx_stag, refinement_ratio); - }, - [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept - { - warpx_interp(j, k, l, by_aux, by_fp, by_c, By_stag, refinement_ratio); - }, - [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept + for (MFIter mfi(*Bfield_aux[lev][0]); mfi.isValid(); ++mfi) { - warpx_interp(j, k, l, bz_aux, bz_fp, bz_c, Bz_stag, refinement_ratio); - }); + Array4 const& bx_aux = Bfield_aux[lev][0]->array(mfi); + Array4 const& by_aux = Bfield_aux[lev][1]->array(mfi); + Array4 const& bz_aux = Bfield_aux[lev][2]->array(mfi); + Array4 const& bx_fp = Bfield_fp[lev][0]->const_array(mfi); + Array4 const& by_fp = Bfield_fp[lev][1]->const_array(mfi); + Array4 const& bz_fp = Bfield_fp[lev][2]->const_array(mfi); + Array4 const& bx_c = dBx.const_array(mfi); + Array4 const& by_c = dBy.const_array(mfi); + Array4 const& bz_c = dBz.const_array(mfi); + + amrex::ParallelFor(Box(bx_aux), Box(by_aux), Box(bz_aux), + [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept + { + warpx_interp(j, k, l, bx_aux, bx_fp, bx_c, Bx_stag, refinement_ratio); + }, + [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept + { + warpx_interp(j, k, l, by_aux, by_fp, by_c, By_stag, refinement_ratio); + }, + [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept + { + warpx_interp(j, k, l, bz_aux, bz_fp, bz_c, Bz_stag, refinement_ratio); + }); + } + } + else // electrostatic + { + MultiFab::Copy(*Bfield_aux[lev][0], *Bfield_fp[lev][0], 0, 0, Bfield_aux[lev][0]->nComp(), Bfield_aux[lev][0]->nGrowVect()); + MultiFab::Copy(*Bfield_aux[lev][1], *Bfield_fp[lev][1], 0, 0, Bfield_aux[lev][1]->nComp(), Bfield_aux[lev][1]->nGrowVect()); + MultiFab::Copy(*Bfield_aux[lev][2], *Bfield_fp[lev][2], 0, 0, Bfield_aux[lev][2]->nComp(), Bfield_aux[lev][2]->nGrowVect()); } } - // E field { - MultiFab dEx(Efield_cp[lev][0]->boxArray(), dm, Efield_cp[lev][0]->nComp(), ng); - MultiFab dEy(Efield_cp[lev][1]->boxArray(), dm, Efield_cp[lev][1]->nComp(), ng); - MultiFab dEz(Efield_cp[lev][2]->boxArray(), dm, Efield_cp[lev][2]->nComp(), ng); - dEx.setVal(0.0); - dEy.setVal(0.0); - dEz.setVal(0.0); - - // Copy Efield_aux to the dE MultiFabs, using up to ng_src (=ng_FieldGather) guard - // cells from Efield_aux and filling up to ng (=nGrow) guard cells in the dE MultiFabs - ablastr::utils::communication::ParallelCopy(dEx, *Efield_aux[lev - 1][0], 0, 0, - Efield_aux[lev - 1][0]->nComp(), ng_src, ng, - WarpX::do_single_precision_comms, - crse_period); - ablastr::utils::communication::ParallelCopy(dEy, *Efield_aux[lev - 1][1], 0, 0, - Efield_aux[lev - 1][1]->nComp(), ng_src, ng, - WarpX::do_single_precision_comms, - crse_period); - ablastr::utils::communication::ParallelCopy(dEz, *Efield_aux[lev - 1][2], 0, 0, - Efield_aux[lev - 1][2]->nComp(), ng_src, ng, - WarpX::do_single_precision_comms, - crse_period); - - if (Efield_cax[lev][0]) + if (electromagnetic_solver_id != ElectromagneticSolverAlgo::None) { - MultiFab::Copy(*Efield_cax[lev][0], dEx, 0, 0, Efield_cax[lev][0]->nComp(), ng); - MultiFab::Copy(*Efield_cax[lev][1], dEy, 0, 0, Efield_cax[lev][1]->nComp(), ng); - MultiFab::Copy(*Efield_cax[lev][2], dEz, 0, 0, Efield_cax[lev][2]->nComp(), ng); - } - MultiFab::Subtract(dEx, *Efield_cp[lev][0], 0, 0, Efield_cp[lev][0]->nComp(), ng); - MultiFab::Subtract(dEy, *Efield_cp[lev][1], 0, 0, Efield_cp[lev][1]->nComp(), ng); - MultiFab::Subtract(dEz, *Efield_cp[lev][2], 0, 0, Efield_cp[lev][2]->nComp(), ng); + MultiFab dEx(Efield_cp[lev][0]->boxArray(), dm, Efield_cp[lev][0]->nComp(), ng); + MultiFab dEy(Efield_cp[lev][1]->boxArray(), dm, Efield_cp[lev][1]->nComp(), ng); + MultiFab dEz(Efield_cp[lev][2]->boxArray(), dm, Efield_cp[lev][2]->nComp(), ng); + dEx.setVal(0.0); + dEy.setVal(0.0); + dEz.setVal(0.0); + + // Copy Efield_aux to the dE MultiFabs, using up to ng_src (=ng_FieldGather) guard + // cells from Efield_aux and filling up to ng (=nGrow) guard cells in the dE MultiFabs + ablastr::utils::communication::ParallelCopy(dEx, *Efield_aux[lev - 1][0], 0, 0, + Efield_aux[lev - 1][0]->nComp(), ng_src, ng, + WarpX::do_single_precision_comms, + crse_period); + ablastr::utils::communication::ParallelCopy(dEy, *Efield_aux[lev - 1][1], 0, 0, + Efield_aux[lev - 1][1]->nComp(), ng_src, ng, + WarpX::do_single_precision_comms, + crse_period); + ablastr::utils::communication::ParallelCopy(dEz, *Efield_aux[lev - 1][2], 0, 0, + Efield_aux[lev - 1][2]->nComp(), ng_src, ng, + WarpX::do_single_precision_comms, + crse_period); + + if (Efield_cax[lev][0]) + { + MultiFab::Copy(*Efield_cax[lev][0], dEx, 0, 0, Efield_cax[lev][0]->nComp(), ng); + MultiFab::Copy(*Efield_cax[lev][1], dEy, 0, 0, Efield_cax[lev][1]->nComp(), ng); + MultiFab::Copy(*Efield_cax[lev][2], dEz, 0, 0, Efield_cax[lev][2]->nComp(), ng); + } + MultiFab::Subtract(dEx, *Efield_cp[lev][0], 0, 0, Efield_cp[lev][0]->nComp(), ng); + MultiFab::Subtract(dEy, *Efield_cp[lev][1], 0, 0, Efield_cp[lev][1]->nComp(), ng); + MultiFab::Subtract(dEz, *Efield_cp[lev][2], 0, 0, Efield_cp[lev][2]->nComp(), ng); - const amrex::IntVect& refinement_ratio = refRatio(lev-1); + const amrex::IntVect& refinement_ratio = refRatio(lev-1); - const amrex::IntVect& Ex_stag = Efield_aux[lev-1][0]->ixType().toIntVect(); - const amrex::IntVect& Ey_stag = Efield_aux[lev-1][1]->ixType().toIntVect(); - const amrex::IntVect& Ez_stag = Efield_aux[lev-1][2]->ixType().toIntVect(); + const amrex::IntVect& Ex_stag = Efield_aux[lev-1][0]->ixType().toIntVect(); + const amrex::IntVect& Ey_stag = Efield_aux[lev-1][1]->ixType().toIntVect(); + const amrex::IntVect& Ez_stag = Efield_aux[lev-1][2]->ixType().toIntVect(); #ifdef AMREX_USE_OMP #pragma omp parallel if (Gpu::notInLaunchRegion()) #endif - for (MFIter mfi(*Efield_aux[lev][0]); mfi.isValid(); ++mfi) - { - Array4 const& ex_aux = Efield_aux[lev][0]->array(mfi); - Array4 const& ey_aux = Efield_aux[lev][1]->array(mfi); - Array4 const& ez_aux = Efield_aux[lev][2]->array(mfi); - Array4 const& ex_fp = Efield_fp[lev][0]->const_array(mfi); - Array4 const& ey_fp = Efield_fp[lev][1]->const_array(mfi); - Array4 const& ez_fp = Efield_fp[lev][2]->const_array(mfi); - Array4 const& ex_c = dEx.const_array(mfi); - Array4 const& ey_c = dEy.const_array(mfi); - Array4 const& ez_c = dEz.const_array(mfi); - - amrex::ParallelFor(Box(ex_aux), Box(ey_aux), Box(ez_aux), - [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept + for (MFIter mfi(*Efield_aux[lev][0]); mfi.isValid(); ++mfi) { - warpx_interp(j, k, l, ex_aux, ex_fp, ex_c, Ex_stag, refinement_ratio); - }, - [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept - { - warpx_interp(j, k, l, ey_aux, ey_fp, ey_c, Ey_stag, refinement_ratio); - }, - [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept - { - warpx_interp(j, k, l, ez_aux, ez_fp, ez_c, Ez_stag, refinement_ratio); - }); + Array4 const& ex_aux = Efield_aux[lev][0]->array(mfi); + Array4 const& ey_aux = Efield_aux[lev][1]->array(mfi); + Array4 const& ez_aux = Efield_aux[lev][2]->array(mfi); + Array4 const& ex_fp = Efield_fp[lev][0]->const_array(mfi); + Array4 const& ey_fp = Efield_fp[lev][1]->const_array(mfi); + Array4 const& ez_fp = Efield_fp[lev][2]->const_array(mfi); + Array4 const& ex_c = dEx.const_array(mfi); + Array4 const& ey_c = dEy.const_array(mfi); + Array4 const& ez_c = dEz.const_array(mfi); + + amrex::ParallelFor(Box(ex_aux), Box(ey_aux), Box(ez_aux), + [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept + { + warpx_interp(j, k, l, ex_aux, ex_fp, ex_c, Ex_stag, refinement_ratio); + }, + [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept + { + warpx_interp(j, k, l, ey_aux, ey_fp, ey_c, Ey_stag, refinement_ratio); + }, + [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept + { + warpx_interp(j, k, l, ez_aux, ez_fp, ez_c, Ez_stag, refinement_ratio); + }); + } + } + else // electrostatic + { + MultiFab::Copy(*Efield_aux[lev][0], *Efield_fp[lev][0], 0, 0, Efield_aux[lev][0]->nComp(), Efield_aux[lev][0]->nGrowVect()); + MultiFab::Copy(*Efield_aux[lev][1], *Efield_fp[lev][1], 0, 0, Efield_aux[lev][1]->nComp(), Efield_aux[lev][1]->nGrowVect()); + MultiFab::Copy(*Efield_aux[lev][2], *Efield_fp[lev][2], 0, 0, Efield_aux[lev][2]->nComp(), Efield_aux[lev][2]->nGrowVect()); } } } diff --git a/Source/Parallelization/WarpXComm_K.H b/Source/Parallelization/WarpXComm_K.H index a2b8fe38ed4..c3362087ad9 100644 --- a/Source/Parallelization/WarpXComm_K.H +++ b/Source/Parallelization/WarpXComm_K.H @@ -12,7 +12,7 @@ /** * \brief Interpolation function called within WarpX::UpdateAuxilaryDataSameType - * to interpolate data from the coarse and fine grids to the fine aux grid, + * with electromagnetic solver to interpolate data from the coarse and fine grids to the fine aux grid, * assuming that all grids have the same staggering (either collocated or staggered). * * \param[in] j index along x of the output array @@ -285,6 +285,94 @@ void warpx_interp (int j, int k, int l, // Final result arr_aux(j,k,l) = tmp + (fine - coarse); } +/** + * \brief Interpolation function called within WarpX::UpdateAuxilaryDataStagToNodal + * to interpolate data from the coarse and fine grids to the fine aux grid, + * with momentum-conserving field gathering, hence between grids with different staggering, + * and assuming that the aux grid is collocated. + * + * \param[in] j index along x of the output array + * \param[in] k index along y (in 3D) or z (in 2D) of the output array + * \param[in] l index along z (in 3D, l=0 in 2D) of the output array + * \param[in,out] arr_aux output array where interpolated values are stored + * \param[in] arr_fine input fine-patch array storing the values to interpolate + * \param[in] arr_fine_stag IndexType of the fine-patch arrays + */ +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void warpx_interp (int j, int k, int l, + amrex::Array4 const& arr_aux, + amrex::Array4 const& arr_fine, + const amrex::IntVect& arr_fine_stag) +{ + using namespace amrex; + + // Pad input arrays with zeros beyond ghost cells + // for out-of-bound accesses due to large-stencil operations + const auto arr_fine_zeropad = [arr_fine] (const int jj, const int kk, const int ll) noexcept + { + return arr_fine.contains(jj,kk,ll) ? arr_fine(jj,kk,ll) : 0.0_rt; + }; + + // NOTE Indices (j,k,l) in the following refer to: + // - (z,-,-) in 1D + // - (x,z,-) in 2D + // - (r,z,-) in RZ + // - (x,y,z) in 3D + + // Staggering of fine array (0: cell-centered; 1: nodal) + const int sj_fp = arr_fine_stag[0]; +#if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ) + const int sk_fp = arr_fine_stag[1]; +#elif defined(WARPX_DIM_3D) + const int sk_fp = arr_fine_stag[1]; + const int sl_fp = arr_fine_stag[2]; +#endif + + // Number of points used for interpolation from coarse grid to fine grid + int nj; + int nk; + int nl; + + amrex::Real fine = 0.0_rt; + + // 3) Interpolation from fine staggered to fine nodal + + nj = (sj_fp == 0) ? 2 : 1; +#if defined(WARPX_DIM_1D_Z) + nk = 1; + nl = 1; +#elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ) + nk = (sk_fp == 0) ? 2 : 1; + nl = 1; +#else + nk = (sk_fp == 0) ? 2 : 1; + nl = (sl_fp == 0) ? 2 : 1; +#endif + + const int jm = (sj_fp == 0) ? j-1 : j; +#if defined(WARPX_DIM_1D_Z) + const int km = k; + const int lm = l; +#elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ) + const int km = (sk_fp == 0) ? k-1 : k; + const int lm = l; +#else + const int km = (sk_fp == 0) ? k-1 : k; + const int lm = (sl_fp == 0) ? l-1 : l; +#endif + + for (int jj = 0; jj < nj; jj++) { + for (int kk = 0; kk < nk; kk++) { + for (int ll = 0; ll < nl; ll++) { + fine += arr_fine_zeropad(jm+jj,km+kk,lm+ll); + } + } + } + fine = fine/static_cast(nj*nk*nl); + + // Final result + arr_aux(j,k,l) = fine; +} /** * \brief Arbitrary-order interpolation function used to center a given MultiFab between two grids diff --git a/Source/ablastr/fields/PoissonSolver.H b/Source/ablastr/fields/PoissonSolver.H index ca262981010..589c6ec1835 100644 --- a/Source/ablastr/fields/PoissonSolver.H +++ b/Source/ablastr/fields/PoissonSolver.H @@ -263,10 +263,15 @@ computePhi (amrex::Vector const & rho, mlmg.setVerbose(verbosity); mlmg.setMaxIter(max_iters); mlmg.setAlwaysUseBNorm(always_use_bnorm); + if (WarpX::grid_type == GridType::Collocated) { + // In this case, computeE needs to use ghost nodes data. So we + // ask MLMG to fill BC for us after it solves the problem. + mlmg.setFinalFillBC(true); + } // Solve Poisson equation at lev mlmg.solve( {phi[lev]}, {rho[lev]}, - relative_tolerance, absolute_tolerance ); + relative_tolerance, absolute_tolerance ); // needed for solving the levels by levels: // - coarser level is initial guess for finer level @@ -280,10 +285,14 @@ computePhi (amrex::Vector const & rho, const amrex::IntVect& refratio = rel_ref_ratio.value()[lev]; ba.coarsen(refratio); const int ncomp = linop.getNComp(); - amrex::MultiFab phi_cp(ba, phi[lev+1]->DistributionMap(), ncomp, 1); + const int ng = (WarpX::grid_type == GridType::Collocated) ? 1 : 0; + amrex::MultiFab phi_cp(ba, phi[lev+1]->DistributionMap(), ncomp, ng); + if (ng > 0) { + // Set all values outside the domain to zero + phi_cp.setDomainBndry(0.0_rt, geom[lev]); + } // Copy from phi[lev] to phi_cp (in parallel) - const amrex::IntVect& ng = amrex::IntVect::TheUnitVector(); const amrex::Periodicity& crse_period = geom[lev].periodicity(); ablastr::utils::communication::ParallelCopy( @@ -292,8 +301,8 @@ computePhi (amrex::Vector const & rho, 0, 0, 1, - ng, - ng, + amrex::IntVect(0), + amrex::IntVect(ng), do_single_precision_comms, crse_period ); @@ -308,7 +317,7 @@ computePhi (amrex::Vector const & rho, details::PoissonInterpCPtoFP const interp(phi_fp_arr, phi_cp_arr, refratio); - amrex::Box const b = mfi.tilebox(phi[lev + 1]->ixType().toIntVect()); + amrex::Box const& b = mfi.growntilebox(ng); amrex::ParallelFor(b, interp); }