From c50f206656696ce9602db2863ea7311e2399be06 Mon Sep 17 00:00:00 2001 From: Sam Reeve <6740307+streeve@users.noreply.github.com> Date: Thu, 9 Nov 2023 13:46:20 -0500 Subject: [PATCH 1/2] fixup LinkedCell NeighborList interface --- core/src/Cabana_LinkedCellList.hpp | 41 ++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/core/src/Cabana_LinkedCellList.hpp b/core/src/Cabana_LinkedCellList.hpp index 8603e3fd0..b448ea113 100644 --- a/core/src/Cabana_LinkedCellList.hpp +++ b/core/src/Cabana_LinkedCellList.hpp @@ -504,6 +504,11 @@ class LinkedCellList return _particle_bins; } + auto getParticleBin( const int particle_index ) const + { + return _particle_bins( particle_index ); + } + /*! \brief Determines which particles belong to bin i \param i the cardinal bin index of the current bin @@ -665,24 +670,35 @@ class NeighborList> //! Get the maximum number of neighbors per particle. template KOKKOS_INLINE_FUNCTION static std::size_t - maxNeighbor( const list_type& list, const CellIndexType cell ) + totalNeighbor( const list_type& list ) { int total_count = 0; for ( int p = 0; p < list.numParticles(); ++p ) - total_count += numNeighbor( list, cell, p ); + total_count += numNeighbor( list, p ); return total_count; } + //! Get the maximum number of neighbors across all particles. + KOKKOS_INLINE_FUNCTION + static std::size_t maxNeighbor( const list_type& list ) + { + std::size_t max_n = 0; + // Loop across all particles to find maximum number of neighbors. + for ( std::size_t p = 0; p < list.numParticles(); p++ ) + if ( numNeighbor( list, p ) > max_n ) + max_n = numNeighbor( list, p ); + return max_n; + } + //! Get the number of neighbors for a given particle index. template KOKKOS_INLINE_FUNCTION static std::size_t - numNeighbor( const list_type& list, const CellIndexType cell, - const std::size_t particle_index ) + numNeighbor( const list_type& list, const std::size_t particle_index ) { int total_count = 0; int imin, imax, jmin, jmax, kmin, kmax; - list.getStencilCells( cell( particle_index ), imin, imax, jmin, jmax, - kmin, kmax ); + list.getStencilCells( list.getParticleBin( particle_index ), imin, imax, + jmin, jmax, kmin, kmax ); // Loop over the cell stencil. for ( int i = imin; i < imax; ++i ) @@ -698,15 +714,14 @@ class NeighborList> //! the neighbor relative to the particle. template KOKKOS_INLINE_FUNCTION static std::size_t - getNeighbor( const list_type& list, const CellIndexType cell, - const std::size_t particle_index, - const std::size_t neighbor_index, const bool sorted = true ) + getNeighbor( const list_type& list, const std::size_t particle_index, + const std::size_t neighbor_index ) { - int total_count = 0; + std::size_t total_count = 0; int previous_count = 0; int imin, imax, jmin, jmax, kmin, kmax; - list.getStencilCells( cell( particle_index ), imin, imax, jmin, jmax, - kmin, kmax ); + list.getStencilCells( list.getParticleBin( particle_index ), imin, imax, + jmin, jmax, kmin, kmax ); // Loop over the cell stencil. for ( int i = imin; i < imax; ++i ) @@ -718,7 +733,7 @@ class NeighborList> { int particle_id = list.binOffset( i, j, k ) + ( neighbor_index - previous_count ); - if ( sorted ) + if ( list.sorted() ) return particle_id; else return list.permutation( particle_id ); From 2861252b04b6ca638d1e34dc6814e0ec16dd5f36 Mon Sep 17 00:00:00 2001 From: Sam Reeve <6740307+streeve@users.noreply.github.com> Date: Thu, 9 Nov 2023 13:58:51 -0500 Subject: [PATCH 2/2] Add LinkedCell reduce (needs to be tested) --- core/src/Cabana_Parallel.hpp | 265 +++++++++++++++++++++++++++++++++++ 1 file changed, 265 insertions(+) diff --git a/core/src/Cabana_Parallel.hpp b/core/src/Cabana_Parallel.hpp index e8bece249..12a6e83c8 100644 --- a/core/src/Cabana_Parallel.hpp +++ b/core/src/Cabana_Parallel.hpp @@ -1246,6 +1246,128 @@ struct LinkedCellParallelFor }; }; +/*! + \brief Struct for performing a loop over linked cell bins + and then the particles in those bins on device +*/ +template +struct LinkedCellParallelReduce +{ + //! index type to be used for _begin + using index_type = typename Policy::index_type; + + //! Execution policy + Policy _exec_policy; + //! Functor to execute + Functor _functor; + //! Linked cell list + LinkedCellType _list; + //! result of getStencilCells + ViewType _cells; + + //! beginning index of the loop + index_type _begin; + + //! discriminator for whether a particle is a neighbor or not + NeighborDiscriminator _discriminator; + + //! Constructor + LinkedCellParallelReduce( std::string label, Policy exec_policy, + Functor functor, const LinkedCellType& list, + ViewType cells, ReduceType& reduce_val, + const index_type begin = 0 ) + : _exec_policy( exec_policy ) + , _functor( functor ) + , _list( list ) + , _cells( cells ) + , _begin( begin ) + { + if ( label.empty() ) + Kokkos::parallel_reduce( dynamic_cast( exec_policy ), + *this, reduce_val ); + else + Kokkos::parallel_reduce( label, + dynamic_cast( exec_policy ), + *this, reduce_val ); + } + + //! Performs a loop over the particles in neighboring bins in Serial + KOKKOS_FUNCTION void operator()( SerialOpTag, const index_type i, + ReduceType& ival ) const + { + int imin, imax, jmin, jmax, kmin, kmax; + _list.getStencilCells( _cells( i ), imin, imax, jmin, jmax, kmin, + kmax ); + + // Loop over the cell stencil. + for ( int gi = imin; gi < imax; ++gi ) + for ( int gj = jmin; gj < jmax; ++gj ) + for ( int gk = kmin; gk < kmax; ++gk ) + { + // Check the particles in this bin to see if they are + // neighbors. + auto offset = _list.binOffset( gi, gj, gk ); + auto size = _list.binSize( gi, gj, gk ); + for ( std::size_t j = offset; j < offset + size; ++j ) + { + // Get the true id of the candidate neighbor. + std::size_t jj; + if ( !_list.sorted() ) + jj = _list.permutation( j ); + else + jj = j; + // Avoid self interactions (dummy position args). + if ( _discriminator.isValid( i, 0, 0, 0, jj, 0, 0, 0 ) ) + { + Impl::functorTagDispatch( _functor, i, jj, + ival ); + } + } + } + } + + //! Performs a loop over the particles in neighboring bins in TeamOp + KOKKOS_FUNCTION void operator()( TeamOpTag, + const typename Policy::member_type& team, + ReduceType& ival ) const + { + index_type i = team.league_rank() + _begin; + int imin, imax, jmin, jmax, kmin, kmax; + _list.getStencilCells( _cells( i ), imin, imax, jmin, jmax, kmin, + kmax ); + + // Loop over the cell stencil. + for ( int gi = imin; gi < imax; ++gi ) + for ( int gj = jmin; gj < jmax; ++gj ) + for ( int gk = kmin; gk < kmax; ++gk ) + { + // Check the particles in this bin to see if they + // are neighbors. + auto offset = _list.binOffset( gi, gj, gk ); + auto size = _list.binSize( gi, gj, gk ); + Kokkos::parallel_for( + Kokkos::TeamThreadRange( team, offset, offset + size ), + [&]( const index_type j ) + { + // Get the true id of the candidate neighbor. + std::size_t jj; + if ( !_list.sorted() ) + jj = _list.permutation( j ); + else + jj = j; + // Avoid self interactions (dummy position args). + if ( _discriminator.isValid( i, 0, 0, 0, jj, 0, 0, + 0 ) ) + { + Impl::functorTagDispatch( _functor, i, + jj, ival ); + } + } ); + } + }; +}; + //---------------------------------------------------------------------------// /*! \brief Execute functor in parallel according to the execution policy over @@ -1382,6 +1504,149 @@ inline void neighbor_parallel_for( exec_policy.begin() ); } +//---------------------------------------------------------------------------// +/*! + \brief Execute functor in parallel according to the execution policy over + particles with thread-local serial loops over linked cell list bins and + particle first neighbors in those bins. + + \tparam FunctorType The functor type to execute. + \tparam NeighborListType The neighbor list type. + \tparam ExecParams The Kokkos range policy parameters. + + \param exec_policy The policy over which to execute the functor. + \param functor The functor to execute in parallel + \param list The linked cell list over which to execute neighbor operations. + \param FirstNeighborsTag Tag indicating operations over particle first + neighbors. + \param SerialOpTag Tag indicating a serial loop strategy over neighbors. + \param str Optional name for the functor. Will be forwarded if non-empty to + the Kokkos::parallel_for called by this code and can be used for + identification and profiling purposes. + + A "functor" is a class containing the function to execute in parallel, data + needed for that execution, and an optional \c execution_space typedef. Here + is an example functor for neighbor parallel_for: + + \code + class FunctorType { + public: + typedef ... execution_space ; + void operator() ( const int particle_index, const int neighbor_index ) const ; + }; + \endcode + + In the above example, \c Index is a Cabana index to a given AoSoA element + for a particle and its neighbor. Its operator() method defines the + operation to parallelize, over the range of indices + idx=[begin,end]. This compares to a single iteration \c idx of a + \c for loop. +*/ +template +inline void neighbor_parallel_reduce( + const Kokkos::RangePolicy& exec_policy, + const FunctorType& functor, const LinkedCellType& list, + const FirstNeighborsTag, const SerialOpTag, ReduceType& reduce_val, + const std::string& str = "", + typename std::enable_if<( is_linked_cell_list::value ), + int>::type* = 0 ) +{ + using work_tag = typename Kokkos::RangePolicy::work_tag; + using execution_space = + typename Kokkos::RangePolicy::execution_space; + + using memory_space = typename LinkedCellType::memory_space; + + auto begin = exec_policy.begin(); + auto end = exec_policy.end(); + using linear_policy_type = + Kokkos::RangePolicy; + linear_policy_type linear_exec_policy( begin, end ); + + static_assert( is_accessible_from{}, "" ); + + auto particle_bins = list.getParticleBins(); + + LinkedCellParallelReduce + lcl_par( str, linear_exec_policy, functor, list, particle_bins, + reduce_val ); +} + +//---------------------------------------------------------------------------// +/*! + \brief Execute functor in parallel according to the execution policy over + particles with thread-local serial loops over linked cell list bins and + team threading over particle first neighbors in those bins. + + \tparam FunctorType The functor type to execute. + \tparam NeighborListType The neighbor list type. + \tparam ExecParams The Kokkos range policy parameters. + + \param exec_policy The policy over which to execute the functor. + \param functor The functor to execute in parallel + \param list The linked cell list over which to execute neighbor operations. + \param FirstNeighborsTag Tag indicating operations over particle first + neighbors. + \param TeamOpTag Tag indicating a team parallel strategy over particle + neighbors. + \param str Optional name for the functor. Will be forwarded if non-empty to + the Kokkos::parallel_for called by this code and can be used for + identification and profiling purposes. + + A "functor" is a class containing the function to execute in parallel, data + needed for that execution, and an optional \c execution_space typedef. Here + is an example functor for neighbor parallel_for: + + \code + class FunctorType { + public: + typedef ... execution_space ; + void operator() ( const int particle_index, const int neighbor_index ) const ; + }; + \endcode + + In the above example, \c Index is a Cabana index to a given AoSoA element + for a particle and its neighbor. Its operator() method defines the + operation to parallelize, over the range of indices + idx=[begin,end]. This compares to a single iteration \c idx of a + \c for loop. +*/ +template +inline void neighbor_parallel_reduce( + const Kokkos::RangePolicy& exec_policy, + const FunctorType& functor, const LinkedCellType& list, + const FirstNeighborsTag, const TeamOpTag, ReduceType& reduce_val, + const std::string& str = "", + typename std::enable_if<( is_linked_cell_list::value ), + int>::type* = 0 ) +{ + using work_tag = typename Kokkos::RangePolicy::work_tag; + using execution_space = + typename Kokkos::RangePolicy::execution_space; + + using team_policy_type = + Kokkos::TeamPolicy>; + team_policy_type team_policy( exec_policy.end() - exec_policy.begin(), + Kokkos::AUTO ); + + using memory_space = typename LinkedCellType::memory_space; + + static_assert( is_accessible_from{}, "" ); + + auto particle_bins = list.getParticleBins(); + + LinkedCellParallelReduce + lcl_par( str, team_policy, functor, list, particle_bins, reduce_val, + exec_policy.begin() ); +} + } // end namespace Cabana #endif // end CABANA_PARALLEL_HPP