diff --git a/core/src/Cabana_LinkedCellList.hpp b/core/src/Cabana_LinkedCellList.hpp index dbdb52df4..eadc58061 100644 --- a/core/src/Cabana_LinkedCellList.hpp +++ b/core/src/Cabana_LinkedCellList.hpp @@ -504,7 +504,7 @@ class LinkedCellList return _particle_bins; } - auto getParticleBin() const + auto getParticleBin( const int particle_index ) const { return _particle_bins( particle_index ); } @@ -658,36 +658,45 @@ void permute( //---------------------------------------------------------------------------// //! LinkedCellList NeighborList interface. -template -class NeighborList> +template +class NeighborList> { public: //! Kokkos memory space. - using device_type = DeviceType; + using memory_space = MemorySpace; //! Neighbor list type. - using list_type = LinkedCellList; + using list_type = LinkedCellList; //! Get the maximum number of neighbors per particle. - template KOKKOS_INLINE_FUNCTION static std::size_t - maxNeighbor( const list_type& list, const CellIndexType cell ) + totalNeighbor( const list_type& list ) { int total_count = 0; for ( int p = 0; p < list.numParticles(); ++p ) - total_count += numNeighbor( list, cell, p ); + total_count += numNeighbor( list, p ); return total_count; } + //! Get the maximum number of neighbors across all particles. + KOKKOS_INLINE_FUNCTION + static std::size_t maxNeighbor( const list_type& list ) + { + std::size_t max_n = 0; + // Loop across all particles to find maximum number of neighbors. + for ( std::size_t p = 0; p < list.numParticles(); p++ ) + if ( numNeighbor( list, p ) > max_n ) + max_n = numNeighbor( list, p ); + return max_n; + } + //! Get the number of neighbors for a given particle index. - template KOKKOS_INLINE_FUNCTION static std::size_t - numNeighbor( const list_type& list, - const std::size_t particle_index ) + numNeighbor( const list_type& list, const std::size_t particle_index ) { int total_count = 0; int imin, imax, jmin, jmax, kmin, kmax; - list.getStencilCells( list.getParticleBin( particle_index ), imin, imax, jmin, jmax, - kmin, kmax ); + list.getStencilCells( list.getParticleBin( particle_index ), imin, imax, + jmin, jmax, kmin, kmax ); // Loop over the cell stencil. for ( int i = imin; i < imax; ++i ) @@ -701,17 +710,15 @@ class NeighborList> //! Get the id for a neighbor for a given particle index and the index of //! the neighbor relative to the particle. - template KOKKOS_INLINE_FUNCTION static std::size_t - getNeighbor( const list_type& list, - const std::size_t particle_index, + getNeighbor( const list_type& list, const std::size_t particle_index, const std::size_t neighbor_index ) { - size_t total_count = 0; - size_t previous_count = 0; + std::size_t total_count = 0; + std::size_t previous_count = 0; int imin, imax, jmin, jmax, kmin, kmax; - list.getStencilCells( list.getParticleBin( particle_index ), imin, imax, jmin, jmax, - kmin, kmax ); + list.getStencilCells( list.getParticleBin( particle_index ), imin, imax, + jmin, jmax, kmin, kmax ); // Loop over the cell stencil. for ( int i = imin; i < imax; ++i ) diff --git a/core/src/Cabana_Parallel.hpp b/core/src/Cabana_Parallel.hpp index ccac41476..a204f8968 100644 --- a/core/src/Cabana_Parallel.hpp +++ b/core/src/Cabana_Parallel.hpp @@ -604,6 +604,164 @@ inline void neighbor_parallel_for( Kokkos::Profiling::popRegion(); } +//---------------------------------------------------------------------------// +/*! + \brief Execute functor in parallel according to the execution policy over + particles with team parallelism over particle first neighbors. + + \tparam FunctorType The functor type to execute. + \tparam NeighborListType The neighbor list type. + \tparam ExecParams The Kokkos range policy parameters. + + \param exec_policy The policy over which to execute the functor. + \param functor The functor to execute in parallel + \param list The neighbor list over which to execute the neighbor operations. + This functor is specifically for linked cell lists. + \param FirstNeighborsTag Tag indicating operations over particle first + neighbors. + \param SerialOpTag Tag indicating a serial strategy over neighbors. + \param str Optional name for the functor. Will be forwarded if non-empty to + the Kokkos::parallel_for called by this code and can be used for + identification and profiling purposes. +*/ +template +inline void neighbor_parallel_for( + const Kokkos::RangePolicy& exec_policy, + const FunctorType& functor, const NeighborListType& list, + const FirstNeighborsTag, const SerialOpTag, const std::string& str = "", + typename std::enable_if<( is_linked_cell_list::value ), + int>::type* = 0 ) +{ + Kokkos::Profiling::pushRegion( "Cabana::neighbor_parallel_for" ); + + auto particle_bins = list.getParticleBins(); + + using work_tag = typename Kokkos::RangePolicy::work_tag; + + using execution_space = + typename Kokkos::RangePolicy::execution_space; + + using index_type = + typename Kokkos::RangePolicy::index_type; + + using neighbor_list_traits = NeighborList; + + using memory_space = typename NeighborListType::memory_space; + + auto begin = exec_policy.begin(); + auto end = exec_policy.end(); + using linear_policy_type = Kokkos::RangePolicy; + linear_policy_type linear_exec_policy( begin, end ); + + static_assert( is_accessible_from{}, "" ); + + //! discriminator for whether a particle is a neighbor or not + NeighborDiscriminator _discriminator; + + auto neigh_func = KOKKOS_LAMBDA( const index_type i ) + { + for ( index_type n = 0; + n < neighbor_list_traits::numNeighbor( list, i ); ++n ) + { + int j = neighbor_list_traits::getNeighbor( list, i, n ); + if ( _discriminator.isValid( i, 0, 0, 0, j, 0, 0, 0 ) ) + { + Impl::functorTagDispatch( + functor, i, + static_cast( j ) ); + } + } + }; + if ( str.empty() ) + Kokkos::parallel_for( linear_exec_policy, neigh_func ); + else + Kokkos::parallel_for( str, linear_exec_policy, neigh_func ); + + Kokkos::Profiling::popRegion(); +} + +//---------------------------------------------------------------------------// +/*! + \brief Execute functor in parallel according to the execution policy over + particles with team parallelism over particle first neighbors. + + \tparam FunctorType The functor type to execute. + \tparam NeighborListType The neighbor list type. + \tparam ExecParams The Kokkos range policy parameters. + + \param exec_policy The policy over which to execute the functor. + \param list The neighbor list over which to execute the neighbor operations. + This functor is specifically for linked cell lists. + \param functor The functor to execute in parallel + \param FirstNeighborsTag Tag indicating operations over particle first + neighbors. + \param TeamOpTag Tag indicating a team parallel strategy over neighbors. + \param str Optional name for the functor. Will be forwarded if non-empty to + the Kokkos::parallel_for called by this code and can be used for + identification and profiling purposes. +*/ +template +inline void neighbor_parallel_for( + const Kokkos::RangePolicy& exec_policy, + const FunctorType& functor, const NeighborListType& list, + const FirstNeighborsTag, const TeamOpTag, const std::string& str = "", + typename std::enable_if<( is_linked_cell_list::value ), + int>::type* = 0 ) +{ + Kokkos::Profiling::pushRegion( "Cabana::neighbor_parallel_for" ); + + auto particle_bins = list.getParticleBins(); + + using work_tag = typename Kokkos::RangePolicy::work_tag; + + using execution_space = + typename Kokkos::RangePolicy::execution_space; + + using kokkos_policy = + Kokkos::TeamPolicy>; + kokkos_policy team_policy( exec_policy.end() - exec_policy.begin(), + Kokkos::AUTO ); + + using index_type = typename kokkos_policy::index_type; + + using neighbor_list_traits = NeighborList; + + using memory_space = typename NeighborListType::memory_space; + + static_assert( is_accessible_from{}, "" ); + + const auto range_begin = exec_policy.begin(); + + //! discriminator for whether a particle is a neighbor or not + NeighborDiscriminator _discriminator; + + auto neigh_func = + KOKKOS_LAMBDA( const typename kokkos_policy::member_type& team ) + { + index_type i = team.league_rank() + range_begin; + Kokkos::parallel_for( + Kokkos::TeamThreadRange( + team, neighbor_list_traits::numNeighbor( list, i ) ), + [&]( const index_type n ) + { + int j = neighbor_list_traits::getNeighbor( list, i, n ); + if ( _discriminator.isValid( i, 0, 0, 0, j, 0, 0, 0 ) ) + { + Impl::functorTagDispatch( + functor, i, + static_cast( j ) ); + } + } ); + }; + if ( str.empty() ) + Kokkos::parallel_for( team_policy, neigh_func ); + else + Kokkos::parallel_for( str, team_policy, neigh_func ); + + Kokkos::Profiling::popRegion(); +} + + //---------------------------------------------------------------------------// // Neighbor Parallel Reduce //---------------------------------------------------------------------------// @@ -1040,66 +1198,234 @@ inline void neighbor_parallel_reduce( //---------------------------------------------------------------------------// /*! - \brief Execute functor in serial within existing parallel kernel over particle - first neighbors. + \brief Execute functor reductionin parallel according to the execution policy + over particles with team parallelism over particle first neighbors. - \tparam IndexType The particle index type. - \tparam FunctorType The neighbor functor type to execute. + \tparam FunctorType The functor type to execute. \tparam NeighborListType The neighbor list type. + \tparam ExecParams The Kokkos range policy parameters. + \tparam ReduceType The reduction type - \param i Particle index. - \param neighbor_functor The neighbor functor to execute in parallel. + \param exec_policy The policy over which to execute the functor. + \param functor The functor to execute in parallel \param list The neighbor list over which to execute the neighbor operations. + This functor is specifically for linked cell lists. \param FirstNeighborsTag Tag indicating operations over particle first neighbors. + \param SerialOpTag Tag indicating a serial strategy over neighbors. + \param str Optional name for the functor. Will be forwarded if non-empty to + the Kokkos::parallel_reduce called by this code and can be used for + identification and profiling purposes. +*/ +template +inline void neighbor_parallel_reduce( + const Kokkos::RangePolicy& exec_policy, + const FunctorType& functor, const NeighborListType& list, + const FirstNeighborsTag, const SerialOpTag, const ReduceType& reduce_val, + const std::string& str = "", + typename std::enable_if<( is_linked_cell_list::value ), + int>::type* = 0 ) +{ + Kokkos::Profiling::pushRegion( "Cabana::neighbor_parallel_reduce" ); - A "functor" is a class containing the function to execute in parallel, data - needed for that execution, and an optional \c execution_space typedef. Here - is an example functor for neighbor parallel_for: + auto particle_bins = list.getParticleBins(); - \code - class FunctorType { - public: - typedef ... execution_space ; - void operator() ( const int particle_index, const int neighbor_index ) const ; - }; - \endcode + using work_tag = typename Kokkos::RangePolicy::work_tag; + + using execution_space = + typename Kokkos::RangePolicy::execution_space; + + using index_type = + typename Kokkos::RangePolicy::index_type; - In the above example, \c Index is a Cabana index to a given AoSoA element - for a particle and its neighbor. Its operator() method defines the - operation to parallelize, over the range of indices - idx=[begin,end]. This compares to a single iteration \c idx of a - \c for loop. -*/ -template -KOKKOS_INLINE_FUNCTION void -for_each_neighbor( const IndexType i, const FunctorType& neighbor_functor, - const NeighborListType& list, const FirstNeighborsTag ) -{ using neighbor_list_traits = NeighborList; - for ( IndexType n = 0; - n < static_cast( - neighbor_list_traits::numNeighbor( list, i ) ); - ++n ) - neighbor_functor( - i, static_cast( - neighbor_list_traits::getNeighbor( list, i, n ) ) ); + using memory_space = typename NeighborListType::memory_space; + + auto begin = exec_policy.begin(); + auto end = exec_policy.end(); + using linear_policy_type = Kokkos::RangePolicy; + linear_policy_type linear_exec_policy( begin, end ); + + static_assert( is_accessible_from{}, "" ); + + //! discriminator for whether a particle is a neighbor or not + NeighborDiscriminator _discriminator; + + auto neigh_reduce = KOKKOS_LAMBDA( const index_type i, ReduceType& ival ) + { + for ( index_type n = 0; + n < neighbor_list_traits::numNeighbor( list, i ); ++n ) + { + int j = neighbor_list_traits::getNeighbor( list, i, n ); + if ( _discriminator.isValid( i, 0, 0, 0, j, 0, 0, 0 ) ) + { + Impl::functorTagDispatch( + functor, i, + static_cast( j ), ival ); + } + } + }; + if ( str.empty() ) + Kokkos::parallel_reduce( linear_exec_policy, neigh_reduce, reduce_val ); + else + Kokkos::parallel_reduce( str, linear_exec_policy, neigh_reduce, + reduce_val ); + + Kokkos::Profiling::popRegion(); } //---------------------------------------------------------------------------// /*! - \brief Execute team parallel functor within existing parallel kernel over - particle first neighbors. + \brief Execute functor reduction in parallel according to the execution policy + over particles with team parallelism over particle first neighbors. - \tparam IndexType The particle index type. - \tparam FunctorType The neighbor functor type to execute. + \tparam FunctorType The functor type to execute. \tparam NeighborListType The neighbor list type. - \tparam TeamMemberType Kokkos team policy. + \tparam ExecParams The Kokkos range policy parameters. + \tparam ReduceType The reduction type. - \param i Particle index. - \param team Kokkos team. - \param neighbor_functor The neighbor functor to execute in parallel. + \param exec_policy The policy over which to execute the functor. + \param list The neighbor list over which to execute the neighbor operations. + This functor is specifically for linked cell lists. + \param functor The functor to execute in parallel + \param FirstNeighborsTag Tag indicating operations over particle first + neighbors. + \param TeamOpTag Tag indicating a team parallel strategy over neighbors. + \param str Optional name for the functor. Will be forwarded if non-empty to + the Kokkos::parallel_for called by this code and can be used for + identification and profiling purposes. +*/ +template +inline void neighbor_parallel_reduce( + const Kokkos::RangePolicy& exec_policy, + const FunctorType& functor, const NeighborListType& list, + const FirstNeighborsTag, const TeamOpTag, ReduceType& reduce_val, + const std::string& str = "", + typename std::enable_if<( is_linked_cell_list::value ), + int>::type* = 0 ) +{ + Kokkos::Profiling::pushRegion( "Cabana::neighbor_parallel_reduce" ); + + auto particle_bins = list.getParticleBins(); + + using work_tag = typename Kokkos::RangePolicy::work_tag; + + using execution_space = + typename Kokkos::RangePolicy::execution_space; + + using kokkos_policy = + Kokkos::TeamPolicy>; + kokkos_policy team_policy( exec_policy.end() - exec_policy.begin(), + Kokkos::AUTO ); + + using index_type = typename kokkos_policy::index_type; + + using neighbor_list_traits = NeighborList; + + using memory_space = typename NeighborListType::memory_space; + + static_assert( is_accessible_from{}, "" ); + + const auto range_begin = exec_policy.begin(); + + //! discriminator for whether a particle is a neighbor or not + NeighborDiscriminator _discriminator; + + auto neigh_reduce = KOKKOS_LAMBDA( + const typename kokkos_policy::member_type& team, ReduceType& ival ) + { + index_type i = team.league_rank() + range_begin; + ReduceType reduce_n = 0; + Kokkos::parallel_for( + Kokkos::TeamThreadRange( + team, neighbor_list_traits::numNeighbor( list, i ) ), + [&]( const index_type n, ReduceType& nval ) + { + int j = neighbor_list_traits::getNeighbor( list, i, n ); + if ( _discriminator.isValid( i, 0, 0, 0, j, 0, 0, 0 ) ) + { + Impl::functorTagDispatch( + functor, i, + static_cast( j ), nval ); + } + }, + reduce_n ); + Kokkos::single( Kokkos::PerTeam( team ), [&]() {ival += reduce_n; } ); + }; + if ( str.empty() ) + Kokkos::parallel_reduce( team_policy, neigh_reduce, reduce_val ); + else + Kokkos::parallel_reduce( str, team_policy, neigh_reduce, reduce_val ); + + Kokkos::Profiling::popRegion(); +} + + +//---------------------------------------------------------------------------// +/*! + \brief Execute functor in serial within existing parallel kernel over particle + first neighbors. + + \tparam IndexType The particle index type. + \tparam FunctorType The neighbor functor type to execute. + \tparam NeighborListType The neighbor list type. + + \param i Particle index. + \param neighbor_functor The neighbor functor to execute in parallel. + \param list The neighbor list over which to execute the neighbor operations. + \param FirstNeighborsTag Tag indicating operations over particle first + neighbors. + + A "functor" is a class containing the function to execute in parallel, data + needed for that execution, and an optional \c execution_space typedef. Here + is an example functor for neighbor parallel_for: + + \code + class FunctorType { + public: + typedef ... execution_space ; + void operator() ( const int particle_index, const int neighbor_index ) const ; + }; + \endcode + + In the above example, \c Index is a Cabana index to a given AoSoA element + for a particle and its neighbor. Its operator() method defines the + operation to parallelize, over the range of indices + idx=[begin,end]. This compares to a single iteration \c idx of a + \c for loop. +*/ +template +KOKKOS_INLINE_FUNCTION void +for_each_neighbor( const IndexType i, const FunctorType& neighbor_functor, + const NeighborListType& list, const FirstNeighborsTag ) +{ + using neighbor_list_traits = NeighborList; + + for ( IndexType n = 0; + n < static_cast( + neighbor_list_traits::numNeighbor( list, i ) ); + ++n ) + neighbor_functor( + i, static_cast( + neighbor_list_traits::getNeighbor( list, i, n ) ) ); +} + +//---------------------------------------------------------------------------// +/*! + \brief Execute team parallel functor within existing parallel kernel over + particle first neighbors. + + \tparam IndexType The particle index type. + \tparam FunctorType The neighbor functor type to execute. + \tparam NeighborListType The neighbor list type. + \tparam TeamMemberType Kokkos team policy. + + \param i Particle index. + \param team Kokkos team. + \param neighbor_functor The neighbor functor to execute in parallel. \param list The neighbor list over which to execute the neighbor operations. \param FirstNeighborsTag Tag indicating operations over particle first neighbors. @@ -1246,6 +1572,128 @@ struct LinkedCellParallelFor }; }; +/*! + \brief Struct for performing a loop over linked cell bins + and then the particles in those bins on device +*/ +template +struct LinkedCellParallelReduce +{ + //! index type to be used for _begin + using index_type = typename Policy::index_type; + + //! Execution policy + Policy _exec_policy; + //! Functor to execute + Functor _functor; + //! Linked cell list + LinkedCellType _list; + //! result of getStencilCells + ViewType _cells; + + //! beginning index of the loop + index_type _begin; + + //! discriminator for whether a particle is a neighbor or not + NeighborDiscriminator _discriminator; + + //! Constructor + LinkedCellParallelReduce( std::string label, Policy exec_policy, + Functor functor, const LinkedCellType& list, + ViewType cells, ReduceType& reduce_val, + const index_type begin = 0 ) + : _exec_policy( exec_policy ) + , _functor( functor ) + , _list( list ) + , _cells( cells ) + , _begin( begin ) + { + if ( label.empty() ) + Kokkos::parallel_reduce( dynamic_cast( exec_policy ), + *this, reduce_val ); + else + Kokkos::parallel_reduce( label, + dynamic_cast( exec_policy ), + *this, reduce_val ); + } + + //! Performs a loop over the particles in neighboring bins in Serial + KOKKOS_FUNCTION void operator()( SerialOpTag, const index_type i, + ReduceType& ival ) const + { + int imin, imax, jmin, jmax, kmin, kmax; + _list.getStencilCells( _cells( i ), imin, imax, jmin, jmax, kmin, + kmax ); + + // Loop over the cell stencil. + for ( int gi = imin; gi < imax; ++gi ) + for ( int gj = jmin; gj < jmax; ++gj ) + for ( int gk = kmin; gk < kmax; ++gk ) + { + // Check the particles in this bin to see if they are + // neighbors. + auto offset = _list.binOffset( gi, gj, gk ); + auto size = _list.binSize( gi, gj, gk ); + for ( std::size_t j = offset; j < offset + size; ++j ) + { + // Get the true id of the candidate neighbor. + std::size_t jj; + if ( !_list.sorted() ) + jj = _list.permutation( j ); + else + jj = j; + // Avoid self interactions (dummy position args). + if ( _discriminator.isValid( i, 0, 0, 0, jj, 0, 0, 0 ) ) + { + Impl::functorTagDispatch( _functor, i, jj, + ival ); + } + } + } + } + + //! Performs a loop over the particles in neighboring bins in TeamOp + KOKKOS_FUNCTION void operator()( TeamOpTag, + const typename Policy::member_type& team, + ReduceType& ival ) const + { + index_type i = team.league_rank() + _begin; + int imin, imax, jmin, jmax, kmin, kmax; + _list.getStencilCells( _cells( i ), imin, imax, jmin, jmax, kmin, + kmax ); + + // Loop over the cell stencil. + for ( int gi = imin; gi < imax; ++gi ) + for ( int gj = jmin; gj < jmax; ++gj ) + for ( int gk = kmin; gk < kmax; ++gk ) + { + // Check the particles in this bin to see if they + // are neighbors. + auto offset = _list.binOffset( gi, gj, gk ); + auto size = _list.binSize( gi, gj, gk ); + Kokkos::parallel_for( + Kokkos::TeamThreadRange( team, offset, offset + size ), + [&]( const index_type j ) + { + // Get the true id of the candidate neighbor. + std::size_t jj; + if ( !_list.sorted() ) + jj = _list.permutation( j ); + else + jj = j; + // Avoid self interactions (dummy position args). + if ( _discriminator.isValid( i, 0, 0, 0, jj, 0, 0, + 0 ) ) + { + Impl::functorTagDispatch( _functor, i, + jj, ival ); + } + } ); + } + }; +}; + //---------------------------------------------------------------------------// /*! \brief Execute functor in parallel according to the execution policy over @@ -1284,9 +1732,9 @@ struct LinkedCellParallelFor idx=[begin,end]. This compares to a single iteration \c idx of a \c for loop. */ -/* + template -inline void neighbor_parallel_for( +inline void linked_cell_parallel_for( const Kokkos::RangePolicy& exec_policy, const FunctorType& functor, const LinkedCellType& list, const FirstNeighborsTag, const SerialOpTag, const std::string& str = "", @@ -1313,7 +1761,7 @@ inline void neighbor_parallel_for( LinkedCellType, typename LinkedCellType::CountView> lcl_par( str, linear_exec_policy, functor, list, particle_bins ); } -*/ + //---------------------------------------------------------------------------// /*! \brief Execute functor in parallel according to the execution policy over @@ -1353,9 +1801,9 @@ inline void neighbor_parallel_for( idx=[begin,end]. This compares to a single iteration \c idx of a \c for loop. */ -/* + template -inline void neighbor_parallel_for( +inline void linked_cell_parallel_for( const Kokkos::RangePolicy& exec_policy, const FunctorType& functor, const LinkedCellType& list, const FirstNeighborsTag, const TeamOpTag, const std::string& str = "", @@ -1383,126 +1831,151 @@ inline void neighbor_parallel_for( lcl_par( str, team_policy, functor, list, particle_bins, exec_policy.begin() ); } -*/ -template -inline void neighbor_parallel_for( + +//---------------------------------------------------------------------------// +/*! + \brief Execute functor in parallel according to the execution policy over + particles with thread-local serial loops over linked cell list bins and + particle first neighbors in those bins. + + \tparam FunctorType The functor type to execute. + \tparam NeighborListType The neighbor list type. + \tparam ExecParams The Kokkos range policy parameters. + + \param exec_policy The policy over which to execute the functor. + \param functor The functor to execute in parallel + \param list The linked cell list over which to execute neighbor operations. + \param FirstNeighborsTag Tag indicating operations over particle first + neighbors. + \param SerialOpTag Tag indicating a serial loop strategy over neighbors. + \param str Optional name for the functor. Will be forwarded if non-empty to + the Kokkos::parallel_for called by this code and can be used for + identification and profiling purposes. + + A "functor" is a class containing the function to execute in parallel, data + needed for that execution, and an optional \c execution_space typedef. Here + is an example functor for neighbor parallel_for: + + \code + class FunctorType { + public: + typedef ... execution_space ; + void operator() ( const int particle_index, const int neighbor_index ) const ; + }; + \endcode + + In the above example, \c Index is a Cabana index to a given AoSoA element + for a particle and its neighbor. Its operator() method defines the + operation to parallelize, over the range of indices + idx=[begin,end]. This compares to a single iteration \c idx of a + \c for loop. +*/ +template +inline void linked_cell_parallel_reduce( const Kokkos::RangePolicy& exec_policy, - const FunctorType& functor, const NeighborListType& list, - const FirstNeighborsTag, const SerialOpTag, const std::string& str = "", - typename std::enable_if<( is_linked_cell_list::value ), + const FunctorType& functor, const LinkedCellType& list, + const FirstNeighborsTag, const SerialOpTag, ReduceType& reduce_val, + const std::string& str = "", + typename std::enable_if<( is_linked_cell_list::value ), int>::type* = 0 ) { - Kokkos::Profiling::pushRegion( "Cabana::neighbor_parallel_for" ); - - auto particle_bins = list.getParticleBins(); - using work_tag = typename Kokkos::RangePolicy::work_tag; - using execution_space = typename Kokkos::RangePolicy::execution_space; - using index_type = - typename Kokkos::RangePolicy::index_type; - - using neighbor_list_traits = NeighborList; - - using memory_space = typename NeighborListType::memory_space; + using memory_space = typename LinkedCellType::memory_space; auto begin = exec_policy.begin(); auto end = exec_policy.end(); - using linear_policy_type = Kokkos::RangePolicy; + using linear_policy_type = + Kokkos::RangePolicy; linear_policy_type linear_exec_policy( begin, end ); static_assert( is_accessible_from{}, "" ); - //! discriminator for whether a particle is a neighbor or not - NeighborDiscriminator _discriminator; - - auto neigh_func = KOKKOS_LAMBDA( const index_type i ) - { - for ( index_type n = 0; - n < neighbor_list_traits::numNeighbor( list, particle_bins, i ); ++n ) - { - int j = neighbor_list_traits::getNeighbor( list, particle_bins, i, n ); - if ( _discriminator.isValid( i, 0, 0, 0, j, 0, 0, 0 ) ) - { - Impl::functorTagDispatch( - functor, i, - static_cast( j ) ); - } - } - }; - if ( str.empty() ) - Kokkos::parallel_for( linear_exec_policy, neigh_func ); - else - Kokkos::parallel_for( str, linear_exec_policy, neigh_func ); + auto particle_bins = list.getParticleBins(); - Kokkos::Profiling::popRegion(); + LinkedCellParallelReduce + lcl_par( str, linear_exec_policy, functor, list, particle_bins, + reduce_val ); } -template -inline void neighbor_parallel_for( +//---------------------------------------------------------------------------// +/*! + \brief Execute functor in parallel according to the execution policy over + particles with thread-local serial loops over linked cell list bins and + team threading over particle first neighbors in those bins. + + \tparam FunctorType The functor type to execute. + \tparam NeighborListType The neighbor list type. + \tparam ExecParams The Kokkos range policy parameters. + + \param exec_policy The policy over which to execute the functor. + \param functor The functor to execute in parallel + \param list The linked cell list over which to execute neighbor operations. + \param FirstNeighborsTag Tag indicating operations over particle first + neighbors. + \param TeamOpTag Tag indicating a team parallel strategy over particle + neighbors. + \param str Optional name for the functor. Will be forwarded if non-empty to + the Kokkos::parallel_for called by this code and can be used for + identification and profiling purposes. + + A "functor" is a class containing the function to execute in parallel, data + needed for that execution, and an optional \c execution_space typedef. Here + is an example functor for neighbor parallel_for: + + \code + class FunctorType { + public: + typedef ... execution_space ; + void operator() ( const int particle_index, const int neighbor_index ) const ; + }; + \endcode + + In the above example, \c Index is a Cabana index to a given AoSoA element + for a particle and its neighbor. Its operator() method defines the + operation to parallelize, over the range of indices + idx=[begin,end]. This compares to a single iteration \c idx of a + \c for loop. +*/ +template +inline void linked_cell_parallel_reduce( const Kokkos::RangePolicy& exec_policy, - const FunctorType& functor, const NeighborListType& list, - const FirstNeighborsTag, const TeamOpTag, const std::string& str = "", - typename std::enable_if<( is_linked_cell_list::value ), + const FunctorType& functor, const LinkedCellType& list, + const FirstNeighborsTag, const TeamOpTag, ReduceType& reduce_val, + const std::string& str = "", + typename std::enable_if<( is_linked_cell_list::value ), int>::type* = 0 ) { - Kokkos::Profiling::pushRegion( "Cabana::neighbor_parallel_for" ); - - auto particle_bins = list.getParticleBins(); - using work_tag = typename Kokkos::RangePolicy::work_tag; - using execution_space = typename Kokkos::RangePolicy::execution_space; - using kokkos_policy = - Kokkos::TeamPolicy>; - kokkos_policy team_policy( exec_policy.end() - exec_policy.begin(), - Kokkos::AUTO ); - - using index_type = typename kokkos_policy::index_type; - - using neighbor_list_traits = NeighborList; + using team_policy_type = + Kokkos::TeamPolicy>; + team_policy_type team_policy( exec_policy.end() - exec_policy.begin(), + Kokkos::AUTO ); - using memory_space = typename NeighborListType::memory_space; + using memory_space = typename LinkedCellType::memory_space; static_assert( is_accessible_from{}, "" ); - const auto range_begin = exec_policy.begin(); - - //! discriminator for whether a particle is a neighbor or not - NeighborDiscriminator _discriminator; - - auto neigh_func = - KOKKOS_LAMBDA( const typename kokkos_policy::member_type& team ) - { - index_type i = team.league_rank() + range_begin; - Kokkos::parallel_for( - Kokkos::TeamThreadRange( - team, neighbor_list_traits::numNeighbor( list, particle_bins, i ) ), - [&]( const index_type n ) - { - int j = neighbor_list_traits::getNeighbor( list, particle_bins, i, n ); - if ( _discriminator.isValid( i, 0, 0, 0, j, 0, 0, 0 ) ) - { - Impl::functorTagDispatch( - functor, i, - static_cast( j ) ); - } - } ); - }; - if ( str.empty() ) - Kokkos::parallel_for( team_policy, neigh_func ); - else - Kokkos::parallel_for( str, team_policy, neigh_func ); + auto particle_bins = list.getParticleBins(); - Kokkos::Profiling::popRegion(); + LinkedCellParallelReduce + lcl_par( str, team_policy, functor, list, particle_bins, reduce_val, + exec_policy.begin() ); } - } // end namespace Cabana #endif // end CABANA_PARALLEL_HPP diff --git a/core/unit_test/tstLinkedCellList.hpp b/core/unit_test/tstLinkedCellList.hpp index a72071e1a..26cb09e1a 100644 --- a/core/unit_test/tstLinkedCellList.hpp +++ b/core/unit_test/tstLinkedCellList.hpp @@ -425,6 +425,79 @@ void checkLinkedCellParallel( const ListType& nlist, num_particle ); Kokkos::View team_result( "team_result", num_particle ); + // Test the list parallel operation by adding a value from each neighbor + // to the particle (within cutoff) and compare to counts. + auto c2 = cutoff * cutoff; + auto serial_count_op = KOKKOS_LAMBDA( const int i, const int j ) + { + const double dx = positions( i, 0 ) - positions( j, 0 ); + const double dy = positions( i, 1 ) - positions( j, 1 ); + const double dz = positions( i, 2 ) - positions( j, 2 ); + const double r2 = dx * dx + dy * dy + dz * dz; + if ( r2 <= c2 ) + { + if ( nlist.sorted() ) + { + Kokkos::atomic_add( &serial_result( nlist.permutation( i ) ), + nlist.permutation( j ) ); + } + else + { + Kokkos::atomic_add( &serial_result( i ), j ); + } + } + }; + auto team_count_op = KOKKOS_LAMBDA( const int i, const int j ) + { + const double dx = positions( i, 0 ) - positions( j, 0 ); + const double dy = positions( i, 1 ) - positions( j, 1 ); + const double dz = positions( i, 2 ) - positions( j, 2 ); + const double r2 = dx * dx + dy * dy + dz * dz; + if ( r2 <= c2 ) + { + if ( nlist.sorted() ) + { + Kokkos::atomic_add( &team_result( nlist.permutation( i ) ), + nlist.permutation( j ) ); + } + else + { + Kokkos::atomic_add( &team_result( i ), j ); + } + } + }; + Kokkos::RangePolicy policy( 0, num_particle ); + Cabana::linked_cell_parallel_for( policy, serial_count_op, nlist, + Cabana::FirstNeighborsTag(), + Cabana::SerialOpTag(), "test_1st_serial" ); + Cabana::linked_cell_parallel_for( policy, team_count_op, nlist, + Cabana::FirstNeighborsTag(), + Cabana::TeamOpTag(), "test_1st_team" ); + Kokkos::fence(); + + checkFirstNeighborParallelFor( N2_list_copy, serial_result, team_result, + 1 ); + +} + +//---------------------------------------------------------------------------// +// linked_list_parallel +//---------------------------------------------------------------------------// +template +void checkLinkedCellNeighborParallel( const ListType& nlist, + const TestListType& N2_list_copy, + const int num_particle, + const PositionType positions, + const double cutoff ) +{ + // Create Kokkos views for the write operation. + using memory_space = typename TEST_MEMSPACE::memory_space; + Kokkos::View N2_result( "N2_result", + num_particle ); + Kokkos::View serial_result( "serial_result", + num_particle ); + Kokkos::View team_result( "team_result", num_particle ); + // Test the list parallel operation by adding a value from each neighbor // to the particle (within cutoff) and compare to counts. auto c2 = cutoff * cutoff; @@ -477,6 +550,7 @@ void checkLinkedCellParallel( const ListType& nlist, checkFirstNeighborParallelFor( N2_list_copy, serial_result, team_result, 1 ); + } //---------------------------------------------------------------------------// @@ -506,11 +580,17 @@ void testLinkedCellParallel() checkLinkedCellParallel( nlist, test_data.N2_list_copy, test_data.num_particle, positions, test_data.test_radius ); + checkLinkedCellNeighborParallel( nlist, test_data.N2_list_copy, + test_data.num_particle, positions, + test_data.test_radius ); Cabana::permute( nlist, positions ); checkLinkedCellParallel( nlist, test_data.N2_list_copy, test_data.num_particle, positions, test_data.test_radius ); + checkLinkedCellNeighborParallel( nlist, test_data.N2_list_copy, + test_data.num_particle, positions, + test_data.test_radius ); } //---------------------------------------------------------------------------// diff --git a/core/unit_test/tstNeighborList.hpp b/core/unit_test/tstNeighborList.hpp index 4b111ff0f..18f17dbbb 100644 --- a/core/unit_test/tstNeighborList.hpp +++ b/core/unit_test/tstNeighborList.hpp @@ -22,84 +22,6 @@ namespace Test { -//---------------------------------------------------------------------------// -// Linked cell list cell stencil test. -/* -void testLinkedCellStencil() -{ - // Point in the middle - { - double min[3] = { 0.0, 0.0, 0.0 }; - double max[3] = { 10.0, 10.0, 10.0 }; - double radius = 1.0; - double ratio = 1.0; - Cabana::LinkedCellStencil stencil( radius, ratio, min, max ); - - double xp = 4.5; - double yp = 5.5; - double zp = 3.5; - int ic, jc, kc; - stencil.grid.locatePoint( xp, yp, zp, ic, jc, kc ); - int cell = stencil.grid.cardinalCellIndex( ic, jc, kc ); - int imin, imax, jmin, jmax, kmin, kmax; - stencil.getCells( cell, imin, imax, jmin, jmax, kmin, kmax ); - EXPECT_EQ( imin, 3 ); - EXPECT_EQ( imax, 6 ); - EXPECT_EQ( jmin, 4 ); - EXPECT_EQ( jmax, 7 ); - EXPECT_EQ( kmin, 2 ); - EXPECT_EQ( kmax, 5 ); - } - - // Point in the lower right corner - { - double min[3] = { 0.0, 0.0, 0.0 }; - double max[3] = { 10.0, 10.0, 10.0 }; - double radius = 1.0; - double ratio = 1.0; - Cabana::LinkedCellStencil stencil( radius, ratio, min, max ); - - double xp = 0.5; - double yp = 0.5; - double zp = 0.5; - int ic, jc, kc; - stencil.grid.locatePoint( xp, yp, zp, ic, jc, kc ); - int cell = stencil.grid.cardinalCellIndex( ic, jc, kc ); - int imin, imax, jmin, jmax, kmin, kmax; - stencil.getCells( cell, imin, imax, jmin, jmax, kmin, kmax ); - EXPECT_EQ( imin, 0 ); - EXPECT_EQ( imax, 2 ); - EXPECT_EQ( jmin, 0 ); - EXPECT_EQ( jmax, 2 ); - EXPECT_EQ( kmin, 0 ); - EXPECT_EQ( kmax, 2 ); - } - - // Point in the upper left corner - { - double min[3] = { 0.0, 0.0, 0.0 }; - double max[3] = { 10.0, 10.0, 10.0 }; - double radius = 1.0; - double ratio = 1.0; - Cabana::LinkedCellStencil stencil( radius, ratio, min, max ); - - double xp = 9.5; - double yp = 9.5; - double zp = 9.5; - int ic, jc, kc; - stencil.grid.locatePoint( xp, yp, zp, ic, jc, kc ); - int cell = stencil.grid.cardinalCellIndex( ic, jc, kc ); - int imin, imax, jmin, jmax, kmin, kmax; - stencil.getCells( cell, imin, imax, jmin, jmax, kmin, kmax ); - EXPECT_EQ( imin, 8 ); - EXPECT_EQ( imax, 10 ); - EXPECT_EQ( jmin, 8 ); - EXPECT_EQ( jmax, 10 ); - EXPECT_EQ( kmin, 8 ); - EXPECT_EQ( kmax, 10 ); - } -} -*/ //---------------------------------------------------------------------------// template