diff --git a/benchmark/core/Cabana_LinkedCellPerformance.cpp b/benchmark/core/Cabana_LinkedCellPerformance.cpp index 39d087b80..58a1a6028 100644 --- a/benchmark/core/Cabana_LinkedCellPerformance.cpp +++ b/benchmark/core/Cabana_LinkedCellPerformance.cpp @@ -30,7 +30,9 @@ void performanceTest( std::ostream& stream, const std::string& test_prefix, std::vector problem_sizes, std::vector cutoff_ratios ) { + using exec_space = typename Device::execution_space; using memory_space = typename Device::memory_space; + using IterTag = Cabana::SerialOpTag; // Declare problem sizes. int num_problem_size = problem_sizes.size(); @@ -76,12 +78,36 @@ void performanceTest( std::ostream& stream, const std::string& test_prefix, << cutoff_ratios[c]; Cabana::Benchmark::Timer create_timer( create_time_name.str(), num_problem_size ); + std::stringstream iteration_time_name; + iteration_time_name << test_prefix << "linkedcell_iteration_" + << cutoff_ratios[c]; + Cabana::Benchmark::Timer iteration_timer( iteration_time_name.str(), + num_problem_size ); + std::stringstream neigh_iteration_time_name; + neigh_iteration_time_name << test_prefix + << "linkedcell_neighbor_iteration_" + << cutoff_ratios[c]; + Cabana::Benchmark::Timer neigh_iteration_timer( + neigh_iteration_time_name.str(), num_problem_size ); std::stringstream sort_time_name; sort_time_name << test_prefix << "linkedcell_sort_" << cutoff_ratios[c]; Cabana::Benchmark::Timer sort_timer( sort_time_name.str(), num_problem_size ); + std::stringstream iteration_sorted_time_name; + iteration_sorted_time_name << test_prefix + << "linkedcell_iteration_sorted_" + << cutoff_ratios[c]; + Cabana::Benchmark::Timer iteration_sorted_timer( + iteration_sorted_time_name.str(), num_problem_size ); + std::stringstream neigh_iteration_sorted_time_name; + neigh_iteration_sorted_time_name + << test_prefix << "linkedcell_neighbor_iteration_sorted_" + << cutoff_ratios[c]; + Cabana::Benchmark::Timer neigh_iteration_sorted_timer( + neigh_iteration_sorted_time_name.str(), num_problem_size ); // Loop over the problem sizes. + int pid = 0; std::vector psizes; for ( int p = 0; p < num_problem_size; ++p ) { @@ -98,27 +124,75 @@ void performanceTest( std::ostream& stream, const std::string& test_prefix, double sort_delta[3] = { cutoff, cutoff, cutoff }; double grid_min[3] = { x_min[p], x_min[p], x_min[p] }; double grid_max[3] = { x_max[p], x_max[p], x_max[p] }; - Cabana::LinkedCellList linked_cell_list( + auto linked_cell_list = Cabana::createLinkedCellList( x, sort_delta, grid_min, grid_max ); + // Setup for neighbor iteration. + Kokkos::View per_particle_result( "result", + num_p ); + auto count_op = KOKKOS_LAMBDA( const int i, const int n ) + { + Kokkos::atomic_add( &per_particle_result( i ), n ); + }; + Kokkos::RangePolicy policy( 0, num_p ); + // Run tests and time the ensemble for ( int t = 0; t < num_run; ++t ) { // Build the linked cell list. - create_timer.start( p ); + create_timer.start( pid ); linked_cell_list.build( x ); - create_timer.stop( p ); + create_timer.stop( pid ); + + iteration_timer.start( pid ); + Cabana::linked_cell_parallel_for( + policy, count_op, linked_cell_list, + Cabana::FirstNeighborsTag(), IterTag(), + "test_linked_cell" ); + Kokkos::fence(); + iteration_timer.stop( pid ); + + neigh_iteration_timer.start( pid ); + Cabana::neighbor_parallel_for( + policy, count_op, linked_cell_list, + Cabana::FirstNeighborsTag(), IterTag(), "test_neighbor" ); + Kokkos::fence(); + neigh_iteration_timer.stop( pid ); // Sort the particles. sort_timer.start( p ); Cabana::permute( linked_cell_list, aosoas[p] ); sort_timer.stop( p ); + + iteration_sorted_timer.start( pid ); + Cabana::linked_cell_parallel_for( + policy, count_op, linked_cell_list, + Cabana::FirstNeighborsTag(), IterTag(), + "test_linked_cell_sorted" ); + Kokkos::fence(); + iteration_sorted_timer.stop( pid ); + + neigh_iteration_sorted_timer.start( pid ); + Cabana::neighbor_parallel_for( + policy, count_op, linked_cell_list, + Cabana::FirstNeighborsTag(), IterTag(), + "test_neighbor_sorted" ); + Kokkos::fence(); + neigh_iteration_sorted_timer.stop( pid ); } + + // Increment the problem id. + ++pid; } // Output results. outputResults( stream, "problem_size", psizes, create_timer ); + outputResults( stream, "problem_size", psizes, iteration_timer ); + outputResults( stream, "problem_size", psizes, neigh_iteration_timer ); outputResults( stream, "problem_size", psizes, sort_timer ); + outputResults( stream, "problem_size", psizes, iteration_sorted_timer ); + outputResults( stream, "problem_size", psizes, + neigh_iteration_sorted_timer ); } } diff --git a/benchmark/core/Cabana_NeighborArborXPerformance.cpp b/benchmark/core/Cabana_NeighborArborXPerformance.cpp index 3902d8425..e7647c417 100644 --- a/benchmark/core/Cabana_NeighborArborXPerformance.cpp +++ b/benchmark/core/Cabana_NeighborArborXPerformance.cpp @@ -78,7 +78,7 @@ void performanceTest( std::ostream& stream, const std::string& test_prefix, double cutoff = cutoff_ratios.front(); double sort_delta[3] = { cutoff, cutoff, cutoff }; auto x = Cabana::slice<0>( aosoas[p], "position" ); - Cabana::LinkedCellList linked_cell_list( + auto linked_cell_list = Cabana::createLinkedCellList( x, sort_delta, grid_min, grid_max ); Cabana::permute( linked_cell_list, aosoas[p] ); } diff --git a/benchmark/core/Cabana_NeighborVerletPerformance.cpp b/benchmark/core/Cabana_NeighborVerletPerformance.cpp index 5837e07a0..907fb3b73 100644 --- a/benchmark/core/Cabana_NeighborVerletPerformance.cpp +++ b/benchmark/core/Cabana_NeighborVerletPerformance.cpp @@ -83,7 +83,7 @@ void performanceTest( std::ostream& stream, const std::string& test_prefix, // in cells the size of the smallest cutoff distance. double cutoff = cutoff_ratios.front(); double sort_delta[3] = { cutoff, cutoff, cutoff }; - Cabana::LinkedCellList linked_cell_list( + auto linked_cell_list = Cabana::createLinkedCellList( x, sort_delta, grid_min, grid_max ); Cabana::permute( linked_cell_list, aosoas[p] ); } diff --git a/core/src/Cabana_LinkedCellList.hpp b/core/src/Cabana_LinkedCellList.hpp index d352c0407..40f1a6242 100644 --- a/core/src/Cabana_LinkedCellList.hpp +++ b/core/src/Cabana_LinkedCellList.hpp @@ -11,11 +11,12 @@ /*! \file Cabana_LinkedCellList.hpp - \brief Linked cell list binning and sorting + \brief Linked cell list binning (spatial sorting) and neighbor iteration. */ #ifndef CABANA_LINKEDCELLLIST_HPP #define CABANA_LINKEDCELLLIST_HPP +#include #include #include #include @@ -28,12 +29,67 @@ namespace Cabana { +//---------------------------------------------------------------------------// +//! Stencil of cells surrounding each cell. + +template +struct LinkedCellStencil +{ + //! Background grid. + Impl::CartesianGrid grid; + //! Maximum cells per dimension. + int max_cells_dir; + //! Maximum total cells. + int max_cells; + //! Range of cells to search based on cutoff. + int cell_range; + + //! Default Constructor + LinkedCellStencil() {} + + //! Constructor + LinkedCellStencil( const Scalar neighborhood_radius, + const Scalar cell_size_ratio, const Scalar grid_min[3], + const Scalar grid_max[3] ) + { + Scalar dx = neighborhood_radius * cell_size_ratio; + grid = Impl::CartesianGrid( + grid_min[0], grid_min[1], grid_min[2], grid_max[0], grid_max[1], + grid_max[2], dx, dx, dx ); + cell_range = std::ceil( 1 / cell_size_ratio ); + max_cells_dir = 2 * cell_range + 1; + max_cells = max_cells_dir * max_cells_dir * max_cells_dir; + } + + //! Given a cell, get the index bounds of the cell stencil. + KOKKOS_INLINE_FUNCTION + void getCells( const int cell, int& imin, int& imax, int& jmin, int& jmax, + int& kmin, int& kmax ) const + { + int i, j, k; + grid.ijkBinIndex( cell, i, j, k ); + + kmin = ( k - cell_range > 0 ) ? k - cell_range : 0; + kmax = + ( k + cell_range + 1 < grid._nz ) ? k + cell_range + 1 : grid._nz; + + jmin = ( j - cell_range > 0 ) ? j - cell_range : 0; + jmax = + ( j + cell_range + 1 < grid._ny ) ? j + cell_range + 1 : grid._ny; + + imin = ( i - cell_range > 0 ) ? i - cell_range : 0; + imax = + ( i + cell_range + 1 < grid._nx ) ? i + cell_range + 1 : grid._nx; + } +}; + //---------------------------------------------------------------------------// /*! \brief Data describing the bin sizes and offsets resulting from a binning operation on a 3d regular Cartesian grid. + \note Defaults to double precision for backwards compatibility. */ -template +template class LinkedCellList { public: @@ -57,6 +113,8 @@ class LinkedCellList using CountView = Kokkos::View; //! Offset view type. using OffsetView = Kokkos::View; + //! Stencil type. + using stencil_type = Cabana::LinkedCellStencil; /*! \brief Default constructor. @@ -69,23 +127,79 @@ class LinkedCellList \tparam SliceType Slice type for positions. \param positions Slice of positions. - \param grid_delta Grid sizes in each cardinal direction. + \param grid_min Grid minimum value in each direction. + \param grid_max Grid maximum value in each direction. + */ + template + LinkedCellList( SliceType positions, const Scalar grid_delta[3], + const Scalar grid_min[3], const Scalar grid_max[3], + typename std::enable_if<( is_slice::value ), + int>::type* = 0 ) + : _grid( grid_min[0], grid_min[1], grid_min[2], grid_max[0], + grid_max[1], grid_max[2], grid_delta[0], grid_delta[1], + grid_delta[2] ) + , _cell_stencil( grid_delta[0], 1.0, grid_min, grid_max ) + , _sorted( false ) + { + std::size_t np = positions.size(); + allocate( totalBins(), np ); + build( positions, 0, np ); + } + /*! + \brief Slice constructor + + \tparam SliceType Slice type for positions. + + \param positions Slice of positions. + \param begin The beginning index of the AoSoA range to sort. + \param end The end index of the AoSoA range to sort. + \param grid_delta Grid sizes in each cardinal direction. \param grid_min Grid minimum value in each direction. + \param grid_max Grid maximum value in each direction. + */ + template + LinkedCellList( SliceType positions, const std::size_t begin, + const std::size_t end, const Scalar grid_delta[3], + const Scalar grid_min[3], const Scalar grid_max[3], + typename std::enable_if<( is_slice::value ), + int>::type* = 0 ) + : _grid( grid_min[0], grid_min[1], grid_min[2], grid_max[0], + grid_max[1], grid_max[2], grid_delta[0], grid_delta[1], + grid_delta[2] ) + , _cell_stencil( grid_delta[0], 1.0, grid_min, grid_max ) + , _sorted( false ) + { + allocate( totalBins(), end - begin ); + build( positions, begin, end ); + } + + /*! + \brief Slice constructor + + \tparam SliceType Slice type for positions. + \param positions Slice of positions. + \param grid_delta Grid sizes in each cardinal direction. + \param grid_min Grid minimum value in each direction. \param grid_max Grid maximum value in each direction. + \param neighborhood_radius Radius for neighbors. + \param cell_size_ratio Ratio of the cell size to the neighborhood size. */ template LinkedCellList( - SliceType positions, const typename SliceType::value_type grid_delta[3], - const typename SliceType::value_type grid_min[3], - const typename SliceType::value_type grid_max[3], + SliceType positions, const Scalar grid_delta[3], + const Scalar grid_min[3], const Scalar grid_max[3], + const Scalar neighborhood_radius, const Scalar cell_size_ratio = 1, typename std::enable_if<( is_slice::value ), int>::type* = 0 ) : _grid( grid_min[0], grid_min[1], grid_min[2], grid_max[0], grid_max[1], grid_max[2], grid_delta[0], grid_delta[1], grid_delta[2] ) + , _cell_stencil( neighborhood_radius, cell_size_ratio, grid_min, + grid_max ) + , _sorted( false ) { std::size_t np = positions.size(); allocate( totalBins(), np ); @@ -98,33 +212,37 @@ class LinkedCellList \tparam SliceType Slice type for positions. \param positions Slice of positions. - \param begin The beginning index of the AoSoA range to sort. - \param end The end index of the AoSoA range to sort. - \param grid_delta Grid sizes in each cardinal direction. - \param grid_min Grid minimum value in each direction. - \param grid_max Grid maximum value in each direction. + \param neighborhood_radius Radius for neighbors. + \param cell_size_ratio Ratio of the cell size to the neighborhood size. */ template LinkedCellList( SliceType positions, const std::size_t begin, const std::size_t end, - const typename SliceType::value_type grid_delta[3], - const typename SliceType::value_type grid_min[3], - const typename SliceType::value_type grid_max[3], + const Scalar grid_delta[3], const Scalar grid_min[3], + const Scalar grid_max[3], const Scalar neighborhood_radius, + const Scalar cell_size_ratio = 1, typename std::enable_if<( is_slice::value ), int>::type* = 0 ) : _grid( grid_min[0], grid_min[1], grid_min[2], grid_max[0], grid_max[1], grid_max[2], grid_delta[0], grid_delta[1], grid_delta[2] ) + , _cell_stencil( neighborhood_radius, cell_size_ratio, grid_min, + grid_max ) + , _sorted( false ) { allocate( totalBins(), end - begin ); build( positions, begin, end ); } + //! Number of binned particles. + KOKKOS_INLINE_FUNCTION + int numParticles() const { return _permutes.extent( 0 ); } + /*! \brief Get the total number of bins. \return the total number of bins. @@ -222,6 +340,13 @@ class LinkedCellList KOKKOS_INLINE_FUNCTION std::size_t rangeEnd() const { return _bin_data.rangeEnd(); } + /*! + \brief Get the linked cell stencil. + \return Cell stencil. + */ + KOKKOS_INLINE_FUNCTION + stencil_type cellStencil() const { return _cell_stencil; } + /*! \brief Get the 1d bin data. \return The 1d bin data. @@ -352,14 +477,86 @@ class LinkedCellList build( positions, 0, positions.size() ); } + /*! + \brief Get the bin cell index for each binned particle. + */ + auto getParticleBins() const + { + Kokkos::parallel_for( + "Cabana::LinkedCellList::storeBinIndices", + Kokkos::RangePolicy( 0, totalBins() ), *this ); + return _particle_bins; + } + + /*! + \brief Get the bin cell index of the input particle + */ + KOKKOS_INLINE_FUNCTION + auto getParticleBin( const int particle_index ) const + { + return _particle_bins( particle_index ); + } + + /*! + \brief Determines which particles belong to bin i + \param i the cardinal bin index of the current bin + */ + KOKKOS_FUNCTION void operator()( const int i ) const + { + int bin_ijk[3]; + ijkBinIndex( i, bin_ijk[0], bin_ijk[1], bin_ijk[2] ); + auto offset = binOffset( bin_ijk[0], bin_ijk[1], bin_ijk[2] ); + auto size = binSize( bin_ijk[0], bin_ijk[1], bin_ijk[2] ); + for ( size_t p = offset; p < offset + size; ++p ) + { + if ( _sorted ) + { + _particle_bins( p ) = i; + } + else + { + _particle_bins( permutation( p ) ) = i; + } + } + } + + /*! + \brief Updates the status of the list sorting + \param sorted Bool that is true if the list has been sorted + */ + void update( const bool sorted ) { _sorted = sorted; } + + /*! + \brief Returns whether the list has been sorted or not + */ + KOKKOS_INLINE_FUNCTION + auto sorted() const { return _sorted; } + + /*! + \brief Get the cell indices for the stencil about cell + */ + KOKKOS_INLINE_FUNCTION + void getStencilCells( const int cell, int& imin, int& imax, int& jmin, + int& jmax, int& kmin, int& kmax ) const + { + _cell_stencil.getCells( cell, imin, imax, jmin, jmax, kmin, kmax ); + } + private: + // Building the linked cell. BinningData _bin_data; - Impl::CartesianGrid _grid; + Impl::CartesianGrid _grid; CountView _counts; OffsetView _offsets; OffsetView _permutes; + // Iterating over the linked cell. + stencil_type _cell_stencil; + + bool _sorted; + CountView _particle_bins; + void allocate( const int ncell, const int nparticles ) { _counts = CountView( @@ -371,9 +568,72 @@ class LinkedCellList _permutes = OffsetView( Kokkos::view_alloc( Kokkos::WithoutInitializing, "permutes" ), nparticles ); + // This is only used for iterating over the particles (not building the + // permutaion vector). + _particle_bins = CountView( + Kokkos::view_alloc( Kokkos::WithoutInitializing, "counts" ), + nparticles ); } }; +/*! + \brief Creation function for linked cell list. + \return LinkedCellList. +*/ +template +auto createLinkedCellList( SliceType positions, const Scalar grid_delta[3], + const Scalar grid_min[3], const Scalar grid_max[3] ) +{ + return LinkedCellList( positions, grid_delta, grid_min, + grid_max ); +} + +/*! + \brief Creation function for linked cell list with partial range. + \return LinkedCellList. +*/ +template +auto createLinkedCellList( SliceType positions, const std::size_t begin, + const std::size_t end, const Scalar grid_delta[3], + const Scalar grid_min[3], const Scalar grid_max[3] ) +{ + return LinkedCellList( + positions, begin, end, grid_delta, grid_min, grid_max ); +} + +/*! + \brief Creation function for linked cell list with custom cutoff radius and + cell ratio. + \return LinkedCellList. +*/ +template +auto createLinkedCellList( SliceType positions, const Scalar grid_delta[3], + const Scalar grid_min[3], const Scalar grid_max[3], + const Scalar neighborhood_radius, + const Scalar cell_size_ratio = 1.0 ) +{ + return LinkedCellList( positions, grid_delta, grid_min, + grid_max, neighborhood_radius, + cell_size_ratio ); +} + +/*! + \brief Creation function for linked cell list with partial range and custom + cutoff radius and/or cell ratio. + \return LinkedCellList. +*/ +template +auto createLinkedCellList( SliceType positions, const std::size_t begin, + const std::size_t end, const Scalar grid_delta[3], + const Scalar grid_min[3], const Scalar grid_max[3], + const Scalar neighborhood_radius, + const Scalar cell_size_ratio = 1.0 ) +{ + return LinkedCellList( + positions, begin, end, grid_delta, grid_min, grid_max, + neighborhood_radius, cell_size_ratio ); +} + //---------------------------------------------------------------------------// //! \cond Impl template @@ -381,8 +641,8 @@ struct is_linked_cell_list_impl : public std::false_type { }; -template -struct is_linked_cell_list_impl> +template +struct is_linked_cell_list_impl> : public std::true_type { }; @@ -409,12 +669,15 @@ struct is_linked_cell_list */ template void permute( - const LinkedCellListType& linked_cell_list, AoSoA_t& aosoa, + LinkedCellListType& linked_cell_list, AoSoA_t& aosoa, typename std::enable_if<( is_linked_cell_list::value && is_aosoa::value ), int>::type* = 0 ) { permute( linked_cell_list.binningData(), aosoa ); + + // Update internal state. + linked_cell_list.update( true ); } //---------------------------------------------------------------------------// @@ -431,15 +694,102 @@ void permute( */ template void permute( - const LinkedCellListType& linked_cell_list, SliceType& slice, + LinkedCellListType& linked_cell_list, SliceType& slice, typename std::enable_if<( is_linked_cell_list::value && is_slice::value ), int>::type* = 0 ) { permute( linked_cell_list.binningData(), slice ); + + // Update internal state. + linked_cell_list.update( true ); } //---------------------------------------------------------------------------// +//! LinkedCellList NeighborList interface. +template +class NeighborList> +{ + public: + //! Kokkos memory space. + using memory_space = MemorySpace; + //! Neighbor list type. + using list_type = LinkedCellList; + + //! Get the maximum number of neighbors per particle. + KOKKOS_INLINE_FUNCTION static std::size_t + totalNeighbor( const list_type& list ) + { + return Impl::totalNeighbor( list, list.numParticles() ); + } + + //! Get the maximum number of neighbors across all particles. + KOKKOS_INLINE_FUNCTION + static std::size_t maxNeighbor( const list_type& list ) + { + return Impl::maxNeighbor( list, list.numParticles() ); + } + + //! Get the number of neighbors for a given particle index. + KOKKOS_INLINE_FUNCTION static std::size_t + numNeighbor( const list_type& list, const std::size_t particle_index ) + { + int total_count = 0; + int imin, imax, jmin, jmax, kmin, kmax; + list.getStencilCells( list.getParticleBin( particle_index ), imin, imax, + jmin, jmax, kmin, kmax ); + + // Loop over the cell stencil. + for ( int i = imin; i < imax; ++i ) + for ( int j = jmin; j < jmax; ++j ) + for ( int k = kmin; k < kmax; ++k ) + { + total_count += list.binSize( i, j, k ); + } + return total_count; + } + + //! Get the id for a neighbor for a given particle index and the index of + //! the neighbor relative to the particle. + KOKKOS_INLINE_FUNCTION static std::size_t + getNeighbor( const list_type& list, const std::size_t particle_index, + const std::size_t neighbor_index ) + { + std::size_t total_count = 0; + std::size_t previous_count = 0; + int imin, imax, jmin, jmax, kmin, kmax; + list.getStencilCells( list.getParticleBin( particle_index ), imin, imax, + jmin, jmax, kmin, kmax ); + + // Loop over the cell stencil. + for ( int i = imin; i < imax; ++i ) + for ( int j = jmin; j < jmax; ++j ) + for ( int k = kmin; k < kmax; ++k ) + { + total_count += list.binSize( i, j, k ); + // This neighbor is in this bin. + if ( total_count > neighbor_index ) + { + int particle_id = list.binOffset( i, j, k ) + + ( neighbor_index - previous_count ); + if ( list.sorted() ) + { + return particle_id; + } + else + { + return list.permutation( particle_id ); + } + } + previous_count = total_count; + } + + assert( total_count <= totalNeighbor( list ) ); + + // Should never make it to this point. + return 0; + } +}; } // end namespace Cabana diff --git a/core/src/Cabana_NeighborList.hpp b/core/src/Cabana_NeighborList.hpp index ca05faf95..4055198b6 100644 --- a/core/src/Cabana_NeighborList.hpp +++ b/core/src/Cabana_NeighborList.hpp @@ -47,6 +47,61 @@ class HalfNeighborTag { }; +//---------------------------------------------------------------------------// +//! Neighborhood discriminator. +template +class NeighborDiscriminator; + +//! Full list discriminator specialization. +template <> +class NeighborDiscriminator +{ + public: + /*! + \brief Check whether neighbor pair is valid. + + Full neighbor lists count and store the neighbors of all particles. The + only criteria for a potentially valid neighbor is that the particle does + not neighbor itself (i.e. the particle index "p" is not the same as the + neighbor index "n"). + */ + KOKKOS_INLINE_FUNCTION + static bool isValid( const std::size_t p, const double, const double, + const double, const std::size_t n, const double, + const double, const double ) + { + return ( p != n ); + } +}; + +//! Half list discriminator specialization. +template <> +class NeighborDiscriminator +{ + public: + /*! + \brief Check whether neighbor pair is valid. + + Half neighbor lists only store half of the neighbors be eliminating + duplicate pairs such that the fact that particle "p" neighbors particle + "n" is stored in the list but "n" neighboring "p" is not stored but rather + implied. We discriminate by only storing neighbors whose coordinates are + greater in the x direction. If they are the same then the y direction is + checked next and finally the z direction if the y coordinates are the + same. + */ + KOKKOS_INLINE_FUNCTION static bool + isValid( const std::size_t p, const double xp, const double yp, + const double zp, const std::size_t n, const double xn, + const double yn, const double zn ) + { + return ( ( p != n ) && + ( ( xn > xp ) || + ( ( xn == xp ) && + ( ( yn > yp ) || ( ( yn == yp ) && ( zn > zp ) ) ) ) ) ); + } +}; + //---------------------------------------------------------------------------// /*! \brief Neighbor list interface. Provides an interface callable at the diff --git a/core/src/Cabana_Parallel.hpp b/core/src/Cabana_Parallel.hpp index f46d674f8..1225c92ef 100644 --- a/core/src/Cabana_Parallel.hpp +++ b/core/src/Cabana_Parallel.hpp @@ -17,8 +17,10 @@ #define CABANA_PARALLEL_HPP #include +#include #include #include // is_accessible_from +#include #include @@ -1103,6 +1105,503 @@ for_each_neighbor( const IndexType i, const TeamMemberType team, } ); } +//---------------------------------------------------------------------------// +// Linked Cell Parallel For +//---------------------------------------------------------------------------// +namespace Impl +{ +//! \cond Impl +/*! + \brief Struct for performing a loop over linked cell bins + and then the particles in those bins on device +*/ +template +struct LinkedCellParallelFor +{ + //! index type to be used for _begin + using index_type = typename Policy::index_type; + + //! Execution policy + Policy _exec_policy; + //! Functor to execute + Functor _functor; + //! Linked cell list + LinkedCellType _list; + //! result of getStencilCells + ViewType _cells; + + //! beginning index of the loop + index_type _begin; + + //! discriminator for whether a particle is a neighbor or not + NeighborDiscriminator _discriminator; + + //! Constructor + LinkedCellParallelFor( std::string label, Policy exec_policy, + Functor functor, const LinkedCellType& list, + ViewType cells, const index_type begin = 0 ) + : _exec_policy( exec_policy ) + , _functor( functor ) + , _list( list ) + , _cells( cells ) + , _begin( begin ) + { + if ( label.empty() ) + Kokkos::parallel_for( dynamic_cast( exec_policy ), + *this ); + else + Kokkos::parallel_for( + label, dynamic_cast( exec_policy ), *this ); + } + + //! Performs a loop over the particles in neighboring bins in Serial + KOKKOS_FUNCTION void operator()( SerialOpTag, const index_type i ) const + { + int imin, imax, jmin, jmax, kmin, kmax; + _list.getStencilCells( _cells( i ), imin, imax, jmin, jmax, kmin, + kmax ); + + // Loop over the cell stencil. + for ( int gi = imin; gi < imax; ++gi ) + for ( int gj = jmin; gj < jmax; ++gj ) + for ( int gk = kmin; gk < kmax; ++gk ) + { + // Check the particles in this bin to see if they are + // neighbors. + auto offset = _list.binOffset( gi, gj, gk ); + auto size = _list.binSize( gi, gj, gk ); + for ( std::size_t j = offset; j < offset + size; ++j ) + { + // Get the true id of the candidate neighbor. + std::size_t jj; + if ( !_list.sorted() ) + jj = _list.permutation( j ); + else + jj = j; + // Avoid self interactions (dummy position args). + if ( _discriminator.isValid( i, 0, 0, 0, jj, 0, 0, 0 ) ) + { + Impl::functorTagDispatch( _functor, i, + jj ); + } + } + } + } + + //! Performs a loop over the particles in neighboring bins in TeamOp + KOKKOS_FUNCTION void + operator()( TeamOpTag, const typename Policy::member_type& team ) const + { + index_type i = team.league_rank() + _begin; + int imin, imax, jmin, jmax, kmin, kmax; + _list.getStencilCells( _cells( i ), imin, imax, jmin, jmax, kmin, + kmax ); + + // Loop over the cell stencil. + for ( int gi = imin; gi < imax; ++gi ) + for ( int gj = jmin; gj < jmax; ++gj ) + for ( int gk = kmin; gk < kmax; ++gk ) + { + // Check the particles in this bin to see if they + // are neighbors. + auto offset = _list.binOffset( gi, gj, gk ); + auto size = _list.binSize( gi, gj, gk ); + Kokkos::parallel_for( + Kokkos::TeamThreadRange( team, offset, offset + size ), + [&]( const index_type j ) + { + // Get the true id of the candidate neighbor. + std::size_t jj; + if ( !_list.sorted() ) + jj = _list.permutation( j ); + else + jj = j; + // Avoid self interactions (dummy position args). + if ( _discriminator.isValid( i, 0, 0, 0, jj, 0, 0, + 0 ) ) + { + Impl::functorTagDispatch( _functor, i, + jj ); + } + } ); + } + }; +}; + +/*! + \brief Struct for performing a loop over linked cell bins + and then the particles in those bins on device +*/ +template +struct LinkedCellParallelReduce +{ + //! index type to be used for _begin + using index_type = typename Policy::index_type; + + //! Execution policy + Policy _exec_policy; + //! Functor to execute + Functor _functor; + //! Linked cell list + LinkedCellType _list; + //! result of getStencilCells + ViewType _cells; + + //! beginning index of the loop + index_type _begin; + + //! discriminator for whether a particle is a neighbor or not + NeighborDiscriminator _discriminator; + + //! Constructor + LinkedCellParallelReduce( std::string label, Policy exec_policy, + Functor functor, const LinkedCellType& list, + ViewType cells, ReduceType& reduce_val, + const index_type begin = 0 ) + : _exec_policy( exec_policy ) + , _functor( functor ) + , _list( list ) + , _cells( cells ) + , _begin( begin ) + { + if ( label.empty() ) + Kokkos::parallel_reduce( dynamic_cast( exec_policy ), + *this, reduce_val ); + else + Kokkos::parallel_reduce( label, + dynamic_cast( exec_policy ), + *this, reduce_val ); + } + + //! Performs a loop over the particles in neighboring bins in Serial + KOKKOS_FUNCTION void operator()( SerialOpTag, const index_type i, + ReduceType& ival ) const + { + int imin, imax, jmin, jmax, kmin, kmax; + _list.getStencilCells( _cells( i ), imin, imax, jmin, jmax, kmin, + kmax ); + + // Loop over the cell stencil. + for ( int gi = imin; gi < imax; ++gi ) + for ( int gj = jmin; gj < jmax; ++gj ) + for ( int gk = kmin; gk < kmax; ++gk ) + { + // Check the particles in this bin to see if they are + // neighbors. + auto offset = _list.binOffset( gi, gj, gk ); + auto size = _list.binSize( gi, gj, gk ); + for ( std::size_t j = offset; j < offset + size; ++j ) + { + // Get the true id of the candidate neighbor. + std::size_t jj; + if ( !_list.sorted() ) + jj = _list.permutation( j ); + else + jj = j; + // Avoid self interactions (dummy position args). + if ( _discriminator.isValid( i, 0, 0, 0, jj, 0, 0, 0 ) ) + { + Impl::functorTagDispatch( _functor, i, jj, + ival ); + } + } + } + } + + //! Performs a loop over the particles in neighboring bins in TeamOp + KOKKOS_FUNCTION void operator()( TeamOpTag, + const typename Policy::member_type& team, + ReduceType& ival ) const + { + index_type i = team.league_rank() + _begin; + int imin, imax, jmin, jmax, kmin, kmax; + _list.getStencilCells( _cells( i ), imin, imax, jmin, jmax, kmin, + kmax ); + + // Loop over the cell stencil. + for ( int gi = imin; gi < imax; ++gi ) + for ( int gj = jmin; gj < jmax; ++gj ) + for ( int gk = kmin; gk < kmax; ++gk ) + { + // Check the particles in this bin to see if they + // are neighbors. + auto offset = _list.binOffset( gi, gj, gk ); + auto size = _list.binSize( gi, gj, gk ); + Kokkos::parallel_for( + Kokkos::TeamThreadRange( team, offset, offset + size ), + [&]( const index_type j ) + { + // Get the true id of the candidate neighbor. + std::size_t jj; + if ( !_list.sorted() ) + jj = _list.permutation( j ); + else + jj = j; + // Avoid self interactions (dummy position args). + if ( _discriminator.isValid( i, 0, 0, 0, jj, 0, 0, + 0 ) ) + { + Impl::functorTagDispatch( _functor, i, + jj, ival ); + } + } ); + } + }; +}; +//! \endcond +} // namespace Impl + +//---------------------------------------------------------------------------// +/*! + \brief Execute functor in parallel according to the execution policy over + particles with thread-local serial loops over linked cell list bins and + particle first neighbors in those bins. + + \tparam FunctorType The functor type to execute. + \tparam NeighborListType The neighbor list type. + \tparam ExecParams The Kokkos range policy parameters. + + \param exec_policy The policy over which to execute the functor. + \param functor The functor to execute in parallel + \param list The linked cell list over which to execute neighbor operations. + \param FirstNeighborsTag Tag indicating operations over particle first + neighbors. + \param SerialOpTag Tag indicating a serial loop strategy over neighbors. + \param str Optional name for the functor. Will be forwarded if non-empty to + the Kokkos::parallel_for called by this code and can be used for + identification and profiling purposes. + + A "functor" is a class containing the function to execute in parallel, data + needed for that execution, and an optional \c execution_space typedef. Here + is an example functor for neighbor parallel_for: + + \code + class FunctorType { + public: + typedef ... execution_space ; + void operator() ( const int particle_index, const int neighbor_index ) const ; + }; + \endcode + + In the above example, \c Index is a Cabana index to a given AoSoA element + for a particle and its neighbor. Its operator() method defines the + operation to parallelize, over the range of indices + idx=[begin,end]. This compares to a single iteration \c idx of a + \c for loop. +*/ + +template +inline void linked_cell_parallel_for( + const Kokkos::RangePolicy& exec_policy, + const FunctorType& functor, const LinkedCellType& list, + const FirstNeighborsTag, const SerialOpTag, const std::string& str = "", + typename std::enable_if<( is_linked_cell_list::value ), + int>::type* = 0 ) +{ + using work_tag = typename Kokkos::RangePolicy::work_tag; + using execution_space = + typename Kokkos::RangePolicy::execution_space; + + using memory_space = typename LinkedCellType::memory_space; + + auto begin = exec_policy.begin(); + auto end = exec_policy.end(); + using linear_policy_type = + Kokkos::RangePolicy; + linear_policy_type linear_exec_policy( begin, end ); + + static_assert( is_accessible_from{}, "" ); + + auto particle_bins = list.getParticleBins(); + + Impl::LinkedCellParallelFor + lcl_par( str, linear_exec_policy, functor, list, particle_bins ); +} + +//---------------------------------------------------------------------------// +/*! + \brief Execute functor in parallel according to the execution policy over + particles with thread-local serial loops over linked cell list bins and + team threading over particle first neighbors in those bins. + + \tparam FunctorType The functor type to execute. + \tparam NeighborListType The neighbor list type. + \tparam ExecParams The Kokkos range policy parameters. + + \param exec_policy The policy over which to execute the functor. + \param functor The functor to execute in parallel + \param list The linked cell list over which to execute neighbor operations. + \param FirstNeighborsTag Tag indicating operations over particle first + neighbors. + \param TeamOpTag Tag indicating a team parallel strategy over particle + neighbors. + \param str Optional name for the functor. Will be forwarded if non-empty to + the Kokkos::parallel_for called by this code and can be used for + identification and profiling purposes. +*/ + +template +inline void linked_cell_parallel_for( + const Kokkos::RangePolicy& exec_policy, + const FunctorType& functor, const LinkedCellType& list, + const FirstNeighborsTag, const TeamOpTag, const std::string& str = "", + typename std::enable_if<( is_linked_cell_list::value ), + int>::type* = 0 ) +{ + using work_tag = typename Kokkos::RangePolicy::work_tag; + using execution_space = + typename Kokkos::RangePolicy::execution_space; + + using team_policy_type = + Kokkos::TeamPolicy>; + team_policy_type team_policy( exec_policy.end() - exec_policy.begin(), + Kokkos::AUTO ); + + using memory_space = typename LinkedCellType::memory_space; + + static_assert( is_accessible_from{}, "" ); + + auto particle_bins = list.getParticleBins(); + + Impl::LinkedCellParallelFor + lcl_par( str, team_policy, functor, list, particle_bins, + exec_policy.begin() ); +} + +//---------------------------------------------------------------------------// +/*! + \brief Execute functor in parallel according to the execution policy over + particles with thread-local serial loops over linked cell list bins and + particle first neighbors in those bins. + + \tparam FunctorType The functor type to execute. + \tparam NeighborListType The neighbor list type. + \tparam ExecParams The Kokkos range policy parameters. + + \param exec_policy The policy over which to execute the functor. + \param functor The functor to execute in parallel + \param list The linked cell list over which to execute neighbor operations. + \param FirstNeighborsTag Tag indicating operations over particle first + neighbors. + \param SerialOpTag Tag indicating a serial loop strategy over neighbors. + \param reduce_val the value begin reduced + \param str Optional name for the functor. Will be forwarded if non-empty to + the Kokkos::parallel_for called by this code and can be used for + identification and profiling purposes. + + A "functor" is a class containing the function to execute in parallel, data + needed for that execution, and an optional \c execution_space typedef. Here + is an example functor for neighbor parallel_for: + + \code + class FunctorType { + public: + typedef ... execution_space ; + void operator() ( const int particle_index, const int neighbor_index ) const ; + }; + \endcode + + In the above example, \c Index is a Cabana index to a given AoSoA element + for a particle and its neighbor. Its operator() method defines the + operation to parallelize, over the range of indices + idx=[begin,end]. This compares to a single iteration \c idx of a + \c for loop. +*/ +template +inline void linked_cell_parallel_reduce( + const Kokkos::RangePolicy& exec_policy, + const FunctorType& functor, const LinkedCellType& list, + const FirstNeighborsTag, const SerialOpTag, ReduceType& reduce_val, + const std::string& str = "", + typename std::enable_if<( is_linked_cell_list::value ), + int>::type* = 0 ) +{ + using work_tag = typename Kokkos::RangePolicy::work_tag; + using execution_space = + typename Kokkos::RangePolicy::execution_space; + + using memory_space = typename LinkedCellType::memory_space; + + auto begin = exec_policy.begin(); + auto end = exec_policy.end(); + using linear_policy_type = + Kokkos::RangePolicy; + linear_policy_type linear_exec_policy( begin, end ); + + static_assert( is_accessible_from{}, "" ); + + auto particle_bins = list.getParticleBins(); + + Impl::LinkedCellParallelReduce< + work_tag, FunctorType, linear_policy_type, LinkedCellType, + typename LinkedCellType::CountView, ReduceType> + lcl_par( str, linear_exec_policy, functor, list, particle_bins, + reduce_val ); +} + +//---------------------------------------------------------------------------// +/*! + \brief Execute functor in parallel according to the execution policy over + particles with thread-local serial loops over linked cell list bins and + team threading over particle first neighbors in those bins. + + \tparam FunctorType The functor type to execute. + \tparam NeighborListType The neighbor list type. + \tparam ExecParams The Kokkos range policy parameters. + + \param exec_policy The policy over which to execute the functor. + \param functor The functor to execute in parallel + \param list The linked cell list over which to execute neighbor operations. + \param FirstNeighborsTag Tag indicating operations over particle first + neighbors. + \param TeamOpTag Tag indicating a team parallel strategy over particle + neighbors. + \param reduce_val the value begin reduced + \param str Optional name for the functor. Will be forwarded if non-empty to + the Kokkos::parallel_for called by this code and can be used for + identification and profiling purposes. +*/ +template +inline void linked_cell_parallel_reduce( + const Kokkos::RangePolicy& exec_policy, + const FunctorType& functor, const LinkedCellType& list, + const FirstNeighborsTag, const TeamOpTag, ReduceType& reduce_val, + const std::string& str = "", + typename std::enable_if<( is_linked_cell_list::value ), + int>::type* = 0 ) +{ + using work_tag = typename Kokkos::RangePolicy::work_tag; + using execution_space = + typename Kokkos::RangePolicy::execution_space; + + using team_policy_type = + Kokkos::TeamPolicy>; + team_policy_type team_policy( exec_policy.end() - exec_policy.begin(), + Kokkos::AUTO ); + + using memory_space = typename LinkedCellType::memory_space; + + static_assert( is_accessible_from{}, "" ); + + auto particle_bins = list.getParticleBins(); + + Impl::LinkedCellParallelReduce< + work_tag, FunctorType, team_policy_type, LinkedCellType, + typename LinkedCellType::CountView, ReduceType> + lcl_par( str, team_policy, functor, list, particle_bins, reduce_val, + exec_policy.begin() ); +} + } // end namespace Cabana #endif // end CABANA_PARALLEL_HPP diff --git a/core/src/Cabana_VerletList.hpp b/core/src/Cabana_VerletList.hpp index 839180738..c42277829 100644 --- a/core/src/Cabana_VerletList.hpp +++ b/core/src/Cabana_VerletList.hpp @@ -19,7 +19,6 @@ #include #include #include -#include #include @@ -114,103 +113,11 @@ struct VerletListData //---------------------------------------------------------------------------// -namespace Impl -{ -//! \cond Impl - //---------------------------------------------------------------------------// -// Neighborhood discriminator. -template -class NeighborDiscriminator; - -// Full list specialization. -template <> -class NeighborDiscriminator -{ - public: - // Full neighbor lists count and store the neighbors of all - // particles. The only criteria for a potentially valid neighbor is - // that the particle does not neighbor itself (i.e. the particle index - // "p" is not the same as the neighbor index "n"). - KOKKOS_INLINE_FUNCTION - static bool isValid( const std::size_t p, const double, const double, - const double, const std::size_t n, const double, - const double, const double ) - { - return ( p != n ); - } -}; - -// Half list specialization. -template <> -class NeighborDiscriminator -{ - public: - // Half neighbor lists only store half of the neighbors be eliminating - // duplicate pairs such that the fact that particle "p" neighbors - // particle "n" is stored in the list but "n" neighboring "p" is not - // stored but rather implied. We discriminate by only storing neighbors - // who's coordinates are greater in the x direction. If they are the same - // then the y direction is checked next and finally the z direction if the - // y coordinates are the same. - KOKKOS_INLINE_FUNCTION - static bool isValid( const std::size_t p, const double xp, const double yp, - const double zp, const std::size_t n, const double xn, - const double yn, const double zn ) - { - return ( ( p != n ) && - ( ( xn > xp ) || - ( ( xn == xp ) && - ( ( yn > yp ) || ( ( yn == yp ) && ( zn > zp ) ) ) ) ) ); - } -}; -//---------------------------------------------------------------------------// -// Cell stencil. -template -struct LinkedCellStencil +namespace Impl { - Scalar rsqr; - CartesianGrid grid; - int max_cells_dir; - int max_cells; - int cell_range; - - LinkedCellStencil( const Scalar neighborhood_radius, - const Scalar cell_size_ratio, const Scalar grid_min[3], - const Scalar grid_max[3] ) - : rsqr( neighborhood_radius * neighborhood_radius ) - { - Scalar dx = neighborhood_radius * cell_size_ratio; - grid = CartesianGrid( grid_min[0], grid_min[1], grid_min[2], - grid_max[0], grid_max[1], grid_max[2], dx, - dx, dx ); - cell_range = std::ceil( 1 / cell_size_ratio ); - max_cells_dir = 2 * cell_range + 1; - max_cells = max_cells_dir * max_cells_dir * max_cells_dir; - } - - // Given a cell, get the index bounds of the cell stencil. - KOKKOS_INLINE_FUNCTION - void getCells( const int cell, int& imin, int& imax, int& jmin, int& jmax, - int& kmin, int& kmax ) const - { - int i, j, k; - grid.ijkBinIndex( cell, i, j, k ); - - kmin = ( k - cell_range > 0 ) ? k - cell_range : 0; - kmax = - ( k + cell_range + 1 < grid._nz ) ? k + cell_range + 1 : grid._nz; - - jmin = ( j - cell_range > 0 ) ? j - cell_range : 0; - jmax = - ( j + cell_range + 1 < grid._ny ) ? j + cell_range + 1 : grid._ny; - - imin = ( i - cell_range > 0 ) ? i - cell_range : 0; - imax = - ( i + cell_range + 1 < grid._nx ) ? i + cell_range + 1 : grid._nx; - } -}; +//! \cond Impl //---------------------------------------------------------------------------// // Verlet List Builder @@ -239,10 +146,7 @@ struct VerletListBuilder // Binning Data. BinningData bin_data_1d; - LinkedCellList linked_cell_list; - - // Cell stencil. - LinkedCellStencil cell_stencil; + LinkedCellList linked_cell_list; // Check to count or refill. bool refill; @@ -261,8 +165,6 @@ struct VerletListBuilder const std::size_t max_neigh ) : pid_begin( begin ) , pid_end( end ) - , cell_stencil( neighborhood_radius, cell_size_ratio, grid_min, - grid_max ) , alloc_n( max_neigh ) { count = true; @@ -284,8 +186,9 @@ struct VerletListBuilder // treated as candidates for neighbors. double grid_size = cell_size_ratio * neighborhood_radius; PositionValueType grid_delta[3] = { grid_size, grid_size, grid_size }; - linked_cell_list = LinkedCellList( position, grid_delta, - grid_min, grid_max ); + linked_cell_list = createLinkedCellList( + position, grid_delta, grid_min, grid_max, neighborhood_radius, + cell_size_ratio ); bin_data_1d = linked_cell_list.binningData(); // We will use the square of the distance for neighbor determination. @@ -312,7 +215,8 @@ struct VerletListBuilder // Get the stencil for this cell. int imin, imax, jmin, jmax, kmin, kmax; - cell_stencil.getCells( cell, imin, imax, jmin, jmax, kmin, kmax ); + linked_cell_list.getStencilCells( cell, imin, imax, jmin, jmax, kmin, + kmax ); // Operate on the particles in the bin. std::size_t b_offset = bin_data_1d.binOffset( cell ); @@ -339,8 +243,9 @@ struct VerletListBuilder { // See if we should actually check this box for // neighbors. - if ( cell_stencil.grid.minDistanceToPoint( - x_p, y_p, z_p, i, j, k ) <= rsqr ) + if ( linked_cell_list.cellStencil() + .grid.minDistanceToPoint( + x_p, y_p, z_p, i, j, k ) <= rsqr ) { std::size_t n_offset = linked_cell_list.binOffset( i, j, k ); @@ -528,7 +433,8 @@ struct VerletListBuilder // Get the stencil for this cell. int imin, imax, jmin, jmax, kmin, kmax; - cell_stencil.getCells( cell, imin, imax, jmin, jmax, kmin, kmax ); + linked_cell_list.getStencilCells( cell, imin, imax, jmin, jmax, kmin, + kmax ); // Operate on the particles in the bin. std::size_t b_offset = bin_data_1d.binOffset( cell ); @@ -554,8 +460,9 @@ struct VerletListBuilder { // See if we should actually check this box for // neighbors. - if ( cell_stencil.grid.minDistanceToPoint( - x_p, y_p, z_p, i, j, k ) <= rsqr ) + if ( linked_cell_list.cellStencil() + .grid.minDistanceToPoint( + x_p, y_p, z_p, i, j, k ) <= rsqr ) { // Check the particles in this bin to see if // they are neighbors. diff --git a/core/unit_test/tstLinkedCellList.hpp b/core/unit_test/tstLinkedCellList.hpp index 9c51ca990..1c0cbc0c5 100644 --- a/core/unit_test/tstLinkedCellList.hpp +++ b/core/unit_test/tstLinkedCellList.hpp @@ -13,10 +13,15 @@ #include #include +#include + #include namespace Test { +//---------------------------------------------------------------------------// +// Test data +//---------------------------------------------------------------------------// struct LCLTestData { enum MyFields @@ -95,8 +100,9 @@ struct LCLTestData } }; -void copyListToHost( LCLTestData& test_data, - const Cabana::LinkedCellList cell_list ) +void copyListToHost( + LCLTestData& test_data, + const Cabana::LinkedCellList cell_list ) { // Copy data to the host for testing. auto np = test_data.num_p; @@ -136,8 +142,11 @@ void copyListToHost( LCLTestData& test_data, Kokkos::create_mirror_view_and_copy( Kokkos::HostSpace(), bin_offset ); } +//---------------------------------------------------------------------------// +// Main Linked Cell List tests +//---------------------------------------------------------------------------// void checkBins( const LCLTestData test_data, - const Cabana::LinkedCellList cell_list ) + const Cabana::LinkedCellList cell_list ) { auto nx = test_data.nx; @@ -236,6 +245,85 @@ void checkLinkedCell( const LCLTestData test_data, } } } + +//---------------------------------------------------------------------------// +// Linked cell list cell stencil test. + +void testLinkedCellStencil() +{ + // Point in the middle + { + double min[3] = { 0.0, 0.0, 0.0 }; + double max[3] = { 10.0, 10.0, 10.0 }; + double radius = 1.0; + double ratio = 1.0; + Cabana::LinkedCellStencil stencil( radius, ratio, min, max ); + + double xp = 4.5; + double yp = 5.5; + double zp = 3.5; + int ic, jc, kc; + stencil.grid.locatePoint( xp, yp, zp, ic, jc, kc ); + int cell = stencil.grid.cardinalCellIndex( ic, jc, kc ); + int imin, imax, jmin, jmax, kmin, kmax; + stencil.getCells( cell, imin, imax, jmin, jmax, kmin, kmax ); + EXPECT_EQ( imin, 3 ); + EXPECT_EQ( imax, 6 ); + EXPECT_EQ( jmin, 4 ); + EXPECT_EQ( jmax, 7 ); + EXPECT_EQ( kmin, 2 ); + EXPECT_EQ( kmax, 5 ); + } + + // Point in the lower right corner + { + double min[3] = { 0.0, 0.0, 0.0 }; + double max[3] = { 10.0, 10.0, 10.0 }; + double radius = 1.0; + double ratio = 1.0; + Cabana::LinkedCellStencil stencil( radius, ratio, min, max ); + + double xp = 0.5; + double yp = 0.5; + double zp = 0.5; + int ic, jc, kc; + stencil.grid.locatePoint( xp, yp, zp, ic, jc, kc ); + int cell = stencil.grid.cardinalCellIndex( ic, jc, kc ); + int imin, imax, jmin, jmax, kmin, kmax; + stencil.getCells( cell, imin, imax, jmin, jmax, kmin, kmax ); + EXPECT_EQ( imin, 0 ); + EXPECT_EQ( imax, 2 ); + EXPECT_EQ( jmin, 0 ); + EXPECT_EQ( jmax, 2 ); + EXPECT_EQ( kmin, 0 ); + EXPECT_EQ( kmax, 2 ); + } + + // Point in the upper left corner + { + double min[3] = { 0.0, 0.0, 0.0 }; + double max[3] = { 10.0, 10.0, 10.0 }; + double radius = 1.0; + double ratio = 1.0; + Cabana::LinkedCellStencil stencil( radius, ratio, min, max ); + + double xp = 9.5; + double yp = 9.5; + double zp = 9.5; + int ic, jc, kc; + stencil.grid.locatePoint( xp, yp, zp, ic, jc, kc ); + int cell = stencil.grid.cardinalCellIndex( ic, jc, kc ); + int imin, imax, jmin, jmax, kmin, kmax; + stencil.getCells( cell, imin, imax, jmin, jmax, kmin, kmax ); + EXPECT_EQ( imin, 8 ); + EXPECT_EQ( imax, 10 ); + EXPECT_EQ( jmin, 8 ); + EXPECT_EQ( jmax, 10 ); + EXPECT_EQ( kmin, 8 ); + EXPECT_EQ( kmax, 10 ); + } +} + //---------------------------------------------------------------------------// void testLinkedList() { @@ -250,8 +338,9 @@ void testLinkedList() { auto begin = test_data.begin; auto end = test_data.end; - Cabana::LinkedCellList cell_list( - pos, begin, end, grid_delta, grid_min, grid_max ); + auto cell_list = Cabana::createLinkedCellList( + pos, begin, end, grid_delta, grid_min, grid_max, + test_data.grid_delta[0], 1.0 ); Cabana::permute( cell_list, test_data.aosoa ); copyListToHost( test_data, cell_list ); @@ -262,8 +351,8 @@ void testLinkedList() // Now bin and permute all of the particles. { - Cabana::LinkedCellList cell_list( pos, grid_delta, - grid_min, grid_max ); + auto cell_list = Cabana::createLinkedCellList( + pos, grid_delta, grid_min, grid_max, test_data.grid_delta[0] ); Cabana::permute( cell_list, test_data.aosoa ); copyListToHost( test_data, cell_list ); @@ -287,8 +376,8 @@ void testLinkedListSlice() { auto begin = test_data.begin; auto end = test_data.end; - Cabana::LinkedCellList cell_list( - pos, begin, end, grid_delta, grid_min, grid_max ); + auto cell_list = Cabana::createLinkedCellList( + pos, begin, end, grid_delta, grid_min, grid_max, grid_delta[0] ); Cabana::permute( cell_list, pos ); copyListToHost( test_data, cell_list ); @@ -298,8 +387,8 @@ void testLinkedListSlice() } // Now bin and permute all of the particles. { - Cabana::LinkedCellList cell_list( pos, grid_delta, - grid_min, grid_max ); + auto cell_list = Cabana::createLinkedCellList( + pos, grid_delta, grid_min, grid_max, grid_delta[0] ); Cabana::permute( cell_list, pos ); copyListToHost( test_data, cell_list ); @@ -318,14 +407,300 @@ void testLinkedListSlice() } } +//---------------------------------------------------------------------------// +// linked_list_parallel +//---------------------------------------------------------------------------// +template +void checkLinkedCellParallel( const ListType& nlist, + const TestListType& N2_list_copy, + const int num_particle, + const PositionType positions, + const double cutoff ) +{ + // Create Kokkos views for the write operation. + using memory_space = typename TEST_MEMSPACE::memory_space; + Kokkos::View serial_result( "serial_result", + num_particle ); + Kokkos::View team_result( "team_result", num_particle ); + + // Test the list parallel operation by adding a value from each neighbor + // to the particle (within cutoff) and compare to counts. + auto c2 = cutoff * cutoff; + auto serial_count_op = KOKKOS_LAMBDA( const int i, const int j ) + { + const double dx = positions( i, 0 ) - positions( j, 0 ); + const double dy = positions( i, 1 ) - positions( j, 1 ); + const double dz = positions( i, 2 ) - positions( j, 2 ); + const double r2 = dx * dx + dy * dy + dz * dz; + if ( r2 <= c2 ) + { + if ( nlist.sorted() ) + { + Kokkos::atomic_add( &serial_result( nlist.permutation( i ) ), + nlist.permutation( j ) ); + } + else + { + Kokkos::atomic_add( &serial_result( i ), j ); + } + } + }; + auto team_count_op = KOKKOS_LAMBDA( const int i, const int j ) + { + const double dx = positions( i, 0 ) - positions( j, 0 ); + const double dy = positions( i, 1 ) - positions( j, 1 ); + const double dz = positions( i, 2 ) - positions( j, 2 ); + const double r2 = dx * dx + dy * dy + dz * dz; + if ( r2 <= c2 ) + { + if ( nlist.sorted() ) + { + Kokkos::atomic_add( &team_result( nlist.permutation( i ) ), + nlist.permutation( j ) ); + } + else + { + Kokkos::atomic_add( &team_result( i ), j ); + } + } + }; + Kokkos::RangePolicy policy( 0, num_particle ); + Cabana::linked_cell_parallel_for( + policy, serial_count_op, nlist, Cabana::FirstNeighborsTag(), + Cabana::SerialOpTag(), "test_1st_serial" ); + Cabana::linked_cell_parallel_for( policy, team_count_op, nlist, + Cabana::FirstNeighborsTag(), + Cabana::TeamOpTag(), "test_1st_team" ); + Kokkos::fence(); + + checkFirstNeighborParallelFor( N2_list_copy, serial_result, team_result, + 1 ); +} + +//---------------------------------------------------------------------------// +// linked_list_parallel +//---------------------------------------------------------------------------// +template +void checkLinkedCellNeighborParallel( const ListType& nlist, + const TestListType& N2_list_copy, + const int num_particle, + const PositionType positions, + const double cutoff ) +{ + // Create Kokkos views for the write operation. + using memory_space = typename TEST_MEMSPACE::memory_space; + Kokkos::View serial_result( "serial_result", + num_particle ); + Kokkos::View team_result( "team_result", num_particle ); + + // Test the list parallel operation by adding a value from each neighbor + // to the particle (within cutoff) and compare to counts. + auto c2 = cutoff * cutoff; + + Cabana::NeighborDiscriminator _discriminator; + + auto serial_count_op = KOKKOS_LAMBDA( const int i, const int j ) + { + const double dx = positions( i, 0 ) - positions( j, 0 ); + const double dy = positions( i, 1 ) - positions( j, 1 ); + const double dz = positions( i, 2 ) - positions( j, 2 ); + const double r2 = dx * dx + dy * dy + dz * dz; + if ( r2 <= c2 && _discriminator.isValid( i, 0, 0, 0, j, 0, 0, 0 ) ) + { + if ( nlist.sorted() ) + { + Kokkos::atomic_add( &serial_result( nlist.permutation( i ) ), + nlist.permutation( j ) ); + } + else + { + Kokkos::atomic_add( &serial_result( i ), j ); + } + } + }; + auto team_count_op = KOKKOS_LAMBDA( const int i, const int j ) + { + const double dx = positions( i, 0 ) - positions( j, 0 ); + const double dy = positions( i, 1 ) - positions( j, 1 ); + const double dz = positions( i, 2 ) - positions( j, 2 ); + const double r2 = dx * dx + dy * dy + dz * dz; + if ( r2 <= c2 && _discriminator.isValid( i, 0, 0, 0, j, 0, 0, 0 ) ) + { + if ( nlist.sorted() ) + { + Kokkos::atomic_add( &team_result( nlist.permutation( i ) ), + nlist.permutation( j ) ); + } + else + { + Kokkos::atomic_add( &team_result( i ), j ); + } + } + }; + Kokkos::RangePolicy policy( 0, num_particle ); + Cabana::neighbor_parallel_for( policy, serial_count_op, nlist, + Cabana::FirstNeighborsTag(), + Cabana::SerialOpTag(), "test_1st_serial" ); + Cabana::neighbor_parallel_for( policy, team_count_op, nlist, + Cabana::FirstNeighborsTag(), + Cabana::TeamOpTag(), "test_1st_team" ); + Kokkos::fence(); + + checkFirstNeighborParallelFor( N2_list_copy, serial_result, team_result, + 1 ); +} + +//---------------------------------------------------------------------------// +// linked_list_parallel +//---------------------------------------------------------------------------// +template +void checkLinkedCellReduce( const ListType& nlist, + const TestListType& N2_list_copy, + const AoSoAType& aosoa, const int num_particle, + const double cutoff ) +{ + auto position = Cabana::slice<0>( aosoa ); + + // Test the list parallel operation by adding a value from each neighbor + // to the particle (within cutoff) and compare to counts. + auto c2 = cutoff * cutoff; + auto sum_op = KOKKOS_LAMBDA( const int i, const int j, double& sum ) + { + const double dx = position( i, 0 ) - position( j, 0 ); + const double dy = position( i, 1 ) - position( j, 1 ); + const double dz = position( i, 2 ) - position( j, 2 ); + const double r2 = dx * dx + dy * dy + dz * dz; + if ( r2 <= c2 ) + { + if ( nlist.sorted() ) + { + sum += position( nlist.permutation( i ), 0 ) + + position( nlist.permutation( j ), 0 ); + } + else + { + sum += position( i, 0 ) + position( j, 0 ); + } + } + }; + Kokkos::RangePolicy policy( 0, num_particle ); + double serial_sum = 0; + Cabana::linked_cell_parallel_reduce( + policy, sum_op, nlist, Cabana::FirstNeighborsTag(), + Cabana::SerialOpTag(), serial_sum, "test_1st_serial" ); + double team_sum = 0; + Cabana::linked_cell_parallel_reduce( + policy, sum_op, nlist, Cabana::FirstNeighborsTag(), Cabana::TeamOpTag(), + team_sum, "test_1st_team" ); + Kokkos::fence(); + + checkFirstNeighborParallelReduce( N2_list_copy, aosoa, serial_sum, team_sum, + 1 ); +} + +//---------------------------------------------------------------------------// +// linked_list_parallel +//---------------------------------------------------------------------------// +template +void checkLinkedCellNeighborReduce( const ListType& nlist, + const TestListType& N2_list_copy, + const AoSoAType aosoa, + const int num_particle, + const double cutoff ) +{ + auto position = Cabana::slice<0>( aosoa ); + + // Test the list parallel operation by adding a value from each neighbor + // to the particle (within cutoff) and compare to counts. + auto c2 = cutoff * cutoff; + + Cabana::NeighborDiscriminator _discriminator; + + auto sum_op = KOKKOS_LAMBDA( const int i, const int j, double& sum ) + { + const double dx = position( i, 0 ) - position( j, 0 ); + const double dy = position( i, 1 ) - position( j, 1 ); + const double dz = position( i, 2 ) - position( j, 2 ); + const double r2 = dx * dx + dy * dy + dz * dz; + if ( r2 <= c2 && _discriminator.isValid( i, 0, 0, 0, j, 0, 0, 0 ) ) + { + if ( nlist.sorted() ) + { + sum += position( nlist.permutation( i ), 0 ) + + position( nlist.permutation( j ), 0 ); + } + else + { + sum += position( i, 0 ) + position( j, 0 ); + } + } + }; + Kokkos::RangePolicy policy( 0, num_particle ); + double serial_sum = 0; + Cabana::neighbor_parallel_reduce( + policy, sum_op, nlist, Cabana::FirstNeighborsTag(), + Cabana::SerialOpTag(), serial_sum, "test_1st_serial" ); + double team_sum = 0; + Cabana::neighbor_parallel_reduce( + policy, sum_op, nlist, Cabana::FirstNeighborsTag(), Cabana::TeamOpTag(), + team_sum, "test_1st_team" ); + Kokkos::fence(); + + checkFirstNeighborParallelReduce( N2_list_copy, aosoa, serial_sum, team_sum, + 1 ); +} + +//---------------------------------------------------------------------------// +void testLinkedCellParallel() +{ + // Create the AoSoA and fill with random particle positions. + NeighborListTestData test_data; + auto positions = Cabana::slice<0>( test_data.aosoa ); + + // Create the linked cell list. + double grid_size = test_data.cell_size_ratio * test_data.test_radius; + double grid_delta[3] = { grid_size, grid_size, grid_size }; + auto nlist = Cabana::createLinkedCellList( + positions, grid_delta, test_data.grid_min, test_data.grid_max, + test_data.test_radius, test_data.cell_size_ratio ); + + checkLinkedCellParallel( nlist, test_data.N2_list_copy, + test_data.num_particle, positions, + test_data.test_radius ); + checkLinkedCellNeighborParallel( nlist, test_data.N2_list_copy, + test_data.num_particle, positions, + test_data.test_radius ); + checkLinkedCellReduce( nlist, test_data.N2_list_copy, test_data.aosoa, + test_data.num_particle, test_data.test_radius ); + checkLinkedCellNeighborReduce( nlist, test_data.N2_list_copy, + test_data.aosoa, test_data.num_particle, + test_data.test_radius ); + + Cabana::permute( nlist, positions ); + checkLinkedCellParallel( nlist, test_data.N2_list_copy, + test_data.num_particle, positions, + test_data.test_radius ); + checkLinkedCellNeighborParallel( nlist, test_data.N2_list_copy, + test_data.num_particle, positions, + test_data.test_radius ); + checkLinkedCellReduce( nlist, test_data.N2_list_copy, test_data.aosoa, + test_data.num_particle, test_data.test_radius ); + checkLinkedCellNeighborReduce( nlist, test_data.N2_list_copy, + test_data.aosoa, test_data.num_particle, + test_data.test_radius ); +} + //---------------------------------------------------------------------------// // RUN TESTS //---------------------------------------------------------------------------// +TEST( TEST_CATEGORY, linked_cell_stencil_test ) { testLinkedCellStencil(); } + TEST( TEST_CATEGORY, linked_list_test ) { testLinkedList(); } -//---------------------------------------------------------------------------// TEST( TEST_CATEGORY, linked_list_slice_test ) { testLinkedListSlice(); } +TEST( TEST_CATEGORY, linked_list_parallel_test ) { testLinkedCellParallel(); } + //---------------------------------------------------------------------------// } // end namespace Test diff --git a/core/unit_test/tstNeighborList.hpp b/core/unit_test/tstNeighborList.hpp index 1bb28fd8c..a0aeabd98 100644 --- a/core/unit_test/tstNeighborList.hpp +++ b/core/unit_test/tstNeighborList.hpp @@ -22,85 +22,6 @@ namespace Test { -//---------------------------------------------------------------------------// -// Linked cell list cell stencil test. -void testLinkedCellStencil() -{ - // Point in the middle - { - double min[3] = { 0.0, 0.0, 0.0 }; - double max[3] = { 10.0, 10.0, 10.0 }; - double radius = 1.0; - double ratio = 1.0; - Cabana::Impl::LinkedCellStencil stencil( radius, ratio, min, - max ); - - double xp = 4.5; - double yp = 5.5; - double zp = 3.5; - int ic, jc, kc; - stencil.grid.locatePoint( xp, yp, zp, ic, jc, kc ); - int cell = stencil.grid.cardinalCellIndex( ic, jc, kc ); - int imin, imax, jmin, jmax, kmin, kmax; - stencil.getCells( cell, imin, imax, jmin, jmax, kmin, kmax ); - EXPECT_EQ( imin, 3 ); - EXPECT_EQ( imax, 6 ); - EXPECT_EQ( jmin, 4 ); - EXPECT_EQ( jmax, 7 ); - EXPECT_EQ( kmin, 2 ); - EXPECT_EQ( kmax, 5 ); - } - - // Point in the lower right corner - { - double min[3] = { 0.0, 0.0, 0.0 }; - double max[3] = { 10.0, 10.0, 10.0 }; - double radius = 1.0; - double ratio = 1.0; - Cabana::Impl::LinkedCellStencil stencil( radius, ratio, min, - max ); - - double xp = 0.5; - double yp = 0.5; - double zp = 0.5; - int ic, jc, kc; - stencil.grid.locatePoint( xp, yp, zp, ic, jc, kc ); - int cell = stencil.grid.cardinalCellIndex( ic, jc, kc ); - int imin, imax, jmin, jmax, kmin, kmax; - stencil.getCells( cell, imin, imax, jmin, jmax, kmin, kmax ); - EXPECT_EQ( imin, 0 ); - EXPECT_EQ( imax, 2 ); - EXPECT_EQ( jmin, 0 ); - EXPECT_EQ( jmax, 2 ); - EXPECT_EQ( kmin, 0 ); - EXPECT_EQ( kmax, 2 ); - } - - // Point in the upper left corner - { - double min[3] = { 0.0, 0.0, 0.0 }; - double max[3] = { 10.0, 10.0, 10.0 }; - double radius = 1.0; - double ratio = 1.0; - Cabana::Impl::LinkedCellStencil stencil( radius, ratio, min, - max ); - - double xp = 9.5; - double yp = 9.5; - double zp = 9.5; - int ic, jc, kc; - stencil.grid.locatePoint( xp, yp, zp, ic, jc, kc ); - int cell = stencil.grid.cardinalCellIndex( ic, jc, kc ); - int imin, imax, jmin, jmax, kmin, kmax; - stencil.getCells( cell, imin, imax, jmin, jmax, kmin, kmax ); - EXPECT_EQ( imin, 8 ); - EXPECT_EQ( imax, 10 ); - EXPECT_EQ( jmin, 8 ); - EXPECT_EQ( jmax, 10 ); - EXPECT_EQ( kmin, 8 ); - EXPECT_EQ( kmax, 10 ); - } -} //---------------------------------------------------------------------------// template @@ -326,9 +247,7 @@ void testModifyNeighbors() //---------------------------------------------------------------------------// // TESTS //---------------------------------------------------------------------------// -TEST( TEST_CATEGORY, linked_cell_stencil_test ) { testLinkedCellStencil(); } -//---------------------------------------------------------------------------// TEST( TEST_CATEGORY, verlet_list_full_test ) { #ifndef KOKKOS_ENABLE_OPENMPTARGET // FIXME_OPENMPTARGET diff --git a/example/core_tutorial/08_linked_cell_list/linked_cell_list_example.cpp b/example/core_tutorial/08_linked_cell_list/linked_cell_list_example.cpp index 1560a08a3..ca00f913c 100644 --- a/example/core_tutorial/08_linked_cell_list/linked_cell_list_example.cpp +++ b/example/core_tutorial/08_linked_cell_list/linked_cell_list_example.cpp @@ -105,12 +105,11 @@ void linkedCellListExample() exercise, try adding both start and end (int) inputs to define a subset range of particles to permute: - Cabana::LinkedCellList cell_list( positions, start, end, - grid_delta, - grid_min, grid_max ); + auto cell_list = Cabana::createLinkedCellList( + positions, start, end, grid_delta, grid_min, grid_max ); */ - Cabana::LinkedCellList cell_list( positions, grid_delta, - grid_min, grid_max ); + auto cell_list = Cabana::createLinkedCellList( + positions, grid_delta, grid_min, grid_max ); /* Now permute the AoSoA (i.e. reorder the data) using the linked cell list.