Skip to content

Commit

Permalink
Merge Pull Request trilinos#13551 from trilinos/Trilinos/master_merge…
Browse files Browse the repository at this point in the history
…_20241025_175850

Automatically Merged using Trilinos Master Merge AutoTester
PR Title: b'Trilinos Master Merge PR Generator: Auto PR created to promote from master_merge_20241025_175850 branch to master'
PR Author: trilinos-autotester
  • Loading branch information
trilinos-autotester authored Oct 27, 2024
2 parents fb58dc8 + ed0ae91 commit e7fd307
Show file tree
Hide file tree
Showing 133 changed files with 3,012 additions and 1,249 deletions.
2 changes: 2 additions & 0 deletions packages/amesos2/src/Amesos2_Tacho_decl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,8 @@ class TachoSolver : public SolverCore<Amesos2::TachoSolver, Matrix, Vector>
int method;
int variant;
int small_problem_threshold_size;
int streams;
bool verbose;
// int num_kokkos_threads;
// int max_num_superblocks;
} data_;
Expand Down
15 changes: 12 additions & 3 deletions packages/amesos2/src/Amesos2_Tacho_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,10 @@ TachoSolver<Matrix,Vector>::TachoSolver(
Teuchos::RCP<const Vector> B )
: SolverCore<Amesos2::TachoSolver,Matrix,Vector>(A, X, B)
{
data_.method = 1; // Cholesky
data_.variant = 2; // solver variant
data_.method = 1; // Cholesky
data_.variant = 2; // solver variant
data_.streams = 1; // # of streams
data_.verbose = false; // verbose
}


Expand Down Expand Up @@ -74,7 +76,8 @@ TachoSolver<Matrix,Vector>::symbolicFactorization_impl()
data_.solver.setSolutionMethod(data_.method);
data_.solver.setLevelSetOptionAlgorithmVariant(data_.variant);
data_.solver.setSmallProblemThresholdsize(data_.small_problem_threshold_size);

data_.solver.setVerbose(data_.verbose);
data_.solver.setLevelSetOptionNumStreams(data_.streams);
// TODO: Confirm param options
// data_.solver.setMaxNumberOfSuperblocks(data_.max_num_superblocks);

Expand Down Expand Up @@ -216,6 +219,10 @@ TachoSolver<Matrix,Vector>::setParameters_impl(const Teuchos::RCP<Teuchos::Param
data_.variant = parameterList->get<int> ("variant", 2);
// small problem threshold
data_.small_problem_threshold_size = parameterList->get<int> ("small problem threshold size", 1024);
// verbosity
data_.verbose = parameterList->get<bool> ("verbose", false);
// # of streams
data_.streams = parameterList->get<int> ("num-streams", 1);
// TODO: Confirm param options
// data_.num_kokkos_threads = parameterList->get<int>("kokkos-threads", 1);
// data_.max_num_superblocks = parameterList->get<int>("max-num-superblocks", 4);
Expand All @@ -234,6 +241,8 @@ TachoSolver<Matrix,Vector>::getValidParameters_impl() const
pl->set("method", "chol", "Type of factorization, chol, ldl, or lu");
pl->set("variant", 2, "Type of solver variant, 0, 1, or 2");
pl->set("small problem threshold size", 1024, "Problem size threshold below with Tacho uses LAPACK.");
pl->set("verbose", false, "Verbosity");
pl->set("num-streams", 1, "Number of GPU streams");

// TODO: Confirm param options
// pl->set("kokkos-threads", 1, "Number of threads");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -277,8 +277,8 @@ IF (${PACKAGE_NAME}_ENABLE_Amesos2)
)

## These are regression tests, that also compare residual convergence history
ASSERT_DEFINED(PYTHONINTERP_FOUND)
IF (PYTHONINTERP_FOUND)
ASSERT_DEFINED(Python3_EXECUTABLE)
IF (Python3_EXECUTABLE)

TRIBITS_ADD_ADVANCED_TEST(
Structured_Region_Star2D_Tpetra_GS_MPI_4_regression
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,7 @@ void AggregationPhase1Algorithm<LocalOrdinal, GlobalOrdinal, Node>::
break;
} else {
// Decrement back the value of aggSizesView(agg)
Kokkos::atomic_decrement(&aggSizesView(agg));
Kokkos::atomic_dec(&aggSizesView(agg));
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ void AggregationPhase3Algorithm<LocalOrdinal, GlobalOrdinal, Node>::
procWinner(nodeIdx, 0) = myRank;
vertex2AggId(nodeIdx, 0) = aggId;
// aggregates.SetIsRoot(nodeIdx);
Kokkos::atomic_decrement(&numNonAggregated());
Kokkos::atomic_dec(&numNonAggregated());
for (int neigh = 0; neigh < neighbors.length; ++neigh) {
neighIdx = neighbors(neigh);
if ((neighIdx != nodeIdx) &&
Expand All @@ -264,7 +264,7 @@ void AggregationPhase3Algorithm<LocalOrdinal, GlobalOrdinal, Node>::
aggStat(neighIdx) = AGGREGATED;
procWinner(neighIdx, 0) = myRank;
vertex2AggId(neighIdx, 0) = aggId;
Kokkos::atomic_decrement(&numNonAggregated());
Kokkos::atomic_dec(&numNonAggregated());
}
}
return;
Expand All @@ -279,7 +279,7 @@ void AggregationPhase3Algorithm<LocalOrdinal, GlobalOrdinal, Node>::
aggStat(nodeIdx) = AGGREGATED;
procWinner(nodeIdx, 0) = myRank;
vertex2AggId(nodeIdx, 0) = vertex2AggId(neighIdx, 0);
Kokkos::atomic_decrement(&numNonAggregated());
Kokkos::atomic_dec(&numNonAggregated());
return;
}
}
Expand All @@ -293,7 +293,7 @@ void AggregationPhase3Algorithm<LocalOrdinal, GlobalOrdinal, Node>::
aggStat(nodeIdx) = AGGREGATED;
procWinner(nodeIdx, 0) = myRank;
vertex2AggId(nodeIdx, 0) = vertex2AggId(otherNodeIdx, 0);
Kokkos::atomic_decrement(&numNonAggregated());
Kokkos::atomic_dec(&numNonAggregated());
return;
}
}
Expand All @@ -306,7 +306,7 @@ void AggregationPhase3Algorithm<LocalOrdinal, GlobalOrdinal, Node>::
aggStat(nodeIdx) = AGGREGATED;
procWinner(nodeIdx, 0) = myRank;
vertex2AggId(nodeIdx, 0) = aggId;
Kokkos::atomic_decrement(&numNonAggregated());
Kokkos::atomic_dec(&numNonAggregated());
}
});
// LBV on 09/27/19: here we could copy numNonAggregated to host
Expand Down
10 changes: 5 additions & 5 deletions packages/nox/test/tpetra/1DFEM_Functors.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,26 +67,26 @@ struct RowCountsFunctor
void operator() (const LO localRow, std::size_t& curNumLocalEntries) const
{
// Add a diagonal matrix entry
Kokkos::atomic_increment(&counts_(localRow));
Kokkos::atomic_inc(&counts_(localRow));
++curNumLocalEntries;
// Contribute a matrix entry to the previous row
if (localRow > 0) {
Kokkos::atomic_increment(&counts_(localRow-1));
Kokkos::atomic_inc(&counts_(localRow-1));
++curNumLocalEntries;
}
// Contribute a matrix entry to the next row
if (localRow < numMyNodes_-1) {
Kokkos::atomic_increment(&counts_(localRow+1));
Kokkos::atomic_inc(&counts_(localRow+1));
++curNumLocalEntries;
}
// MPI process to the left sends us an entry
if ((myRank_ > 0) && (localRow == 0)) {
Kokkos::atomic_increment(&counts_(localRow));
Kokkos::atomic_inc(&counts_(localRow));
++curNumLocalEntries;
}
// MPI process to the right sends us an entry
if ((myRank_ < numProcs_-1) && (localRow == numMyNodes_-1)) {
Kokkos::atomic_increment(&counts_(localRow));
Kokkos::atomic_inc(&counts_(localRow));
++curNumLocalEntries;
}
}
Expand Down
16 changes: 8 additions & 8 deletions packages/sacado/test/performance/fenl_assembly/fenl_functors.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -250,10 +250,10 @@ class NodeNodeGraph {
if ( result.success() ) {

// If row node is owned then increment count
if ( row_node < row_count.extent(0) ) { atomic_increment( & row_count( row_node ) ); }
if ( row_node < row_count.extent(0) ) { Kokkos::atomic_inc( & row_count( row_node ) ); }

// If column node is owned and not equal to row node then increment count
if ( col_node < row_count.extent(0) && col_node != row_node ) { atomic_increment( & row_count( col_node ) ); }
if ( col_node < row_count.extent(0) && col_node != row_node ) { Kokkos::atomic_inc( & row_count( col_node ) ); }
}
else if ( result.failed() ) {
++count ;
Expand All @@ -276,12 +276,12 @@ class NodeNodeGraph {
const unsigned col_node = key.second ;

if ( row_node < row_count.extent(0) ) {
const unsigned offset = graph.row_map( row_node ) + atomic_fetch_add( & row_count( row_node ) , atomic_incr_type(1) );
const unsigned offset = graph.row_map( row_node ) + Kokkos::atomic_fetch_add( & row_count( row_node ) , atomic_incr_type(1) );
graph.entries( offset ) = col_node ;
}

if ( col_node < row_count.extent(0) && col_node != row_node ) {
const unsigned offset = graph.row_map( col_node ) + atomic_fetch_add( & row_count( col_node ) , atomic_incr_type(1) );
const unsigned offset = graph.row_map( col_node ) + Kokkos::atomic_fetch_add( & row_count( col_node ) , atomic_incr_type(1) );
graph.entries( offset ) = row_node ;
}
}
Expand Down Expand Up @@ -650,12 +650,12 @@ class ElementComputation
for( unsigned i = 0 ; i < FunctionCount ; i++ ) {
const unsigned row = node_index[i] ;
if ( row < this->residual.extent(0) ) {
atomic_add( & this->residual( row ) , res[i] );
Kokkos::atomic_add( & this->residual( row ) , res[i] );

for( unsigned j = 0 ; j < FunctionCount ; j++ ) {
const unsigned entry = this->elem_graph( ielem , i , j );
if ( entry != ~0u ) {
atomic_add( & this->jacobian.coeff( entry ) , mat[i][j] );
Kokkos::atomic_add( & this->jacobian.coeff( entry ) , mat[i][j] );
}
}
}
Expand Down Expand Up @@ -835,12 +835,12 @@ class ElementComputation
for( unsigned i = 0 ; i < FunctionCount ; i++ ) {
const unsigned row = node_index[i] ;
if ( row < this->residual.extent(0) ) {
atomic_add( & this->residual( row ) , res[i].val() );
Kokkos::atomic_add( & this->residual( row ) , res[i].val() );

for( unsigned j = 0 ; j < FunctionCount ; j++ ) {
const unsigned entry = this->elem_graph( ielem , i , j );
if ( entry != ~0u ) {
atomic_add( & this->jacobian.coeff( entry ) ,
Kokkos::atomic_add( & this->jacobian.coeff( entry ) ,
res[i].fastAccessDx(j) );
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -250,10 +250,10 @@ class NodeNodeGraph {
if ( result.success() ) {

// If row node is owned then increment count
if ( row_node < row_count.extent(0) ) { atomic_increment( & row_count( row_node ) ); }
if ( row_node < row_count.extent(0) ) { atomic_inc( & row_count( row_node ) ); }

// If column node is owned and not equal to row node then increment count
if ( col_node < row_count.extent(0) && col_node != row_node ) { atomic_increment( & row_count( col_node ) ); }
if ( col_node < row_count.extent(0) && col_node != row_node ) { atomic_inc( & row_count( col_node ) ); }
}
else if ( result.failed() ) {
++count ;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ namespace BaskerNS
BASKER_INLINE
void atomic_barrier_fanout(volatile Int &value, const Int l_size)
{
Kokkos::atomic_increment(&(value));
Kokkos::atomic_inc(&(value));
while(value < l_size)
{
BASKER_NO_OP;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ namespace Tacho {

template <typename VT, typename DT>
Driver<VT, DT>::Driver()
: _method(1), _order_connected_graph_separately(0), _m(0), _nnz(0), _ap(), _h_ap(), _aj(), _h_aj(), _perm(),
: _method(1), _order_connected_graph_separately(1), _m(0), _nnz(0), _ap(), _h_ap(), _aj(), _h_aj(), _perm(),
_h_perm(), _peri(), _h_peri(), _m_graph(0), _nnz_graph(0), _h_ap_graph(), _h_aj_graph(), _h_perm_graph(),
_h_peri_graph(), _nsupernodes(0), _N(nullptr), _verbose(0), _small_problem_thres(1024), _serial_thres_size(-1),
_mb(-1), _nb(-1), _front_update_mode(-1), _levelset(0), _device_level_cut(0), _device_factor_thres(128),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,9 +125,10 @@ template <typename ValueType, typename DeviceType> class NumericToolsBase {
virtual void print_stat_factor() {
const double kilo(1024);
printf(" Time\n");
printf(" time for extra tasks (allocation): %10.6f s\n", stat.t_extra);
printf(" time for copying A into supernodes: %10.6f s\n", stat.t_copy);
printf(" time for numeric factorization: %10.6f s\n", stat.t_factor);
printf(" total time spent: %10.6f s\n", (stat.t_copy + stat.t_factor));
printf(" total time spent: %10.6f s\n", (stat.t_extra + stat.t_copy + stat.t_factor));
printf("\n");
printf(" Memory\n");
printf(" memory used in factorization: %10.3f MB\n", stat.m_used / kilo / kilo);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,7 @@ template <typename ValueType, typename DeviceType> class NumericToolsFactory;
_gid_colidx, _sid_ptr, _sid_colidx, _blk_colidx, _stree_parent, \
_stree_ptr, _stree_children, _stree_level, _stree_roots); \
numeric_tools_levelset_name *N = dynamic_cast<numeric_tools_levelset_name *>(object); \
N->initialize(_device_level_cut, _device_factor_thres, _device_solve_thres, _verbose); \
N->createStream(_nstreams, _verbose); \
N->initialize(_device_level_cut, _device_factor_thres, _device_solve_thres, _nstreams, _verbose); \
} while (false)

///
Expand Down
Loading

0 comments on commit e7fd307

Please sign in to comment.