Skip to content

Commit

Permalink
Tacho : move workspace allocation to symbolic from numeric
Browse files Browse the repository at this point in the history
Signed-off-by: iyamazaki <ic.yamazaki@gmail.com>
  • Loading branch information
iyamazaki committed Oct 24, 2024
1 parent f0d5905 commit 4da961b
Show file tree
Hide file tree
Showing 6 changed files with 126 additions and 87 deletions.
2 changes: 2 additions & 0 deletions packages/amesos2/src/Amesos2_Tacho_decl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,8 @@ class TachoSolver : public SolverCore<Amesos2::TachoSolver, Matrix, Vector>
int method;
int variant;
int small_problem_threshold_size;
int streams;
bool verbose;
// int num_kokkos_threads;
// int max_num_superblocks;
} data_;
Expand Down
15 changes: 12 additions & 3 deletions packages/amesos2/src/Amesos2_Tacho_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,10 @@ TachoSolver<Matrix,Vector>::TachoSolver(
Teuchos::RCP<const Vector> B )
: SolverCore<Amesos2::TachoSolver,Matrix,Vector>(A, X, B)
{
data_.method = 1; // Cholesky
data_.variant = 2; // solver variant
data_.method = 1; // Cholesky
data_.variant = 2; // solver variant
data_.streams = 1; // # of streams
data_.verbose = false; // verbose
}


Expand Down Expand Up @@ -74,7 +76,8 @@ TachoSolver<Matrix,Vector>::symbolicFactorization_impl()
data_.solver.setSolutionMethod(data_.method);
data_.solver.setLevelSetOptionAlgorithmVariant(data_.variant);
data_.solver.setSmallProblemThresholdsize(data_.small_problem_threshold_size);

data_.solver.setVerbose(data_.verbose);
data_.solver.setLevelSetOptionNumStreams(data_.streams);
// TODO: Confirm param options
// data_.solver.setMaxNumberOfSuperblocks(data_.max_num_superblocks);

Expand Down Expand Up @@ -216,6 +219,10 @@ TachoSolver<Matrix,Vector>::setParameters_impl(const Teuchos::RCP<Teuchos::Param
data_.variant = parameterList->get<int> ("variant", 2);
// small problem threshold
data_.small_problem_threshold_size = parameterList->get<int> ("small problem threshold size", 1024);
// verbosity
data_.verbose = parameterList->get<bool> ("verbose", false);
// # of streams
data_.streams = parameterList->get<int> ("num-streams", 1);
// TODO: Confirm param options
// data_.num_kokkos_threads = parameterList->get<int>("kokkos-threads", 1);
// data_.max_num_superblocks = parameterList->get<int>("max-num-superblocks", 4);
Expand All @@ -234,6 +241,8 @@ TachoSolver<Matrix,Vector>::getValidParameters_impl() const
pl->set("method", "chol", "Type of factorization, chol, ldl, or lu");
pl->set("variant", 2, "Type of solver variant, 0, 1, or 2");
pl->set("small problem threshold size", 1024, "Problem size threshold below with Tacho uses LAPACK.");
pl->set("verbose", false, "Verbosity");
pl->set("num-streams", 1, "Number of GPU streams");

// TODO: Confirm param options
// pl->set("kokkos-threads", 1, "Number of threads");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ namespace Tacho {

template <typename VT, typename DT>
Driver<VT, DT>::Driver()
: _method(1), _order_connected_graph_separately(0), _m(0), _nnz(0), _ap(), _h_ap(), _aj(), _h_aj(), _perm(),
: _method(1), _order_connected_graph_separately(1), _m(0), _nnz(0), _ap(), _h_ap(), _aj(), _h_aj(), _perm(),
_h_perm(), _peri(), _h_peri(), _m_graph(0), _nnz_graph(0), _h_ap_graph(), _h_aj_graph(), _h_perm_graph(),
_h_peri_graph(), _nsupernodes(0), _N(nullptr), _verbose(0), _small_problem_thres(1024), _serial_thres_size(-1),
_mb(-1), _nb(-1), _front_update_mode(-1), _levelset(0), _device_level_cut(0), _device_factor_thres(128),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,9 +125,10 @@ template <typename ValueType, typename DeviceType> class NumericToolsBase {
virtual void print_stat_factor() {
const double kilo(1024);
printf(" Time\n");
printf(" time for extra tasks (allocation): %10.6f s\n", stat.t_extra);
printf(" time for copying A into supernodes: %10.6f s\n", stat.t_copy);
printf(" time for numeric factorization: %10.6f s\n", stat.t_factor);
printf(" total time spent: %10.6f s\n", (stat.t_copy + stat.t_factor));
printf(" total time spent: %10.6f s\n", (stat.t_extra + stat.t_copy + stat.t_factor));
printf("\n");
printf(" Memory\n");
printf(" memory used in factorization: %10.3f MB\n", stat.m_used / kilo / kilo);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,7 @@ template <typename ValueType, typename DeviceType> class NumericToolsFactory;
_gid_colidx, _sid_ptr, _sid_colidx, _blk_colidx, _stree_parent, \
_stree_ptr, _stree_children, _stree_level, _stree_roots); \
numeric_tools_levelset_name *N = dynamic_cast<numeric_tools_levelset_name *>(object); \
N->initialize(_device_level_cut, _device_factor_thres, _device_solve_thres, _verbose); \
N->createStream(_nstreams, _verbose); \
N->initialize(_device_level_cut, _device_factor_thres, _device_solve_thres, _nstreams, _verbose); \
} while (false)

///
Expand Down
Loading

0 comments on commit 4da961b

Please sign in to comment.