Allow changing the Davidson subspace size; print wavefunction memory

block-hczhai · Dec 1, 2023 · a72607e · a72607e
1 parent 038bf5f
commit a72607e
Show file tree

Hide file tree

Showing 10 changed files with 235 additions and 48 deletions.
diff --git a/pyblock2/driver/block2main b/pyblock2/driver/block2main
@@ -2324,6 +2324,8 @@ if not pre_run:
  dmrg.davidson_max_iter = int(dic.get("davidson_max_iter", 5000))
  dmrg.davidson_soft_max_iter = int(
  dic.get("davidson_soft_max_iter", 4000))
+ dmrg.davidson_def_max_size = int(
+ dic.get("davidson_def_max_size", 50))
  dmrg.store_wfn_spectra = store_wfn_spectra
  dmrg.site_dependent_bond_dims = site_dependent_bdims
 
@@ -2466,6 +2468,8 @@ if not pre_run:
  dmrg.davidson_max_iter = int(dic.get("davidson_max_iter", 5000))
  dmrg.davidson_soft_max_iter = int(
  dic.get("davidson_soft_max_iter", 4000))
+ dmrg.davidson_def_max_size = int(
+ dic.get("davidson_def_max_size", 50))
  dmrg.decomp_type = decomp_type
  dmrg.trunc_type = trunc_type
  dmrg.davidson_conv_thrds = VectorFP(dav_thrds)

diff --git a/pyblock2/driver/core.py b/pyblock2/driver/core.py
@@ -2775,6 +2775,7 @@ def dmrg(
  cutoff=1e-20,
  twosite_to_onesite=None,
  dav_max_iter=4000,
+ dav_def_max_size=50,
  proj_mpss=None,
  proj_weights=None,
  store_wfn_spectra=True,
@@ -2820,6 +2821,7 @@ def dmrg(
  dmrg.davidson_conv_thrds = bw.VectorFP(thrds)
  dmrg.davidson_max_iter = dav_max_iter + 100
  dmrg.davidson_soft_max_iter = dav_max_iter
+ dmrg.davidson_def_max_size = dav_def_max_size
  dmrg.store_wfn_spectra = store_wfn_spectra
  dmrg.iprint = iprint
  dmrg.cutoff = cutoff

diff --git a/pyblock2/driver/parser.py b/pyblock2/driver/parser.py
@@ -42,7 +42,7 @@
  "model", "k_symmetry", "k_irrep", "k_mod", "init_mps_center", "heisenberg",
  "use_complex", "real_density_matrix", "expt_algo_type", "one_body_parallel_rule",
  "davidson_max_iter", "davidson_soft_max_iter", "linear_soft_max_iter",
- "n_sub_sweeps", "complex_mps", "split_states", "trans_mps_to_complex",
+ "davidson_def_max_size", "n_sub_sweeps", "complex_mps", "split_states", "trans_mps_to_complex",
  "use_general_spin", "trans_integral_to_spin_orbital", "store_wfn_spectra",
  "tran_bra_range", "tran_ket_range", "tran_triangular", "use_hybrid_complex",
  "mem_ratio", "min_mpo_mem", "qc_mpo_type", "full_integral", "skip_inact_ext_sites",

diff --git a/src/big_site/sweep_algorithm_big_site.hpp b/src/big_site/sweep_algorithm_big_site.hpp
@@ -297,6 +297,8 @@ struct DMRGBigSiteAQCC : DMRGBigSite<S, FL, FLS> {
  using DMRGBigSite<S, FL, FLS>::ext_mes;
  using DMRGBigSite<S, FL, FLS>::davidson_soft_max_iter;
  using DMRGBigSite<S, FL, FLS>::davidson_max_iter;
+ using DMRGBigSite<S, FL, FLS>::davidson_def_min_size;
+ using DMRGBigSite<S, FL, FLS>::davidson_def_max_size;
  using DMRGBigSite<S, FL, FLS>::noise_type;
  using DMRGBigSite<S, FL, FLS>::decomp_type;
  using DMRGBigSite<S, FL, FLS>::energies;
@@ -464,7 +466,8 @@ struct DMRGBigSiteAQCC : DMRGBigSite<S, FL, FLS> {
  // TODO: For RAS mode, it might be good to do several iterations
  // for the first site as well.
  pdi = aqcc_eff->eigs(iprint >= 3, davidson_conv_thrd, davidson_max_iter,
- davidson_soft_max_iter, DavidsonTypes::Normal, 0.0,
+ davidson_soft_max_iter, davidson_def_min_size,
+ davidson_def_max_size, DavidsonTypes::Normal, 0.0,
  me->para_rule);
  teig += _t.get_time();
  if ((noise_type & NoiseTypes::Perturbative) && noise != 0)
@@ -549,6 +552,7 @@ struct DMRGBigSiteAQCC : DMRGBigSite<S, FL, FLS> {
  const auto pdi2 =
  aqcc_eff->eigs(iprint >= 3, davidson_conv_thrd,
  davidson_max_iter, davidson_soft_max_iter,
+ davidson_def_min_size, davidson_def_max_size,
  DavidsonTypes::Normal, 0.0, me->para_rule);
  const FPS energy =
  (FPS)std::get<0>(pdi2) + (FPS)me->mpo->const_e;
@@ -598,6 +602,7 @@ struct DMRGBigSiteAQCC : DMRGBigSite<S, FL, FLS> {
  auto aqcc_eff = get_aqcc_eff(h_eff, d_eff1, d_eff2, d_eff3, d_eff4);
  pdi = aqcc_eff->eigs(iprint >= 3, davidson_conv_thrd,
  davidson_max_iter, davidson_soft_max_iter,
+ davidson_def_min_size, davidson_def_max_size,
  DavidsonTypes::Normal, 0.0, me->para_rule);
  const FPS energy = (FPS)std::get<0>(pdi) + (FPS)me->mpo->const_e;
  smallest_energy = min(energy, smallest_energy);
@@ -630,6 +635,8 @@ struct DMRGBigSiteAQCCOLD : DMRGBigSite<S, FL, FLS> {
  using DMRGBigSite<S, FL, FLS>::me;
  using DMRGBigSite<S, FL, FLS>::davidson_soft_max_iter;
  using DMRGBigSite<S, FL, FLS>::davidson_max_iter;
+ using DMRGBigSite<S, FL, FLS>::davidson_def_min_size;
+ using DMRGBigSite<S, FL, FLS>::davidson_def_max_size;
  using DMRGBigSite<S, FL, FLS>::noise_type;
  using DMRGBigSite<S, FL, FLS>::decomp_type;
  using DMRGBigSite<S, FL, FLS>::energies;
@@ -721,7 +728,8 @@ struct DMRGBigSiteAQCCOLD : DMRGBigSite<S, FL, FLS> {
  Partition<S, FL>::get_uniq_labels({h_eff->hop_mat});
  vector<vector<pair<uint8_t, S>>> msubsl =
  Partition<S, FL>::get_uniq_sub_labels(
- h_eff->op->mat, h_eff->hop_mat, msl, h_eff->hop_left_vacuum);
+ h_eff->op->mat, h_eff->hop_mat, msl,
+ h_eff->hop_left_vacuum);
  diag_info->initialize_diag(
  cdq, h_eff->opdq, msubsl[0], h_eff->left_op_infos,
  h_eff->right_op_infos, h_eff->diag->info,
@@ -749,6 +757,7 @@ struct DMRGBigSiteAQCCOLD : DMRGBigSite<S, FL, FLS> {
  const auto pdi2 =
  h_eff->eigs(iprint >= 3, davidson_conv_thrd,
  davidson_max_iter, davidson_soft_max_iter,
+ davidson_def_min_size, davidson_def_max_size,
  DavidsonTypes::Normal, 0.0, me->para_rule);
  const auto energy =
  (FPS)std::get<0>(pdi2) + (FPS)me->mpo->const_e;
@@ -802,6 +811,7 @@ struct DMRGBigSiteAQCCOLD : DMRGBigSite<S, FL, FLS> {
  } else {
  pdi = h_eff->eigs(iprint >= 3, davidson_conv_thrd,
  davidson_max_iter, davidson_soft_max_iter,
+ davidson_def_min_size, davidson_def_max_size,
  DavidsonTypes::Normal, 0.0, me->para_rule);
  }
  teig += _t.get_time();

diff --git a/src/core/parallel_mpi.hpp b/src/core/parallel_mpi.hpp
@@ -384,6 +384,13 @@ template <typename S> struct MPICommunicator : ParallelCommunicator<S> {
  void allreduce_max(vector<complex<float>> &vs) override {
  allreduce_max(vs.data(), vs.size());
  }
+ void reduce_max(uint64_t *data, size_t len, int owner) override { // Element-wise maximum of `len` uint64_t values over the ranks of `comm`; the result is delivered to rank `owner`.
+ _t.get_time(); // return value discarded; presumably restarts the timer so the later get_time() covers only this call -- TODO confirm
+ int ierr = MPI_Reduce(rank == owner ? MPI_IN_PLACE : data, data, len, // the owner passes MPI_IN_PLACE, so its contribution is read from `data` and replaced by the reduced result
+ MPI_UINT64_T, MPI_MAX, owner, comm); // NOTE(review): `len` is sent in a single call (no chunk_size loop as in allreduce_min); assumes it fits MPI's int count -- confirm
+ assert(ierr == 0); // the MPI return code is only checked when assert is enabled
+ tcomm += _t.get_time(); // add the timer reading to tcomm (presumably the accumulated communication time)
+ }
  void allreduce_min(double *data, size_t len) override {
  _t.get_time();
  for (size_t offset = 0; offset < len; offset += chunk_size) {
@@ -725,6 +732,9 @@ template <typename S> struct MPICommunicator : ParallelCommunicator<S> {
  void reduce_sum_optional(uint64_t *data, size_t len, int owner) override {
  reduce_sum(data, len, owner);
  }
+ void reduce_max_optional(uint64_t *data, size_t len, int owner) override { // "Optional" max-reduction: in this communicator it is always carried out.
+ reduce_max(data, len, owner); // forward the arguments unchanged to reduce_max
+ }
  void waitall() override {
  _t.get_time();
  int ierr =

diff --git a/src/core/parallel_rule.hpp b/src/core/parallel_rule.hpp
@@ -214,6 +214,9 @@ template <typename S> struct ParallelCommunicator {
  virtual void allreduce_max(vector<complex<float>> &vs) {
  assert(size == 1);
  }
+ virtual void reduce_max(uint64_t *data, size_t len, int owner) { // Default max-reduction: performs no communication and leaves `data` untouched.
+ assert(size == 1); // only legitimate when `size` is 1 (presumably a single-process communicator); other cases must override
+ }
  virtual void reduce_sum(const shared_ptr<SparseMatrixGroup<S, double>> &mat,
  int owner) {
  assert(size == 1);
@@ -299,6 +302,7 @@ template <typename S> struct ParallelCommunicator {
  // mainly for no communication parallel execution in serial
  virtual void reduce_sum_optional(double *data, size_t len, int owner) {}
  virtual void reduce_sum_optional(uint64_t *data, size_t len, int owner) {}
+ virtual void reduce_max_optional(uint64_t *data, size_t len, int owner) {} // Default is a no-op with no size assertion, so it is callable without any communication; derived communicators may override.
  virtual void allreduce_logical_or(bool &v) { assert(size == 1); }
  virtual void waitall() { assert(size == 1); }
 };

diff --git a/src/dmrg/effective_functions.hpp b/src/dmrg/effective_functions.hpp
@@ -294,7 +294,8 @@ struct EffectiveFunctions<
  typename const_fl_type<FL>::FL const_e, FL omega, FL eta,
  const shared_ptr<SparseMatrix<S, FL>> &real_bra,
  int n_harmonic_projection = 0, bool iprint = false, FP conv_thrd = 5E-6,
- int max_iter = 5000, int soft_max_iter = -1,
+ int max_iter = 5000, int soft_max_iter = -1, int deflation_min_size = 2,
+ int deflation_max_size = 50,
  const shared_ptr<ParallelRule<S>> &para_rule = nullptr) {
  int nmult = 0, nmultx = 0;
  frame_<FP>()->activate(0);
@@ -367,7 +368,8 @@ struct EffectiveFunctions<
  DavidsonTypes::HarmonicGreaterThan | DavidsonTypes::NoPrecond,
  nmultx, iprint,
  para_rule == nullptr ? nullptr : para_rule->comm, 1E-4,
- max_iter, soft_max_iter, 2, 50);
+ max_iter, soft_max_iter, deflation_min_size,
+ deflation_max_size);
  nmultp = nmult;
  nmult = 0;
  igf = IterativeMatrixFunctions<FL>::deflated_conjugate_gradient(
@@ -465,7 +467,8 @@ struct EffectiveFunctions<
  const shared_ptr<EffectiveHamiltonian<S, FL, MultiMPS<S, FL>>> &h_eff,
  const shared_ptr<EffectiveHamiltonian<S, FC, MultiMPS<S, FC>>> &x_eff,
  bool iprint = false, FP conv_thrd = 5E-6, int max_iter = 5000,
- int soft_max_iter = -1,
+ int soft_max_iter = -1, int deflation_min_size = 2,
+ int deflation_max_size = 50,
  DavidsonTypes davidson_type = DavidsonTypes::Normal, FP shift = 0,
  const shared_ptr<ParallelRule<S>> &para_rule = nullptr) {
  int ndav = 0;
@@ -534,7 +537,7 @@ struct EffectiveFunctions<
  vector<FP> xeners = IterativeMatrixFunctions<FC>::harmonic_davidson(
  f, aa, bs, shift, davidson_type, ndav, iprint,
  para_rule == nullptr ? nullptr : para_rule->comm, conv_thrd,
- max_iter, soft_max_iter);
+ max_iter, soft_max_iter, deflation_min_size, deflation_max_size);
  vector<typename const_fl_type<FP>::FL> eners(xeners.size());
  for (size_t i = 0; i < xeners.size(); i++)
  eners[i] = (typename const_fl_type<FP>::FL)xeners[i];
@@ -675,7 +678,8 @@ struct EffectiveFunctions<S, FL,
  typename const_fl_type<FL>::FL const_e, FL omega, FL eta,
  const shared_ptr<SparseMatrix<S, FL>> &real_bra,
  int n_harmonic_projection = 0, bool iprint = false, FP conv_thrd = 5E-6,
- int max_iter = 5000, int soft_max_iter = -1,
+ int max_iter = 5000, int soft_max_iter = -1, int deflation_min_size = 2,
+ int deflation_max_size = 50,
  const shared_ptr<ParallelRule<S>> &para_rule = nullptr) {
  assert(false);
  return make_tuple(0.0, make_pair(0, 0), (size_t)0, 0.0);
@@ -699,7 +703,8 @@ struct EffectiveFunctions<S, FL,
  const shared_ptr<EffectiveHamiltonian<S, FL, MultiMPS<S, FL>>> &h_eff,
  const shared_ptr<EffectiveHamiltonian<S, FC, MultiMPS<S, FC>>> &x_eff,
  bool iprint = false, FP conv_thrd = 5E-6, int max_iter = 5000,
- int soft_max_iter = -1,
+ int soft_max_iter = -1, int deflation_min_size = 2,
+ int deflation_max_size = 50,
  DavidsonTypes davidson_type = DavidsonTypes::Normal, FP shift = 0,
  const shared_ptr<ParallelRule<S>> &para_rule = nullptr) {
  assert(false);

diff --git a/src/dmrg/effective_hamiltonian.hpp b/src/dmrg/effective_hamiltonian.hpp
@@ -408,7 +408,8 @@ struct EffectiveHamiltonian<S, FL, MPS<S, FL>> {
  // energy, ndav, nflop, tdav
  tuple<typename const_fl_type<FP>::FL, int, size_t, double>
  eigs(bool iprint = false, FP conv_thrd = 5E-6, int max_iter = 5000,
- int soft_max_iter = -1,
+ int soft_max_iter = -1, int deflation_min_size = 2,
+ int deflation_max_size = 50,
  DavidsonTypes davidson_type = DavidsonTypes::Normal, FP shift = 0,
  const shared_ptr<ParallelRule<S>> &para_rule = nullptr,
  const vector<shared_ptr<SparseMatrix<S, FL>>> &ortho_bra =
@@ -437,13 +438,13 @@ struct EffectiveHamiltonian<S, FL, MPS<S, FL>> {
  ? IterativeMatrixFunctions<FL>::harmonic_davidson(
  *tf, aa, bs, shift, davidson_type, ndav, iprint,
  para_rule == nullptr ? nullptr : para_rule->comm,
- conv_thrd, max_iter, soft_max_iter, 2, 50, ors,
- projection_weights)
+ conv_thrd, max_iter, soft_max_iter, deflation_min_size,
+ deflation_max_size, ors, projection_weights)
  : IterativeMatrixFunctions<FL>::harmonic_davidson(
  *this, aa, bs, shift, davidson_type, ndav, iprint,
  para_rule == nullptr ? nullptr : para_rule->comm,
- conv_thrd, max_iter, soft_max_iter, 2, 50, ors,
- projection_weights);
+ conv_thrd, max_iter, soft_max_iter, deflation_min_size,
+ deflation_max_size, ors, projection_weights);
  post_precompute();
  uint64_t nflop = tf->opf->seq->cumulative_nflop;
  if (para_rule != nullptr)
@@ -1002,7 +1003,8 @@ template <typename S, typename FL> struct LinearEffectiveHamiltonian {
  // energy, ndav, nflop, tdav
  tuple<typename const_fl_type<FP>::FL, int, size_t, double>
  eigs(bool iprint = false, FP conv_thrd = 5E-6, int max_iter = 5000,
- int soft_max_iter = -1,
+ int soft_max_iter = -1, int deflation_min_size = 2,
+ int deflation_max_size = 50,
  DavidsonTypes davidson_type = DavidsonTypes::Normal, FP shift = 0,
  const shared_ptr<ParallelRule<S>> &para_rule = nullptr) {
  int ndav = 0;
@@ -1029,7 +1031,7 @@ template <typename S, typename FL> struct LinearEffectiveHamiltonian {
  vector<FP> eners = IterativeMatrixFunctions<FL>::harmonic_davidson(
  *this, aa, bs, shift, davidson_type, ndav, iprint,
  para_rule == nullptr ? nullptr : para_rule->comm, conv_thrd,
- max_iter, soft_max_iter);
+ max_iter, soft_max_iter, deflation_min_size, deflation_max_size);
  for (size_t ih = 0; ih < h_effs.size(); ih++)
  h_effs[ih]->post_precompute();
  uint64_t nflop = tf->opf->seq->cumulative_nflop;
@@ -1458,7 +1460,8 @@ struct EffectiveHamiltonian<S, FL, MultiMPS<S, FL>> {
  // energies, ndav, nflop, tdav
  tuple<vector<typename const_fl_type<FP>::FL>, int, size_t, double>
  eigs(bool iprint = false, FP conv_thrd = 5E-6, int max_iter = 5000,
- int soft_max_iter = -1,
+ int soft_max_iter = -1, int deflation_min_size = 2,
+ int deflation_max_size = 50,
  DavidsonTypes davidson_type = DavidsonTypes::Normal, FP shift = 0,
  const shared_ptr<ParallelRule<S>> &para_rule = nullptr,
  const vector<shared_ptr<SparseMatrix<S, FL>>> &ortho_bra =
@@ -1492,13 +1495,13 @@ struct EffectiveHamiltonian<S, FL, MultiMPS<S, FL>> {
  ? IterativeMatrixFunctions<FL>::harmonic_davidson(
  *tf, aa, bs, shift, davidson_type, ndav, iprint,
  para_rule == nullptr ? nullptr : para_rule->comm,
- conv_thrd, max_iter, soft_max_iter, 2, 50, ors,
- projection_weights)
+ conv_thrd, max_iter, soft_max_iter, deflation_min_size,
+ deflation_max_size, ors, projection_weights)
  : IterativeMatrixFunctions<FL>::harmonic_davidson(
  *this, aa, bs, shift, davidson_type, ndav, iprint,
  para_rule == nullptr ? nullptr : para_rule->comm,
- conv_thrd, max_iter, soft_max_iter, 2, 50, ors,
- projection_weights);
+ conv_thrd, max_iter, soft_max_iter, deflation_min_size,
+ deflation_max_size, ors, projection_weights);
  vector<typename const_fl_type<FP>::FL> eners(xeners.size());
  for (size_t i = 0; i < xeners.size(); i++)
  eners[i] = (typename const_fl_type<FP>::FL)xeners[i];