From a72607e0e0a92897cb2298d67de09a79038e199e Mon Sep 17 00:00:00 2001 From: Huanchen Zhai Date: Thu, 30 Nov 2023 20:31:38 -0800 Subject: [PATCH] allow changing dav subspace size; print wfn mem --- pyblock2/driver/block2main | 4 + pyblock2/driver/core.py | 2 + pyblock2/driver/parser.py | 2 +- src/big_site/sweep_algorithm_big_site.hpp | 14 ++- src/core/parallel_mpi.hpp | 10 ++ src/core/parallel_rule.hpp | 4 + src/dmrg/effective_functions.hpp | 17 ++- src/dmrg/effective_hamiltonian.hpp | 27 ++-- src/dmrg/sweep_algorithm.hpp | 147 ++++++++++++++++++++-- src/pybind/pybind_dmrg.hpp | 56 ++++++--- 10 files changed, 235 insertions(+), 48 deletions(-) diff --git a/pyblock2/driver/block2main b/pyblock2/driver/block2main index 877bcd41..711dbd5e 100755 --- a/pyblock2/driver/block2main +++ b/pyblock2/driver/block2main @@ -2324,6 +2324,8 @@ if not pre_run: dmrg.davidson_max_iter = int(dic.get("davidson_max_iter", 5000)) dmrg.davidson_soft_max_iter = int( dic.get("davidson_soft_max_iter", 4000)) + dmrg.davidson_def_max_size = int( + dic.get("davidson_def_max_size", 50)) dmrg.store_wfn_spectra = store_wfn_spectra dmrg.site_dependent_bond_dims = site_dependent_bdims @@ -2466,6 +2468,8 @@ if not pre_run: dmrg.davidson_max_iter = int(dic.get("davidson_max_iter", 5000)) dmrg.davidson_soft_max_iter = int( dic.get("davidson_soft_max_iter", 4000)) + dmrg.davidson_def_max_size = int( + dic.get("davidson_def_max_size", 50)) dmrg.decomp_type = decomp_type dmrg.trunc_type = trunc_type dmrg.davidson_conv_thrds = VectorFP(dav_thrds) diff --git a/pyblock2/driver/core.py b/pyblock2/driver/core.py index fc5b783e..0d7f3f6b 100644 --- a/pyblock2/driver/core.py +++ b/pyblock2/driver/core.py @@ -2775,6 +2775,7 @@ def dmrg( cutoff=1e-20, twosite_to_onesite=None, dav_max_iter=4000, + dav_def_max_size=50, proj_mpss=None, proj_weights=None, store_wfn_spectra=True, @@ -2820,6 +2821,7 @@ def dmrg( dmrg.davidson_conv_thrds = bw.VectorFP(thrds) dmrg.davidson_max_iter = dav_max_iter + 100 
dmrg.davidson_soft_max_iter = dav_max_iter + dmrg.davidson_def_max_size = dav_def_max_size dmrg.store_wfn_spectra = store_wfn_spectra dmrg.iprint = iprint dmrg.cutoff = cutoff diff --git a/pyblock2/driver/parser.py b/pyblock2/driver/parser.py index d6679cd5..c1ef5d75 100755 --- a/pyblock2/driver/parser.py +++ b/pyblock2/driver/parser.py @@ -42,7 +42,7 @@ "model", "k_symmetry", "k_irrep", "k_mod", "init_mps_center", "heisenberg", "use_complex", "real_density_matrix", "expt_algo_type", "one_body_parallel_rule", "davidson_max_iter", "davidson_soft_max_iter", "linear_soft_max_iter", - "n_sub_sweeps", "complex_mps", "split_states", "trans_mps_to_complex", + "davidson_def_max_size", "n_sub_sweeps", "complex_mps", "split_states", "trans_mps_to_complex", "use_general_spin", "trans_integral_to_spin_orbital", "store_wfn_spectra", "tran_bra_range", "tran_ket_range", "tran_triangular", "use_hybrid_complex", "mem_ratio", "min_mpo_mem", "qc_mpo_type", "full_integral", "skip_inact_ext_sites", diff --git a/src/big_site/sweep_algorithm_big_site.hpp b/src/big_site/sweep_algorithm_big_site.hpp index 59da85ae..d1f80565 100644 --- a/src/big_site/sweep_algorithm_big_site.hpp +++ b/src/big_site/sweep_algorithm_big_site.hpp @@ -297,6 +297,8 @@ struct DMRGBigSiteAQCC : DMRGBigSite { using DMRGBigSite::ext_mes; using DMRGBigSite::davidson_soft_max_iter; using DMRGBigSite::davidson_max_iter; + using DMRGBigSite::davidson_def_min_size; + using DMRGBigSite::davidson_def_max_size; using DMRGBigSite::noise_type; using DMRGBigSite::decomp_type; using DMRGBigSite::energies; @@ -464,7 +466,8 @@ struct DMRGBigSiteAQCC : DMRGBigSite { // TODO: For RAS mode, it might be good to do several iterations // for the first site as well. 
pdi = aqcc_eff->eigs(iprint >= 3, davidson_conv_thrd, davidson_max_iter, - davidson_soft_max_iter, DavidsonTypes::Normal, 0.0, + davidson_soft_max_iter, davidson_def_min_size, + davidson_def_max_size, DavidsonTypes::Normal, 0.0, me->para_rule); teig += _t.get_time(); if ((noise_type & NoiseTypes::Perturbative) && noise != 0) @@ -549,6 +552,7 @@ struct DMRGBigSiteAQCC : DMRGBigSite { const auto pdi2 = aqcc_eff->eigs(iprint >= 3, davidson_conv_thrd, davidson_max_iter, davidson_soft_max_iter, + davidson_def_min_size, davidson_def_max_size, DavidsonTypes::Normal, 0.0, me->para_rule); const FPS energy = (FPS)std::get<0>(pdi2) + (FPS)me->mpo->const_e; @@ -598,6 +602,7 @@ struct DMRGBigSiteAQCC : DMRGBigSite { auto aqcc_eff = get_aqcc_eff(h_eff, d_eff1, d_eff2, d_eff3, d_eff4); pdi = aqcc_eff->eigs(iprint >= 3, davidson_conv_thrd, davidson_max_iter, davidson_soft_max_iter, + davidson_def_min_size, davidson_def_max_size, DavidsonTypes::Normal, 0.0, me->para_rule); const FPS energy = (FPS)std::get<0>(pdi) + (FPS)me->mpo->const_e; smallest_energy = min(energy, smallest_energy); @@ -630,6 +635,8 @@ struct DMRGBigSiteAQCCOLD : DMRGBigSite { using DMRGBigSite::me; using DMRGBigSite::davidson_soft_max_iter; using DMRGBigSite::davidson_max_iter; + using DMRGBigSite::davidson_def_min_size; + using DMRGBigSite::davidson_def_max_size; using DMRGBigSite::noise_type; using DMRGBigSite::decomp_type; using DMRGBigSite::energies; @@ -721,7 +728,8 @@ struct DMRGBigSiteAQCCOLD : DMRGBigSite { Partition::get_uniq_labels({h_eff->hop_mat}); vector>> msubsl = Partition::get_uniq_sub_labels( - h_eff->op->mat, h_eff->hop_mat, msl, h_eff->hop_left_vacuum); + h_eff->op->mat, h_eff->hop_mat, msl, + h_eff->hop_left_vacuum); diag_info->initialize_diag( cdq, h_eff->opdq, msubsl[0], h_eff->left_op_infos, h_eff->right_op_infos, h_eff->diag->info, @@ -749,6 +757,7 @@ struct DMRGBigSiteAQCCOLD : DMRGBigSite { const auto pdi2 = h_eff->eigs(iprint >= 3, davidson_conv_thrd, davidson_max_iter, 
davidson_soft_max_iter, + davidson_def_min_size, davidson_def_max_size, DavidsonTypes::Normal, 0.0, me->para_rule); const auto energy = (FPS)std::get<0>(pdi2) + (FPS)me->mpo->const_e; @@ -802,6 +811,7 @@ struct DMRGBigSiteAQCCOLD : DMRGBigSite { } else { pdi = h_eff->eigs(iprint >= 3, davidson_conv_thrd, davidson_max_iter, davidson_soft_max_iter, + davidson_def_min_size, davidson_def_max_size, DavidsonTypes::Normal, 0.0, me->para_rule); } teig += _t.get_time(); diff --git a/src/core/parallel_mpi.hpp b/src/core/parallel_mpi.hpp index 7cbb7f2a..e8c73da5 100644 --- a/src/core/parallel_mpi.hpp +++ b/src/core/parallel_mpi.hpp @@ -384,6 +384,13 @@ template struct MPICommunicator : ParallelCommunicator { void allreduce_max(vector> &vs) override { allreduce_max(vs.data(), vs.size()); } + void reduce_max(uint64_t *data, size_t len, int owner) override { + _t.get_time(); + int ierr = MPI_Reduce(rank == owner ? MPI_IN_PLACE : data, data, len, + MPI_UINT64_T, MPI_MAX, owner, comm); + assert(ierr == 0); + tcomm += _t.get_time(); + } void allreduce_min(double *data, size_t len) override { _t.get_time(); for (size_t offset = 0; offset < len; offset += chunk_size) { @@ -725,6 +732,9 @@ template struct MPICommunicator : ParallelCommunicator { void reduce_sum_optional(uint64_t *data, size_t len, int owner) override { reduce_sum(data, len, owner); } + void reduce_max_optional(uint64_t *data, size_t len, int owner) override { + reduce_max(data, len, owner); + } void waitall() override { _t.get_time(); int ierr = diff --git a/src/core/parallel_rule.hpp b/src/core/parallel_rule.hpp index 2c7061e1..5f9e686e 100644 --- a/src/core/parallel_rule.hpp +++ b/src/core/parallel_rule.hpp @@ -214,6 +214,9 @@ template struct ParallelCommunicator { virtual void allreduce_max(vector> &vs) { assert(size == 1); } + virtual void reduce_max(uint64_t *data, size_t len, int owner) { + assert(size == 1); + } virtual void reduce_sum(const shared_ptr> &mat, int owner) { assert(size == 1); @@ -299,6 +302,7 @@ 
template struct ParallelCommunicator { // mainly for no communication parallel execution in serial virtual void reduce_sum_optional(double *data, size_t len, int owner) {} virtual void reduce_sum_optional(uint64_t *data, size_t len, int owner) {} + virtual void reduce_max_optional(uint64_t *data, size_t len, int owner) {} virtual void allreduce_logical_or(bool &v) { assert(size == 1); } virtual void waitall() { assert(size == 1); } }; diff --git a/src/dmrg/effective_functions.hpp b/src/dmrg/effective_functions.hpp index 045716d6..6d07d7bf 100644 --- a/src/dmrg/effective_functions.hpp +++ b/src/dmrg/effective_functions.hpp @@ -294,7 +294,8 @@ struct EffectiveFunctions< typename const_fl_type::FL const_e, FL omega, FL eta, const shared_ptr> &real_bra, int n_harmonic_projection = 0, bool iprint = false, FP conv_thrd = 5E-6, - int max_iter = 5000, int soft_max_iter = -1, + int max_iter = 5000, int soft_max_iter = -1, int deflation_min_size = 2, + int deflation_max_size = 50, const shared_ptr> &para_rule = nullptr) { int nmult = 0, nmultx = 0; frame_()->activate(0); @@ -367,7 +368,8 @@ struct EffectiveFunctions< DavidsonTypes::HarmonicGreaterThan | DavidsonTypes::NoPrecond, nmultx, iprint, para_rule == nullptr ?
nullptr : para_rule->comm, 1E-4, - max_iter, soft_max_iter, 2, 50); + max_iter, soft_max_iter, deflation_min_size, + deflation_max_size); nmultp = nmult; nmult = 0; igf = IterativeMatrixFunctions::deflated_conjugate_gradient( @@ -465,7 +467,8 @@ struct EffectiveFunctions< const shared_ptr>> &h_eff, const shared_ptr>> &x_eff, bool iprint = false, FP conv_thrd = 5E-6, int max_iter = 5000, - int soft_max_iter = -1, + int soft_max_iter = -1, int deflation_min_size = 2, + int deflation_max_size = 50, DavidsonTypes davidson_type = DavidsonTypes::Normal, FP shift = 0, const shared_ptr> &para_rule = nullptr) { int ndav = 0; @@ -534,7 +537,7 @@ struct EffectiveFunctions< vector xeners = IterativeMatrixFunctions::harmonic_davidson( f, aa, bs, shift, davidson_type, ndav, iprint, para_rule == nullptr ? nullptr : para_rule->comm, conv_thrd, - max_iter, soft_max_iter); + max_iter, soft_max_iter, deflation_min_size, deflation_max_size); vector::FL> eners(xeners.size()); for (size_t i = 0; i < xeners.size(); i++) eners[i] = (typename const_fl_type::FL)xeners[i]; @@ -675,7 +678,8 @@ struct EffectiveFunctions::FL const_e, FL omega, FL eta, const shared_ptr> &real_bra, int n_harmonic_projection = 0, bool iprint = false, FP conv_thrd = 5E-6, - int max_iter = 5000, int soft_max_iter = -1, + int max_iter = 5000, int soft_max_iter = -1, int deflation_min_size = 2, + int deflation_max_size = 50, const shared_ptr> &para_rule = nullptr) { assert(false); return make_tuple(0.0, make_pair(0, 0), (size_t)0, 0.0); @@ -699,7 +703,8 @@ struct EffectiveFunctions>> &h_eff, const shared_ptr>> &x_eff, bool iprint = false, FP conv_thrd = 5E-6, int max_iter = 5000, - int soft_max_iter = -1, + int soft_max_iter = -1, int deflation_min_size = 2, + int deflation_max_size = 50, DavidsonTypes davidson_type = DavidsonTypes::Normal, FP shift = 0, const shared_ptr> &para_rule = nullptr) { assert(false); diff --git a/src/dmrg/effective_hamiltonian.hpp b/src/dmrg/effective_hamiltonian.hpp index c3271158..d6dd4c4b 100644 ---
a/src/dmrg/effective_hamiltonian.hpp +++ b/src/dmrg/effective_hamiltonian.hpp @@ -408,7 +408,8 @@ struct EffectiveHamiltonian> { // energy, ndav, nflop, tdav tuple::FL, int, size_t, double> eigs(bool iprint = false, FP conv_thrd = 5E-6, int max_iter = 5000, - int soft_max_iter = -1, + int soft_max_iter = -1, int deflation_min_size = 2, + int deflation_max_size = 50, DavidsonTypes davidson_type = DavidsonTypes::Normal, FP shift = 0, const shared_ptr> &para_rule = nullptr, const vector>> &ortho_bra = @@ -437,13 +438,13 @@ struct EffectiveHamiltonian> { ? IterativeMatrixFunctions::harmonic_davidson( *tf, aa, bs, shift, davidson_type, ndav, iprint, para_rule == nullptr ? nullptr : para_rule->comm, - conv_thrd, max_iter, soft_max_iter, 2, 50, ors, - projection_weights) + conv_thrd, max_iter, soft_max_iter, deflation_min_size, + deflation_max_size, ors, projection_weights) : IterativeMatrixFunctions::harmonic_davidson( *this, aa, bs, shift, davidson_type, ndav, iprint, para_rule == nullptr ? nullptr : para_rule->comm, - conv_thrd, max_iter, soft_max_iter, 2, 50, ors, - projection_weights); + conv_thrd, max_iter, soft_max_iter, deflation_min_size, + deflation_max_size, ors, projection_weights); post_precompute(); uint64_t nflop = tf->opf->seq->cumulative_nflop; if (para_rule != nullptr) @@ -1002,7 +1003,8 @@ template struct LinearEffectiveHamiltonian { // energy, ndav, nflop, tdav tuple::FL, int, size_t, double> eigs(bool iprint = false, FP conv_thrd = 5E-6, int max_iter = 5000, - int soft_max_iter = -1, + int soft_max_iter = -1, int deflation_min_size = 2, + int deflation_max_size = 50, DavidsonTypes davidson_type = DavidsonTypes::Normal, FP shift = 0, const shared_ptr> &para_rule = nullptr) { int ndav = 0; @@ -1029,7 +1031,7 @@ template struct LinearEffectiveHamiltonian { vector eners = IterativeMatrixFunctions::harmonic_davidson( *this, aa, bs, shift, davidson_type, ndav, iprint, para_rule == nullptr ?
nullptr : para_rule->comm, conv_thrd, - max_iter, soft_max_iter); + max_iter, soft_max_iter, deflation_min_size, deflation_max_size); for (size_t ih = 0; ih < h_effs.size(); ih++) h_effs[ih]->post_precompute(); uint64_t nflop = tf->opf->seq->cumulative_nflop; @@ -1458,7 +1460,8 @@ struct EffectiveHamiltonian> { // energies, ndav, nflop, tdav tuple::FL>, int, size_t, double> eigs(bool iprint = false, FP conv_thrd = 5E-6, int max_iter = 5000, - int soft_max_iter = -1, + int soft_max_iter = -1, int deflation_min_size = 2, + int deflation_max_size = 50, DavidsonTypes davidson_type = DavidsonTypes::Normal, FP shift = 0, const shared_ptr> &para_rule = nullptr, const vector>> &ortho_bra = @@ -1492,13 +1495,13 @@ struct EffectiveHamiltonian> { ? IterativeMatrixFunctions::harmonic_davidson( *tf, aa, bs, shift, davidson_type, ndav, iprint, para_rule == nullptr ? nullptr : para_rule->comm, - conv_thrd, max_iter, soft_max_iter, 2, 50, ors, - projection_weights) + conv_thrd, max_iter, soft_max_iter, deflation_min_size, + deflation_max_size, ors, projection_weights) : IterativeMatrixFunctions::harmonic_davidson( *this, aa, bs, shift, davidson_type, ndav, iprint, para_rule == nullptr ?
nullptr : para_rule->comm, - conv_thrd, max_iter, soft_max_iter, 2, 50, ors, - projection_weights); + conv_thrd, max_iter, soft_max_iter, deflation_min_size, + deflation_max_size, ors, projection_weights); vector::FL> eners(xeners.size()); for (size_t i = 0; i < xeners.size(); i++) eners[i] = (typename const_fl_type::FL)xeners[i]; diff --git a/src/dmrg/sweep_algorithm.hpp b/src/dmrg/sweep_algorithm.hpp index f01e7a4d..33587c99 100644 --- a/src/dmrg/sweep_algorithm.hpp +++ b/src/dmrg/sweep_algorithm.hpp @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -107,6 +108,9 @@ template struct DMRG { size_t sweep_cumulative_nflop = 0; size_t sweep_max_pket_size = 0; size_t sweep_max_eff_ham_size = 0; + size_t sweep_max_eff_wfn_size = 0; + int davidson_def_min_size = 2; + int davidson_def_max_size = 50; double tprt = 0, teig = 0, teff = 0, tmve = 0, tblk = 0, tdm = 0, tsplt = 0, tsvd = 0, torth = 0; bool print_connection_time = false; @@ -586,9 +590,12 @@ template struct DMRG { me->bra->tensors[i], me->ket->tensors[i]); sweep_max_eff_ham_size = max(sweep_max_eff_ham_size, h_eff->op->get_total_memory()); + sweep_max_eff_wfn_size = + max(sweep_max_eff_wfn_size, h_eff->ket->total_memory); teff += _t.get_time(); pdi = h_eff->eigs(iprint >= 3, davidson_conv_thrd, davidson_max_iter, - davidson_soft_max_iter, davidson_type, + davidson_soft_max_iter, davidson_def_min_size, + davidson_def_max_size, davidson_type, davidson_shift - xreal((FL)me->mpo->const_e), me->para_rule, ortho_bra, projection_weights); teig += _t.get_time(); @@ -885,9 +892,12 @@ template struct DMRG { me->ket->tensors[i]); sweep_max_eff_ham_size = max(sweep_max_eff_ham_size, h_eff->op->get_total_memory()); + sweep_max_eff_wfn_size = + max(sweep_max_eff_wfn_size, h_eff->ket->total_memory); teff += _t.get_time(); pdi = h_eff->eigs(iprint >= 3, davidson_conv_thrd, davidson_max_iter, - davidson_soft_max_iter, davidson_type, + davidson_soft_max_iter, davidson_def_min_size, + 
davidson_def_max_size, davidson_type, davidson_shift - xreal((FL)me->mpo->const_e), me->para_rule, ortho_bra, projection_weights); teig += _t.get_time(); @@ -1336,23 +1346,44 @@ template struct DMRG { sweep_max_eff_ham_size = max(sweep_max_eff_ham_size, h_eff->op->get_total_memory() + x_eff->op->get_total_memory()); + sweep_max_eff_wfn_size = max( + sweep_max_eff_wfn_size, + accumulate( + h_eff->ket.begin(), h_eff->ket.end(), (size_t)0, + [](size_t x, shared_ptr> y) { + return x + y->total_memory; + }) + + accumulate( + x_eff->ket.begin(), x_eff->ket.end(), (size_t)0, + [](size_t x, shared_ptr> y) { + return x + y->total_memory; + })); } else { h_eff = me->multi_eff_ham( fuse_left ? FuseTypes::FuseL : FuseTypes::FuseR, forward, true); sweep_max_eff_ham_size = max(sweep_max_eff_ham_size, h_eff->op->get_total_memory()); + sweep_max_eff_wfn_size = + max(sweep_max_eff_wfn_size, + accumulate( + h_eff->ket.begin(), h_eff->ket.end(), (size_t)0, + [](size_t x, shared_ptr> y) { + return x + y->total_memory; + })); } teff += _t.get_time(); if (x_eff != nullptr) pdi = EffectiveFunctions::eigs_mixed( h_eff, x_eff, iprint >= 3, davidson_conv_thrd, - davidson_max_iter, davidson_soft_max_iter, davidson_type, + davidson_max_iter, davidson_soft_max_iter, + davidson_def_min_size, davidson_def_max_size, davidson_type, davidson_shift - xreal((FL)me->mpo->const_e), me->para_rule); else pdi = h_eff->eigs(iprint >= 3, davidson_conv_thrd, davidson_max_iter, - davidson_soft_max_iter, davidson_type, + davidson_soft_max_iter, davidson_def_min_size, + davidson_def_max_size, davidson_type, davidson_shift - xreal((FL)me->mpo->const_e), me->para_rule, ortho_bra, projection_weights); for (int i = 0; i < mket->nroots; i++) { @@ -1677,22 +1708,43 @@ template struct DMRG { sweep_max_eff_ham_size = max(sweep_max_eff_ham_size, h_eff->op->get_total_memory() + x_eff->op->get_total_memory()); + sweep_max_eff_wfn_size = max( + sweep_max_eff_wfn_size, + accumulate( + h_eff->ket.begin(), h_eff->ket.end(), 
(size_t)0, + [](size_t x, shared_ptr> y) { + return x + y->total_memory; + }) + + accumulate( + x_eff->ket.begin(), x_eff->ket.end(), (size_t)0, + [](size_t x, shared_ptr> y) { + return x + y->total_memory; + })); } else { h_eff = me->multi_eff_ham(FuseTypes::FuseLR, forward, true); sweep_max_eff_ham_size = max(sweep_max_eff_ham_size, h_eff->op->get_total_memory()); + sweep_max_eff_wfn_size = + max(sweep_max_eff_wfn_size, + accumulate( + h_eff->ket.begin(), h_eff->ket.end(), (size_t)0, + [](size_t x, shared_ptr> y) { + return x + y->total_memory; + })); } teff += _t.get_time(); if (x_eff != nullptr) pdi = EffectiveFunctions::eigs_mixed( h_eff, x_eff, iprint >= 3, davidson_conv_thrd, - davidson_max_iter, davidson_soft_max_iter, davidson_type, + davidson_max_iter, davidson_soft_max_iter, + davidson_def_min_size, davidson_def_max_size, davidson_type, davidson_shift - xreal((FL)me->mpo->const_e), me->para_rule); else pdi = h_eff->eigs(iprint >= 3, davidson_conv_thrd, davidson_max_iter, - davidson_soft_max_iter, davidson_type, + davidson_soft_max_iter, davidson_def_min_size, + davidson_def_max_size, davidson_type, davidson_shift - xreal((FL)me->mpo->const_e), me->para_rule, ortho_bra, projection_weights); for (int i = 0; i < mket->nroots; i++) { @@ -1802,6 +1854,7 @@ template struct DMRG { sweep_cumulative_nflop = 0; sweep_max_pket_size = 0; sweep_max_eff_ham_size = 0; + sweep_max_eff_wfn_size = 0; frame_()->reset_peak_used_memory(); vector sweep_range; if (forward) @@ -2092,6 +2145,7 @@ template struct DMRG { sweep_cumulative_nflop = 0; sweep_max_pket_size = 0; sweep_max_eff_ham_size = 0; + sweep_max_eff_wfn_size = 0; frame_()->reset_peak_used_memory(); sweep_energies.resize(me->n_sites - me->dot + 1, vector{1E9}); sweep_time.resize(me->n_sites - me->dot + 1, 0); @@ -2367,6 +2421,18 @@ template struct DMRG { << " | Tidle = " << tt[1] / comm->size << " | Twait = " << tt[2] / comm->size; } + size_t sweep_max_eff_ham_size_pm = sweep_max_eff_ham_size; + size_t
sweep_max_eff_wfn_size_pm = sweep_max_eff_wfn_size; + if (me->para_rule != nullptr) { + shared_ptr> comm = + me->para_rule->comm; + uint64_t tt[2] = {(uint64_t)sweep_max_eff_ham_size_pm, + (uint64_t)sweep_max_eff_wfn_size_pm}; + comm->reduce_max_optional(&tt[0], 2, comm->root); + // reduce_max_optional leaves the reduced maxima in tt (on the root rank under MPI); copy them back, otherwise the printed Hmem/Wmem are only the local values + sweep_max_eff_ham_size_pm = (size_t)tt[0]; + sweep_max_eff_wfn_size_pm = (size_t)tt[1]; + } size_t dmain = frame_()->peak_used_memory[0]; size_t dseco = frame_()->peak_used_memory[1]; size_t imain = frame_()->peak_used_memory[2]; @@ -2378,7 +2441,10 @@ template struct DMRG { << Parsing::to_size_string(imain + iseco) << " (" << (imain * 100 / (imain + iseco)) << "%)"; sout << " | Hmem = " - << Parsing::to_size_string(sweep_max_eff_ham_size * + << Parsing::to_size_string(sweep_max_eff_ham_size_pm * + sizeof(FL)); + sout << " | Wmem = " + << Parsing::to_size_string(sweep_max_eff_wfn_size_pm * sizeof(FL)); sout << " | Pmem = " << Parsing::to_size_string(sweep_max_pket_size * @@ -2480,6 +2546,8 @@ template struct Linear { int linear_max_iter = 5000; int linear_soft_max_iter = -1; int conv_required_sweeps = 3; + int linear_def_min_size = 2; + int linear_def_max_size = 50; ConvergenceTypes conv_type = ConvergenceTypes::LastMinimal; NoiseTypes noise_type = NoiseTypes::DensityMatrix; TruncationTypes trunc_type = TruncationTypes::Physical; @@ -2495,6 +2563,7 @@ template struct Linear { size_t sweep_cumulative_nflop = 0; size_t sweep_max_pket_size = 0; size_t sweep_max_eff_ham_size = 0; + size_t sweep_max_eff_wfn_size = 0; double tprt = 0, tmult = 0, teff = 0, tmve = 0, tblk = 0, tdm = 0, tsplt = 0, tsvd = 0; Timer _t, _t2; @@ -2722,6 +2791,8 @@ template struct Linear { linear_use_precondition, me->bra->tensors[i], right_bra); sweep_max_eff_ham_size = max(sweep_max_eff_ham_size, l_eff->op->get_total_memory()); + sweep_max_eff_wfn_size = + max(sweep_max_eff_wfn_size, l_eff->ket->total_memory); teff += _t.get_time(); if (eq_type == EquationTypes::Normal) { tuple, size_t, double> lpdi; @@ -2758,6 +2829,7 @@ template struct Linear { real_bra, cg_n_harmonic_projection, iprint >= 3,
linear_conv_thrd, linear_max_iter, linear_soft_max_iter, + linear_def_min_size, linear_def_max_size, me->para_rule); else lpdi = EffectiveFunctions::greens_function( @@ -2798,7 +2870,9 @@ template struct Linear { lpdi = EffectiveFunctions::greens_function_squared( l_eff, lme->mpo->const_e, gf_omega, gf_eta, real_bra, cg_n_harmonic_projection, iprint >= 3, linear_conv_thrd, - linear_max_iter, linear_soft_max_iter, me->para_rule); + linear_max_iter, linear_soft_max_iter, + linear_def_min_size, linear_def_max_size, + me->para_rule); else lpdi = EffectiveFunctions::greens_function( l_eff, lme->mpo->const_e, solver_type, gf_omega, gf_eta, @@ -3437,6 +3511,8 @@ template struct Linear { me->bra->tensors[i], right_bra); sweep_max_eff_ham_size = max(sweep_max_eff_ham_size, l_eff->op->get_total_memory()); + sweep_max_eff_wfn_size = + max(sweep_max_eff_wfn_size, l_eff->ket->total_memory); teff += _t.get_time(); if (eq_type == EquationTypes::Normal) { tuple, size_t, double> lpdi; @@ -3473,6 +3549,7 @@ template struct Linear { real_bra, cg_n_harmonic_projection, iprint >= 3, linear_conv_thrd, linear_max_iter, linear_soft_max_iter, + linear_def_min_size, linear_def_max_size, me->para_rule); else lpdi = EffectiveFunctions::greens_function( @@ -3513,7 +3590,9 @@ template struct Linear { lpdi = EffectiveFunctions::greens_function_squared( l_eff, lme->mpo->const_e, gf_omega, gf_eta, real_bra, cg_n_harmonic_projection, iprint >= 3, linear_conv_thrd, - linear_max_iter, linear_soft_max_iter, me->para_rule); + linear_max_iter, linear_soft_max_iter, + linear_def_min_size, linear_def_max_size, + me->para_rule); else lpdi = EffectiveFunctions::greens_function( l_eff, lme->mpo->const_e, solver_type, gf_omega, gf_eta, @@ -3964,6 +4043,7 @@ template struct Linear { sweep_cumulative_nflop = 0; sweep_max_pket_size = 0; sweep_max_eff_ham_size = 0; + sweep_max_eff_wfn_size = 0; frame_()->reset_peak_used_memory(); vector sweep_range; if (forward) @@ -4255,6 +4335,17 @@ template struct Linear { <<
" | Twait = " << tt[2]; cout << endl; } + size_t sweep_max_eff_ham_size_pm = sweep_max_eff_ham_size; + size_t sweep_max_eff_wfn_size_pm = sweep_max_eff_wfn_size; + if (lme != nullptr && lme->para_rule != nullptr) { + uint64_t tt[2] = {(uint64_t)sweep_max_eff_ham_size_pm, + (uint64_t)sweep_max_eff_wfn_size_pm}; + lme->para_rule->comm->reduce_max_optional( + &tt[0], 2, lme->para_rule->comm->root); + // reduce_max_optional leaves the reduced maxima in tt (on the root rank under MPI); copy them back, otherwise the printed Hmem/Wmem are only the local values + sweep_max_eff_ham_size_pm = (size_t)tt[0]; + sweep_max_eff_wfn_size_pm = (size_t)tt[1]; + } size_t dmain = frame_()->peak_used_memory[0]; size_t dseco = frame_()->peak_used_memory[1]; size_t imain = frame_()->peak_used_memory[2]; @@ -4266,7 +4354,10 @@ template struct Linear { << Parsing::to_size_string(imain + iseco) << " (" << (imain * 100 / (imain + iseco)) << "%)"; cout << " | Hmem = " - << Parsing::to_size_string(sweep_max_eff_ham_size * + << Parsing::to_size_string(sweep_max_eff_ham_size_pm * + sizeof(FL)); + cout << " | Wmem = " + << Parsing::to_size_string(sweep_max_eff_wfn_size_pm * sizeof(FL)); cout << " | Pmem = " << Parsing::to_size_string(sweep_max_pket_size * @@ -4384,6 +4475,7 @@ struct Expect { vector wfn_spectra; size_t sweep_cumulative_nflop = 0; size_t sweep_max_eff_ham_size = 0; + size_t sweep_max_eff_wfn_size = 0; pair max_move_env_mem; double tex = 0, teff = 0, tmve = 0, tblk = 0; Timer _t, _t2; @@ -4566,6 +4658,8 @@ struct Expect { teff += _t.get_time(); sweep_max_eff_ham_size = max(sweep_max_eff_ham_size, k_eff->op->get_total_memory()); + sweep_max_eff_wfn_size = + max(sweep_max_eff_wfn_size, k_eff->ket->total_memory); pdi = k_eff->expect(me->mpo->const_e, algo_type, ex_type, me->para_rule); tex += _t.get_time(); @@ -4590,6 +4684,8 @@ struct Expect { teff += _t.get_time(); sweep_max_eff_ham_size = max(sweep_max_eff_ham_size, k_eff->op->get_total_memory()); + sweep_max_eff_wfn_size = + max(sweep_max_eff_wfn_size, k_eff->ket->total_memory); pdi = k_eff->expect(me->mpo->const_e, algo_type, ex_type, me->para_rule); tex += _t.get_time(); @@ -4737,6 +4833,8 @@ struct Expect { teff += _t.get_time(); sweep_max_eff_ham_size =
max(sweep_max_eff_ham_size, h_eff->op->get_total_memory()); + sweep_max_eff_wfn_size = + max(sweep_max_eff_wfn_size, h_eff->ket->total_memory); auto pdi = h_eff->expect(me->mpo->const_e, algo_type, ex_type, me->para_rule, fuse_left); tex += _t.get_time(); @@ -4905,6 +5003,8 @@ struct Expect { teff += _t.get_time(); sweep_max_eff_ham_size = max(sweep_max_eff_ham_size, h_eff->op->get_total_memory()); + sweep_max_eff_wfn_size = + max(sweep_max_eff_wfn_size, h_eff->ket->total_memory); auto pdi = h_eff->expect(me->mpo->const_e, algo_type, ex_type, me->para_rule); tex += _t.get_time(); @@ -5049,6 +5149,12 @@ struct Expect { teff += _t.get_time(); sweep_max_eff_ham_size = max(sweep_max_eff_ham_size, h_eff->op->get_total_memory()); + sweep_max_eff_wfn_size = max( + sweep_max_eff_wfn_size, + accumulate(h_eff->ket.begin(), h_eff->ket.end(), (size_t)0, + [](size_t x, shared_ptr> y) { + return x + y->total_memory; + })); auto pdi = h_eff->expect(me->mpo->const_e, algo_type, ex_type, me->para_rule); tex += _t.get_time(); @@ -5284,6 +5390,12 @@ struct Expect { teff += _t.get_time(); sweep_max_eff_ham_size = max(sweep_max_eff_ham_size, h_eff->op->get_total_memory()); + sweep_max_eff_wfn_size = max( + sweep_max_eff_wfn_size, + accumulate(h_eff->ket.begin(), h_eff->ket.end(), (size_t)0, + [](size_t x, shared_ptr> y) { + return x + y->total_memory; + })); auto pdi = h_eff->expect(me->mpo->const_e, algo_type, ex_type, me->para_rule); tex += _t.get_time(); @@ -5466,6 +5578,7 @@ struct Expect { me->prepare(); sweep_cumulative_nflop = 0; sweep_max_eff_ham_size = 0; + sweep_max_eff_wfn_size = 0; frame_()->reset_peak_used_memory(); vector sweep_range; if (forward) @@ -5545,6 +5658,18 @@ struct Expect { << " | Twait = " << tt[2]; cout << endl; } + size_t sweep_max_eff_ham_size_pm = sweep_max_eff_ham_size; + size_t sweep_max_eff_wfn_size_pm = sweep_max_eff_wfn_size; + if (me->para_rule != nullptr) { + shared_ptr> comm = + me->para_rule->comm; + uint64_t tt[2] =
{(uint64_t)sweep_max_eff_ham_size_pm, + (uint64_t)sweep_max_eff_wfn_size_pm}; + comm->reduce_max_optional(&tt[0], 2, comm->root); + // reduce_max_optional leaves the reduced maxima in tt (on the root rank under MPI); copy them back, otherwise the printed Hmem/Wmem are only the local values + sweep_max_eff_ham_size_pm = (size_t)tt[0]; + sweep_max_eff_wfn_size_pm = (size_t)tt[1]; + } size_t dmain = frame_()->peak_used_memory[0]; size_t dseco = frame_()->peak_used_memory[1]; size_t imain = frame_()->peak_used_memory[2]; @@ -5554,7 +5676,10 @@ struct Expect { cout << " | Imem = " << Parsing::to_size_string(imain + iseco) << " (" << (imain * 100 / (imain + iseco)) << "%)"; cout << " | Hmem = " - << Parsing::to_size_string(sweep_max_eff_ham_size * + << Parsing::to_size_string(sweep_max_eff_ham_size_pm * + sizeof(FL)); + cout << " | Wmem = " + << Parsing::to_size_string(sweep_max_eff_wfn_size_pm * sizeof(FL)); cout << " | MaxBmem = " << Parsing::to_size_string(max_move_env_mem.first * diff --git a/src/pybind/pybind_dmrg.hpp b/src/pybind/pybind_dmrg.hpp index 6e7a3456..09b4706e 100644 --- a/src/pybind/pybind_dmrg.hpp +++ b/src/pybind/pybind_dmrg.hpp @@ -1196,6 +1196,10 @@ void bind_fl_dmrg(py::module &m) { &DMRG::davidson_max_iter) .def_readwrite("davidson_soft_max_iter", &DMRG::davidson_soft_max_iter) + .def_readwrite("davidson_def_min_size", + &DMRG::davidson_def_min_size) + .def_readwrite("davidson_def_max_size", + &DMRG::davidson_def_max_size) .def_readwrite("davidson_shift", &DMRG::davidson_shift) .def_readwrite("davidson_type", &DMRG::davidson_type) .def_readwrite("conn_adjust_step", &DMRG::conn_adjust_step) @@ -1490,6 +1494,10 @@ void bind_fl_linear(py::module &m) { &Linear::linear_soft_max_iter) .def_readwrite("conv_required_sweeps", &Linear::conv_required_sweeps) + .def_readwrite("linear_def_min_size", + &Linear::linear_def_min_size) + .def_readwrite("linear_def_max_size", + &Linear::linear_def_max_size) .def_readwrite("gf_omega", &Linear::gf_omega) .def_readwrite("gf_eta", &Linear::gf_eta) .def_readwrite("gf_extra_omegas", &Linear::gf_extra_omegas) @@ -2465,13 +2473,17 @@ extern template auto bind_fl_trans_mps_spin_specific>( -> decltype(typename SU2::is_su2_t(typename SZ::is_sz_t())); extern template void
-bind_fl_trans_mps>(py::module &m, const string &aux_name); +bind_fl_trans_mps>(py::module &m, + const string &aux_name); extern template void -bind_fl_trans_mps, double>(py::module &m, const string &aux_name); +bind_fl_trans_mps, double>(py::module &m, + const string &aux_name); extern template void -bind_fl_trans_mps>(py::module &m, const string &aux_name); +bind_fl_trans_mps>(py::module &m, + const string &aux_name); extern template void -bind_fl_trans_mps, double>(py::module &m, const string &aux_name); +bind_fl_trans_mps, double>(py::module &m, + const string &aux_name); #endif #endif @@ -2726,13 +2738,17 @@ extern template auto bind_fl_trans_mps_spin_specific>( #endif extern template void -bind_fl_trans_mps>(py::module &m, const string &aux_name); +bind_fl_trans_mps>(py::module &m, + const string &aux_name); extern template void -bind_fl_trans_mps, double>(py::module &m, const string &aux_name); +bind_fl_trans_mps, double>(py::module &m, + const string &aux_name); extern template void -bind_fl_trans_mps>(py::module &m, const string &aux_name); +bind_fl_trans_mps>(py::module &m, + const string &aux_name); extern template void -bind_fl_trans_mps, double>(py::module &m, const string &aux_name); +bind_fl_trans_mps, double>(py::module &m, + const string &aux_name); #endif #endif @@ -2929,13 +2945,17 @@ bind_fl_trans_mps, complex>(py::module &m, const string &aux_name); extern template void -bind_fl_trans_mps>(py::module &m, const string &aux_name); +bind_fl_trans_mps>(py::module &m, + const string &aux_name); extern template void -bind_fl_trans_mps, float>(py::module &m, const string &aux_name); +bind_fl_trans_mps, float>(py::module &m, + const string &aux_name); extern template void -bind_fl_trans_mps>(py::module &m, const string &aux_name); +bind_fl_trans_mps>(py::module &m, + const string &aux_name); extern template void -bind_fl_trans_mps, float>(py::module &m, const string &aux_name); +bind_fl_trans_mps, float>(py::module &m, + const string &aux_name); #endif 
@@ -3074,13 +3094,17 @@ bind_fl_trans_mps, complex>(py::module &m, const string &aux_name); extern template void -bind_fl_trans_mps>(py::module &m, const string &aux_name); +bind_fl_trans_mps>(py::module &m, + const string &aux_name); extern template void -bind_fl_trans_mps, float>(py::module &m, const string &aux_name); +bind_fl_trans_mps, float>(py::module &m, + const string &aux_name); extern template void -bind_fl_trans_mps>(py::module &m, const string &aux_name); +bind_fl_trans_mps>(py::module &m, + const string &aux_name); extern template void -bind_fl_trans_mps, float>(py::module &m, const string &aux_name); +bind_fl_trans_mps, float>(py::module &m, + const string &aux_name); #endif #endif