From a72607e0e0a92897cb2298d67de09a79038e199e Mon Sep 17 00:00:00 2001
From: Huanchen Zhai <hczhai@ucla.edu>
Date: Thu, 30 Nov 2023 20:31:38 -0800
Subject: [PATCH] allow changing dav subspace size; print wfn mem

---
 pyblock2/driver/block2main                |   4 +
 pyblock2/driver/core.py                   |   2 +
 pyblock2/driver/parser.py                 |   2 +-
 src/big_site/sweep_algorithm_big_site.hpp |  14 ++-
 src/core/parallel_mpi.hpp                 |  10 ++
 src/core/parallel_rule.hpp                |   4 +
 src/dmrg/effective_functions.hpp          |  17 ++-
 src/dmrg/effective_hamiltonian.hpp        |  27 ++--
 src/dmrg/sweep_algorithm.hpp              | 147 ++++++++++++++++++++--
 src/pybind/pybind_dmrg.hpp                |  56 ++++++---
 10 files changed, 235 insertions(+), 48 deletions(-)

diff --git a/pyblock2/driver/block2main b/pyblock2/driver/block2main
index 877bcd41..711dbd5e 100755
--- a/pyblock2/driver/block2main
+++ b/pyblock2/driver/block2main
@@ -2324,6 +2324,8 @@ if not pre_run:
             dmrg.davidson_max_iter = int(dic.get("davidson_max_iter", 5000))
             dmrg.davidson_soft_max_iter = int(
                 dic.get("davidson_soft_max_iter", 4000))
+            dmrg.davidson_def_max_size = int(
+                dic.get("davidson_def_max_size", 50))
             dmrg.store_wfn_spectra = store_wfn_spectra
             dmrg.site_dependent_bond_dims = site_dependent_bdims
 
@@ -2466,6 +2468,8 @@ if not pre_run:
         dmrg.davidson_max_iter = int(dic.get("davidson_max_iter", 5000))
         dmrg.davidson_soft_max_iter = int(
             dic.get("davidson_soft_max_iter", 4000))
+        dmrg.davidson_def_max_size = int(
+            dic.get("davidson_def_max_size", 50))
         dmrg.decomp_type = decomp_type
         dmrg.trunc_type = trunc_type
         dmrg.davidson_conv_thrds = VectorFP(dav_thrds)
diff --git a/pyblock2/driver/core.py b/pyblock2/driver/core.py
index fc5b783e..0d7f3f6b 100644
--- a/pyblock2/driver/core.py
+++ b/pyblock2/driver/core.py
@@ -2775,6 +2775,7 @@ def dmrg(
         cutoff=1e-20,
         twosite_to_onesite=None,
         dav_max_iter=4000,
+        dav_def_max_size=50,
         proj_mpss=None,
         proj_weights=None,
         store_wfn_spectra=True,
@@ -2820,6 +2821,7 @@ def dmrg(
         dmrg.davidson_conv_thrds = bw.VectorFP(thrds)
         dmrg.davidson_max_iter = dav_max_iter + 100
         dmrg.davidson_soft_max_iter = dav_max_iter
+        dmrg.davidson_def_max_size = dav_def_max_size
         dmrg.store_wfn_spectra = store_wfn_spectra
         dmrg.iprint = iprint
         dmrg.cutoff = cutoff
diff --git a/pyblock2/driver/parser.py b/pyblock2/driver/parser.py
index d6679cd5..c1ef5d75 100755
--- a/pyblock2/driver/parser.py
+++ b/pyblock2/driver/parser.py
@@ -42,7 +42,7 @@
               "model", "k_symmetry", "k_irrep", "k_mod", "init_mps_center", "heisenberg",
               "use_complex", "real_density_matrix", "expt_algo_type", "one_body_parallel_rule",
               "davidson_max_iter", "davidson_soft_max_iter", "linear_soft_max_iter",
-              "n_sub_sweeps", "complex_mps", "split_states", "trans_mps_to_complex",
+              "davidson_def_max_size", "n_sub_sweeps", "complex_mps", "split_states", "trans_mps_to_complex",
               "use_general_spin", "trans_integral_to_spin_orbital", "store_wfn_spectra",
               "tran_bra_range", "tran_ket_range", "tran_triangular", "use_hybrid_complex",
               "mem_ratio", "min_mpo_mem", "qc_mpo_type", "full_integral", "skip_inact_ext_sites",
diff --git a/src/big_site/sweep_algorithm_big_site.hpp b/src/big_site/sweep_algorithm_big_site.hpp
index 59da85ae..d1f80565 100644
--- a/src/big_site/sweep_algorithm_big_site.hpp
+++ b/src/big_site/sweep_algorithm_big_site.hpp
@@ -297,6 +297,8 @@ struct DMRGBigSiteAQCC : DMRGBigSite<S, FL, FLS> {
     using DMRGBigSite<S, FL, FLS>::ext_mes;
     using DMRGBigSite<S, FL, FLS>::davidson_soft_max_iter;
     using DMRGBigSite<S, FL, FLS>::davidson_max_iter;
+    using DMRGBigSite<S, FL, FLS>::davidson_def_min_size;
+    using DMRGBigSite<S, FL, FLS>::davidson_def_max_size;
     using DMRGBigSite<S, FL, FLS>::noise_type;
     using DMRGBigSite<S, FL, FLS>::decomp_type;
     using DMRGBigSite<S, FL, FLS>::energies;
@@ -464,7 +466,8 @@ struct DMRGBigSiteAQCC : DMRGBigSite<S, FL, FLS> {
         // TODO: For RAS mode, it might be good to do several iterations
         //       for the first site as well.
         pdi = aqcc_eff->eigs(iprint >= 3, davidson_conv_thrd, davidson_max_iter,
-                             davidson_soft_max_iter, DavidsonTypes::Normal, 0.0,
+                             davidson_soft_max_iter, davidson_def_min_size,
+                             davidson_def_max_size, DavidsonTypes::Normal, 0.0,
                              me->para_rule);
         teig += _t.get_time();
         if ((noise_type & NoiseTypes::Perturbative) && noise != 0)
@@ -549,6 +552,7 @@ struct DMRGBigSiteAQCC : DMRGBigSite<S, FL, FLS> {
                 const auto pdi2 =
                     aqcc_eff->eigs(iprint >= 3, davidson_conv_thrd,
                                    davidson_max_iter, davidson_soft_max_iter,
+                                   davidson_def_min_size, davidson_def_max_size,
                                    DavidsonTypes::Normal, 0.0, me->para_rule);
                 const FPS energy =
                     (FPS)std::get<0>(pdi2) + (FPS)me->mpo->const_e;
@@ -598,6 +602,7 @@ struct DMRGBigSiteAQCC : DMRGBigSite<S, FL, FLS> {
             auto aqcc_eff = get_aqcc_eff(h_eff, d_eff1, d_eff2, d_eff3, d_eff4);
             pdi = aqcc_eff->eigs(iprint >= 3, davidson_conv_thrd,
                                  davidson_max_iter, davidson_soft_max_iter,
+                                 davidson_def_min_size, davidson_def_max_size,
                                  DavidsonTypes::Normal, 0.0, me->para_rule);
             const FPS energy = (FPS)std::get<0>(pdi) + (FPS)me->mpo->const_e;
             smallest_energy = min(energy, smallest_energy);
@@ -630,6 +635,8 @@ struct DMRGBigSiteAQCCOLD : DMRGBigSite<S, FL, FLS> {
     using DMRGBigSite<S, FL, FLS>::me;
     using DMRGBigSite<S, FL, FLS>::davidson_soft_max_iter;
     using DMRGBigSite<S, FL, FLS>::davidson_max_iter;
+    using DMRGBigSite<S, FL, FLS>::davidson_def_min_size;
+    using DMRGBigSite<S, FL, FLS>::davidson_def_max_size;
     using DMRGBigSite<S, FL, FLS>::noise_type;
     using DMRGBigSite<S, FL, FLS>::decomp_type;
     using DMRGBigSite<S, FL, FLS>::energies;
@@ -721,7 +728,8 @@ struct DMRGBigSiteAQCCOLD : DMRGBigSite<S, FL, FLS> {
                         Partition<S, FL>::get_uniq_labels({h_eff->hop_mat});
                     vector<vector<pair<uint8_t, S>>> msubsl =
                         Partition<S, FL>::get_uniq_sub_labels(
-                            h_eff->op->mat, h_eff->hop_mat, msl, h_eff->hop_left_vacuum);
+                            h_eff->op->mat, h_eff->hop_mat, msl,
+                            h_eff->hop_left_vacuum);
                     diag_info->initialize_diag(
                         cdq, h_eff->opdq, msubsl[0], h_eff->left_op_infos,
                         h_eff->right_op_infos, h_eff->diag->info,
@@ -749,6 +757,7 @@ struct DMRGBigSiteAQCCOLD : DMRGBigSite<S, FL, FLS> {
                 const auto pdi2 =
                     h_eff->eigs(iprint >= 3, davidson_conv_thrd,
                                 davidson_max_iter, davidson_soft_max_iter,
+                                davidson_def_min_size, davidson_def_max_size,
                                 DavidsonTypes::Normal, 0.0, me->para_rule);
                 const auto energy =
                     (FPS)std::get<0>(pdi2) + (FPS)me->mpo->const_e;
@@ -802,6 +811,7 @@ struct DMRGBigSiteAQCCOLD : DMRGBigSite<S, FL, FLS> {
         } else {
             pdi = h_eff->eigs(iprint >= 3, davidson_conv_thrd,
                               davidson_max_iter, davidson_soft_max_iter,
+                              davidson_def_min_size, davidson_def_max_size,
                               DavidsonTypes::Normal, 0.0, me->para_rule);
         }
         teig += _t.get_time();
diff --git a/src/core/parallel_mpi.hpp b/src/core/parallel_mpi.hpp
index 7cbb7f2a..e8c73da5 100644
--- a/src/core/parallel_mpi.hpp
+++ b/src/core/parallel_mpi.hpp
@@ -384,6 +384,13 @@ template <typename S> struct MPICommunicator : ParallelCommunicator<S> {
     void allreduce_max(vector<complex<float>> &vs) override {
         allreduce_max(vs.data(), vs.size());
     }
+    void reduce_max(uint64_t *data, size_t len, int owner) override { // element-wise MPI_MAX of data[0..len) onto rank `owner`
+        _t.get_time(); // NOTE(review): presumably starts the interval that is added to tcomm below - confirm Timer semantics
+        int ierr = MPI_Reduce(rank == owner ? MPI_IN_PLACE : data, data, len,
+                              MPI_UINT64_T, MPI_MAX, owner, comm); // owner passes MPI_IN_PLACE, so its data is both input and result
+        assert(ierr == 0); // this check is compiled out when NDEBUG is defined
+        tcomm += _t.get_time(); // add the timer reading for this call to tcomm
+    }
     void allreduce_min(double *data, size_t len) override {
         _t.get_time();
         for (size_t offset = 0; offset < len; offset += chunk_size) {
@@ -725,6 +732,9 @@ template <typename S> struct MPICommunicator : ParallelCommunicator<S> {
     void reduce_sum_optional(uint64_t *data, size_t len, int owner) override {
         reduce_sum(data, len, owner);
     }
+    void reduce_max_optional(uint64_t *data, size_t len, int owner) override { // MPI override of the base-class no-op
+        reduce_max(data, len, owner); // forwards unchanged to reduce_max
+    }
     void waitall() override {
         _t.get_time();
         int ierr =
diff --git a/src/core/parallel_rule.hpp b/src/core/parallel_rule.hpp
index 2c7061e1..5f9e686e 100644
--- a/src/core/parallel_rule.hpp
+++ b/src/core/parallel_rule.hpp
@@ -214,6 +214,9 @@ template <typename S> struct ParallelCommunicator {
     virtual void allreduce_max(vector<complex<float>> &vs) {
         assert(size == 1);
     }
+    virtual void reduce_max(uint64_t *data, size_t len, int owner) { // base version: performs no reduction, data is left untouched
+        assert(size == 1); // only valid for a single-process communicator; subclasses override for size > 1
+    }
     virtual void reduce_sum(const shared_ptr<SparseMatrixGroup<S, double>> &mat,
                             int owner) {
         assert(size == 1);
@@ -299,6 +302,7 @@ template <typename S> struct ParallelCommunicator {
     // mainly for no communication parallel execution in serial
     virtual void reduce_sum_optional(double *data, size_t len, int owner) {}
     virtual void reduce_sum_optional(uint64_t *data, size_t len, int owner) {}
+    virtual void reduce_max_optional(uint64_t *data, size_t len, int owner) {} // default: no-op, data keeps its local values
     virtual void allreduce_logical_or(bool &v) { assert(size == 1); }
     virtual void waitall() { assert(size == 1); }
 };
diff --git a/src/dmrg/effective_functions.hpp b/src/dmrg/effective_functions.hpp
index 045716d6..6d07d7bf 100644
--- a/src/dmrg/effective_functions.hpp
+++ b/src/dmrg/effective_functions.hpp
@@ -294,7 +294,8 @@ struct EffectiveFunctions<
         typename const_fl_type<FL>::FL const_e, FL omega, FL eta,
         const shared_ptr<SparseMatrix<S, FL>> &real_bra,
         int n_harmonic_projection = 0, bool iprint = false, FP conv_thrd = 5E-6,
-        int max_iter = 5000, int soft_max_iter = -1,
+        int max_iter = 5000, int soft_max_iter = -1, int deflation_min_size = 2,
+        int deflation_max_size = 50,
         const shared_ptr<ParallelRule<S>> &para_rule = nullptr) {
         int nmult = 0, nmultx = 0;
         frame_<FP>()->activate(0);
@@ -367,7 +368,8 @@ struct EffectiveFunctions<
                 DavidsonTypes::HarmonicGreaterThan | DavidsonTypes::NoPrecond,
                 nmultx, iprint,
                 para_rule == nullptr ? nullptr : para_rule->comm, 1E-4,
-                max_iter, soft_max_iter, 2, 50);
+                max_iter, soft_max_iter, deflation_min_size,
+                deflation_max_size);
             nmultp = nmult;
             nmult = 0;
             igf = IterativeMatrixFunctions<FL>::deflated_conjugate_gradient(
@@ -465,7 +467,8 @@ struct EffectiveFunctions<
         const shared_ptr<EffectiveHamiltonian<S, FL, MultiMPS<S, FL>>> &h_eff,
         const shared_ptr<EffectiveHamiltonian<S, FC, MultiMPS<S, FC>>> &x_eff,
         bool iprint = false, FP conv_thrd = 5E-6, int max_iter = 5000,
-        int soft_max_iter = -1,
+        int soft_max_iter = -1, int deflation_min_size = 2,
+        int deflation_max_size = 50,
         DavidsonTypes davidson_type = DavidsonTypes::Normal, FP shift = 0,
         const shared_ptr<ParallelRule<S>> &para_rule = nullptr) {
         int ndav = 0;
@@ -534,7 +537,7 @@ struct EffectiveFunctions<
         vector<FP> xeners = IterativeMatrixFunctions<FC>::harmonic_davidson(
             f, aa, bs, shift, davidson_type, ndav, iprint,
             para_rule == nullptr ? nullptr : para_rule->comm, conv_thrd,
-            max_iter, soft_max_iter);
+            max_iter, soft_max_iter, deflation_min_size, deflation_max_size);
         vector<typename const_fl_type<FP>::FL> eners(xeners.size());
         for (size_t i = 0; i < xeners.size(); i++)
             eners[i] = (typename const_fl_type<FP>::FL)xeners[i];
@@ -675,7 +678,8 @@ struct EffectiveFunctions<S, FL,
         typename const_fl_type<FL>::FL const_e, FL omega, FL eta,
         const shared_ptr<SparseMatrix<S, FL>> &real_bra,
         int n_harmonic_projection = 0, bool iprint = false, FP conv_thrd = 5E-6,
-        int max_iter = 5000, int soft_max_iter = -1,
+        int max_iter = 5000, int soft_max_iter = -1, int deflation_min_size = 2,
+        int deflation_max_size = 50,
         const shared_ptr<ParallelRule<S>> &para_rule = nullptr) {
         assert(false);
         return make_tuple(0.0, make_pair(0, 0), (size_t)0, 0.0);
@@ -699,7 +703,8 @@ struct EffectiveFunctions<S, FL,
         const shared_ptr<EffectiveHamiltonian<S, FL, MultiMPS<S, FL>>> &h_eff,
         const shared_ptr<EffectiveHamiltonian<S, FC, MultiMPS<S, FC>>> &x_eff,
         bool iprint = false, FP conv_thrd = 5E-6, int max_iter = 5000,
-        int soft_max_iter = -1,
+        int soft_max_iter = -1, int deflation_min_size = 2,
+        int deflation_max_size = 50,
         DavidsonTypes davidson_type = DavidsonTypes::Normal, FP shift = 0,
         const shared_ptr<ParallelRule<S>> &para_rule = nullptr) {
         assert(false);
diff --git a/src/dmrg/effective_hamiltonian.hpp b/src/dmrg/effective_hamiltonian.hpp
index c3271158..d6dd4c4b 100644
--- a/src/dmrg/effective_hamiltonian.hpp
+++ b/src/dmrg/effective_hamiltonian.hpp
@@ -408,7 +408,8 @@ struct EffectiveHamiltonian<S, FL, MPS<S, FL>> {
     // energy, ndav, nflop, tdav
     tuple<typename const_fl_type<FP>::FL, int, size_t, double>
     eigs(bool iprint = false, FP conv_thrd = 5E-6, int max_iter = 5000,
-         int soft_max_iter = -1,
+         int soft_max_iter = -1, int deflation_min_size = 2,
+         int deflation_max_size = 50,
          DavidsonTypes davidson_type = DavidsonTypes::Normal, FP shift = 0,
          const shared_ptr<ParallelRule<S>> &para_rule = nullptr,
          const vector<shared_ptr<SparseMatrix<S, FL>>> &ortho_bra =
@@ -437,13 +438,13 @@ struct EffectiveHamiltonian<S, FL, MPS<S, FL>> {
                 ? IterativeMatrixFunctions<FL>::harmonic_davidson(
                       *tf, aa, bs, shift, davidson_type, ndav, iprint,
                       para_rule == nullptr ? nullptr : para_rule->comm,
-                      conv_thrd, max_iter, soft_max_iter, 2, 50, ors,
-                      projection_weights)
+                      conv_thrd, max_iter, soft_max_iter, deflation_min_size,
+                      deflation_max_size, ors, projection_weights)
                 : IterativeMatrixFunctions<FL>::harmonic_davidson(
                       *this, aa, bs, shift, davidson_type, ndav, iprint,
                       para_rule == nullptr ? nullptr : para_rule->comm,
-                      conv_thrd, max_iter, soft_max_iter, 2, 50, ors,
-                      projection_weights);
+                      conv_thrd, max_iter, soft_max_iter, deflation_min_size,
+                      deflation_max_size, ors, projection_weights);
         post_precompute();
         uint64_t nflop = tf->opf->seq->cumulative_nflop;
         if (para_rule != nullptr)
@@ -1002,7 +1003,8 @@ template <typename S, typename FL> struct LinearEffectiveHamiltonian {
     // energy, ndav, nflop, tdav
     tuple<typename const_fl_type<FP>::FL, int, size_t, double>
     eigs(bool iprint = false, FP conv_thrd = 5E-6, int max_iter = 5000,
-         int soft_max_iter = -1,
+         int soft_max_iter = -1, int deflation_min_size = 2,
+         int deflation_max_size = 50,
          DavidsonTypes davidson_type = DavidsonTypes::Normal, FP shift = 0,
          const shared_ptr<ParallelRule<S>> &para_rule = nullptr) {
         int ndav = 0;
@@ -1029,7 +1031,7 @@ template <typename S, typename FL> struct LinearEffectiveHamiltonian {
         vector<FP> eners = IterativeMatrixFunctions<FL>::harmonic_davidson(
             *this, aa, bs, shift, davidson_type, ndav, iprint,
             para_rule == nullptr ? nullptr : para_rule->comm, conv_thrd,
-            max_iter, soft_max_iter);
+            max_iter, soft_max_iter, deflation_min_size, deflation_max_size);
         for (size_t ih = 0; ih < h_effs.size(); ih++)
             h_effs[ih]->post_precompute();
         uint64_t nflop = tf->opf->seq->cumulative_nflop;
@@ -1458,7 +1460,8 @@ struct EffectiveHamiltonian<S, FL, MultiMPS<S, FL>> {
     // energies, ndav, nflop, tdav
     tuple<vector<typename const_fl_type<FP>::FL>, int, size_t, double>
     eigs(bool iprint = false, FP conv_thrd = 5E-6, int max_iter = 5000,
-         int soft_max_iter = -1,
+         int soft_max_iter = -1, int deflation_min_size = 2,
+         int deflation_max_size = 50,
          DavidsonTypes davidson_type = DavidsonTypes::Normal, FP shift = 0,
          const shared_ptr<ParallelRule<S>> &para_rule = nullptr,
          const vector<shared_ptr<SparseMatrix<S, FL>>> &ortho_bra =
@@ -1492,13 +1495,13 @@ struct EffectiveHamiltonian<S, FL, MultiMPS<S, FL>> {
                 ? IterativeMatrixFunctions<FL>::harmonic_davidson(
                       *tf, aa, bs, shift, davidson_type, ndav, iprint,
                       para_rule == nullptr ? nullptr : para_rule->comm,
-                      conv_thrd, max_iter, soft_max_iter, 2, 50, ors,
-                      projection_weights)
+                      conv_thrd, max_iter, soft_max_iter, deflation_min_size,
+                      deflation_max_size, ors, projection_weights)
                 : IterativeMatrixFunctions<FL>::harmonic_davidson(
                       *this, aa, bs, shift, davidson_type, ndav, iprint,
                       para_rule == nullptr ? nullptr : para_rule->comm,
-                      conv_thrd, max_iter, soft_max_iter, 2, 50, ors,
-                      projection_weights);
+                      conv_thrd, max_iter, soft_max_iter, deflation_min_size,
+                      deflation_max_size, ors, projection_weights);
         vector<typename const_fl_type<FP>::FL> eners(xeners.size());
         for (size_t i = 0; i < xeners.size(); i++)
             eners[i] = (typename const_fl_type<FP>::FL)xeners[i];
diff --git a/src/dmrg/sweep_algorithm.hpp b/src/dmrg/sweep_algorithm.hpp
index f01e7a4d..33587c99 100644
--- a/src/dmrg/sweep_algorithm.hpp
+++ b/src/dmrg/sweep_algorithm.hpp
@@ -36,6 +36,7 @@
 #include <iomanip>
 #include <iostream>
 #include <memory>
+#include <numeric>
 #include <string>
 #include <tuple>
 #include <utility>
@@ -107,6 +108,9 @@ template <typename S, typename FL, typename FLS> struct DMRG {
     size_t sweep_cumulative_nflop = 0;
     size_t sweep_max_pket_size = 0;
     size_t sweep_max_eff_ham_size = 0;
+    size_t sweep_max_eff_wfn_size = 0;
+    int davidson_def_min_size = 2;
+    int davidson_def_max_size = 50;
     double tprt = 0, teig = 0, teff = 0, tmve = 0, tblk = 0, tdm = 0, tsplt = 0,
            tsvd = 0, torth = 0;
     bool print_connection_time = false;
@@ -586,9 +590,12 @@ template <typename S, typename FL, typename FLS> struct DMRG {
             me->bra->tensors[i], me->ket->tensors[i]);
         sweep_max_eff_ham_size =
             max(sweep_max_eff_ham_size, h_eff->op->get_total_memory());
+        sweep_max_eff_wfn_size =
+            max(sweep_max_eff_wfn_size, h_eff->ket->total_memory);
         teff += _t.get_time();
         pdi = h_eff->eigs(iprint >= 3, davidson_conv_thrd, davidson_max_iter,
-                          davidson_soft_max_iter, davidson_type,
+                          davidson_soft_max_iter, davidson_def_min_size,
+                          davidson_def_max_size, davidson_type,
                           davidson_shift - xreal<FL>((FL)me->mpo->const_e),
                           me->para_rule, ortho_bra, projection_weights);
         teig += _t.get_time();
@@ -885,9 +892,12 @@ template <typename S, typename FL, typename FLS> struct DMRG {
                         me->ket->tensors[i]);
         sweep_max_eff_ham_size =
             max(sweep_max_eff_ham_size, h_eff->op->get_total_memory());
+        sweep_max_eff_wfn_size =
+            max(sweep_max_eff_wfn_size, h_eff->ket->total_memory);
         teff += _t.get_time();
         pdi = h_eff->eigs(iprint >= 3, davidson_conv_thrd, davidson_max_iter,
-                          davidson_soft_max_iter, davidson_type,
+                          davidson_soft_max_iter, davidson_def_min_size,
+                          davidson_def_max_size, davidson_type,
                           davidson_shift - xreal<FL>((FL)me->mpo->const_e),
                           me->para_rule, ortho_bra, projection_weights);
         teig += _t.get_time();
@@ -1336,23 +1346,44 @@ template <typename S, typename FL, typename FLS> struct DMRG {
             sweep_max_eff_ham_size =
                 max(sweep_max_eff_ham_size, h_eff->op->get_total_memory() +
                                                 x_eff->op->get_total_memory());
+            sweep_max_eff_wfn_size = max(
+                sweep_max_eff_wfn_size,
+                accumulate(
+                    h_eff->ket.begin(), h_eff->ket.end(), (size_t)0,
+                    [](size_t x, shared_ptr<SparseMatrixGroup<S, FL>> y) {
+                        return x + y->total_memory;
+                    }) +
+                    accumulate(
+                        x_eff->ket.begin(), x_eff->ket.end(), (size_t)0,
+                        [](size_t x, shared_ptr<SparseMatrixGroup<S, FC>> y) {
+                            return x + y->total_memory;
+                        }));
         } else {
             h_eff = me->multi_eff_ham(
                 fuse_left ? FuseTypes::FuseL : FuseTypes::FuseR, forward, true);
             sweep_max_eff_ham_size =
                 max(sweep_max_eff_ham_size, h_eff->op->get_total_memory());
+            sweep_max_eff_wfn_size =
+                max(sweep_max_eff_wfn_size,
+                    accumulate(
+                        h_eff->ket.begin(), h_eff->ket.end(), (size_t)0,
+                        [](size_t x, shared_ptr<SparseMatrixGroup<S, FL>> y) {
+                            return x + y->total_memory;
+                        }));
         }
         teff += _t.get_time();
         if (x_eff != nullptr)
             pdi = EffectiveFunctions<S, FL>::eigs_mixed(
                 h_eff, x_eff, iprint >= 3, davidson_conv_thrd,
-                davidson_max_iter, davidson_soft_max_iter, davidson_type,
+                davidson_max_iter, davidson_soft_max_iter,
+                davidson_def_min_size, davidson_def_max_size, davidson_type,
                 davidson_shift - xreal<FL>((FL)me->mpo->const_e),
                 me->para_rule);
         else
             pdi =
                 h_eff->eigs(iprint >= 3, davidson_conv_thrd, davidson_max_iter,
-                            davidson_soft_max_iter, davidson_type,
+                            davidson_soft_max_iter, davidson_def_min_size,
+                            davidson_def_max_size, davidson_type,
                             davidson_shift - xreal<FL>((FL)me->mpo->const_e),
                             me->para_rule, ortho_bra, projection_weights);
         for (int i = 0; i < mket->nroots; i++) {
@@ -1677,22 +1708,43 @@ template <typename S, typename FL, typename FLS> struct DMRG {
             sweep_max_eff_ham_size =
                 max(sweep_max_eff_ham_size, h_eff->op->get_total_memory() +
                                                 x_eff->op->get_total_memory());
+            sweep_max_eff_wfn_size = max(
+                sweep_max_eff_wfn_size,
+                accumulate(
+                    h_eff->ket.begin(), h_eff->ket.end(), (size_t)0,
+                    [](size_t x, shared_ptr<SparseMatrixGroup<S, FL>> y) {
+                        return x + y->total_memory;
+                    }) +
+                    accumulate(
+                        x_eff->ket.begin(), x_eff->ket.end(), (size_t)0,
+                        [](size_t x, shared_ptr<SparseMatrixGroup<S, FC>> y) {
+                            return x + y->total_memory;
+                        }));
         } else {
             h_eff = me->multi_eff_ham(FuseTypes::FuseLR, forward, true);
             sweep_max_eff_ham_size =
                 max(sweep_max_eff_ham_size, h_eff->op->get_total_memory());
+            sweep_max_eff_wfn_size =
+                max(sweep_max_eff_wfn_size,
+                    accumulate(
+                        h_eff->ket.begin(), h_eff->ket.end(), (size_t)0,
+                        [](size_t x, shared_ptr<SparseMatrixGroup<S, FL>> y) {
+                            return x + y->total_memory;
+                        }));
         }
         teff += _t.get_time();
         if (x_eff != nullptr)
             pdi = EffectiveFunctions<S, FL>::eigs_mixed(
                 h_eff, x_eff, iprint >= 3, davidson_conv_thrd,
-                davidson_max_iter, davidson_soft_max_iter, davidson_type,
+                davidson_max_iter, davidson_soft_max_iter,
+                davidson_def_min_size, davidson_def_max_size, davidson_type,
                 davidson_shift - xreal<FL>((FL)me->mpo->const_e),
                 me->para_rule);
         else
             pdi =
                 h_eff->eigs(iprint >= 3, davidson_conv_thrd, davidson_max_iter,
-                            davidson_soft_max_iter, davidson_type,
+                            davidson_soft_max_iter, davidson_def_min_size,
+                            davidson_def_max_size, davidson_type,
                             davidson_shift - xreal<FL>((FL)me->mpo->const_e),
                             me->para_rule, ortho_bra, projection_weights);
         for (int i = 0; i < mket->nroots; i++) {
@@ -1802,6 +1854,7 @@ template <typename S, typename FL, typename FLS> struct DMRG {
         sweep_cumulative_nflop = 0;
         sweep_max_pket_size = 0;
         sweep_max_eff_ham_size = 0;
+        sweep_max_eff_wfn_size = 0;
         frame_<FPS>()->reset_peak_used_memory();
         vector<int> sweep_range;
         if (forward)
@@ -2092,6 +2145,7 @@ template <typename S, typename FL, typename FLS> struct DMRG {
         sweep_cumulative_nflop = 0;
         sweep_max_pket_size = 0;
         sweep_max_eff_ham_size = 0;
+        sweep_max_eff_wfn_size = 0;
         frame_<FPS>()->reset_peak_used_memory();
         sweep_energies.resize(me->n_sites - me->dot + 1, vector<FPLS>{1E9});
         sweep_time.resize(me->n_sites - me->dot + 1, 0);
@@ -2367,6 +2421,15 @@ template <typename S, typename FL, typename FLS> struct DMRG {
                              << " | Tidle = " << tt[1] / comm->size
                              << " | Twait = " << tt[2] / comm->size;
                     }
+                    uint64_t eff_mem_pm[2] = {(uint64_t)sweep_max_eff_ham_size,
+                                              (uint64_t)sweep_max_eff_wfn_size};
+                    if (me->para_rule != nullptr)
+                        me->para_rule->comm->reduce_max_optional(
+                            &eff_mem_pm[0], 2, me->para_rule->comm->root);
+                    // read the sizes back from the reduced buffer; without this
+                    // the cross-rank maxima would be computed and then dropped
+                    size_t sweep_max_eff_ham_size_pm = (size_t)eff_mem_pm[0];
+                    size_t sweep_max_eff_wfn_size_pm = (size_t)eff_mem_pm[1];
                     size_t dmain = frame_<FPS>()->peak_used_memory[0];
                     size_t dseco = frame_<FPS>()->peak_used_memory[1];
                     size_t imain = frame_<FPS>()->peak_used_memory[2];
@@ -2378,7 +2441,10 @@ template <typename S, typename FL, typename FLS> struct DMRG {
                          << Parsing::to_size_string(imain + iseco) << " ("
                          << (imain * 100 / (imain + iseco)) << "%)";
                     sout << " | Hmem = "
-                         << Parsing::to_size_string(sweep_max_eff_ham_size *
+                         << Parsing::to_size_string(sweep_max_eff_ham_size_pm *
+                                                    sizeof(FL));
+                    sout << " | Wmem = "
+                         << Parsing::to_size_string(sweep_max_eff_wfn_size_pm *
                                                     sizeof(FL));
                     sout << " | Pmem = "
                          << Parsing::to_size_string(sweep_max_pket_size *
@@ -2480,6 +2546,8 @@ template <typename S, typename FL, typename FLS> struct Linear {
     int linear_max_iter = 5000;
     int linear_soft_max_iter = -1;
     int conv_required_sweeps = 3;
+    int linear_def_min_size = 2;
+    int linear_def_max_size = 50;
     ConvergenceTypes conv_type = ConvergenceTypes::LastMinimal;
     NoiseTypes noise_type = NoiseTypes::DensityMatrix;
     TruncationTypes trunc_type = TruncationTypes::Physical;
@@ -2495,6 +2563,7 @@ template <typename S, typename FL, typename FLS> struct Linear {
     size_t sweep_cumulative_nflop = 0;
     size_t sweep_max_pket_size = 0;
     size_t sweep_max_eff_ham_size = 0;
+    size_t sweep_max_eff_wfn_size = 0;
     double tprt = 0, tmult = 0, teff = 0, tmve = 0, tblk = 0, tdm = 0,
            tsplt = 0, tsvd = 0;
     Timer _t, _t2;
@@ -2722,6 +2791,8 @@ template <typename S, typename FL, typename FLS> struct Linear {
                 linear_use_precondition, me->bra->tensors[i], right_bra);
             sweep_max_eff_ham_size =
                 max(sweep_max_eff_ham_size, l_eff->op->get_total_memory());
+            sweep_max_eff_wfn_size =
+                max(sweep_max_eff_wfn_size, l_eff->ket->total_memory);
             teff += _t.get_time();
             if (eq_type == EquationTypes::Normal) {
                 tuple<FLS, pair<int, int>, size_t, double> lpdi;
@@ -2758,6 +2829,7 @@ template <typename S, typename FL, typename FLS> struct Linear {
                                     real_bra, cg_n_harmonic_projection,
                                     iprint >= 3, linear_conv_thrd,
                                     linear_max_iter, linear_soft_max_iter,
+                                    linear_def_min_size, linear_def_max_size,
                                     me->para_rule);
                         else
                             lpdi = EffectiveFunctions<S, FL>::greens_function(
@@ -2798,7 +2870,9 @@ template <typename S, typename FL, typename FLS> struct Linear {
                     lpdi = EffectiveFunctions<S, FL>::greens_function_squared(
                         l_eff, lme->mpo->const_e, gf_omega, gf_eta, real_bra,
                         cg_n_harmonic_projection, iprint >= 3, linear_conv_thrd,
-                        linear_max_iter, linear_soft_max_iter, me->para_rule);
+                        linear_max_iter, linear_soft_max_iter,
+                        linear_def_min_size, linear_def_max_size,
+                        me->para_rule);
                 else
                     lpdi = EffectiveFunctions<S, FL>::greens_function(
                         l_eff, lme->mpo->const_e, solver_type, gf_omega, gf_eta,
@@ -3437,6 +3511,8 @@ template <typename S, typename FL, typename FLS> struct Linear {
                 me->bra->tensors[i], right_bra);
             sweep_max_eff_ham_size =
                 max(sweep_max_eff_ham_size, l_eff->op->get_total_memory());
+            sweep_max_eff_wfn_size =
+                max(sweep_max_eff_wfn_size, l_eff->ket->total_memory);
             teff += _t.get_time();
             if (eq_type == EquationTypes::Normal) {
                 tuple<FLS, pair<int, int>, size_t, double> lpdi;
@@ -3473,6 +3549,7 @@ template <typename S, typename FL, typename FLS> struct Linear {
                                     real_bra, cg_n_harmonic_projection,
                                     iprint >= 3, linear_conv_thrd,
                                     linear_max_iter, linear_soft_max_iter,
+                                    linear_def_min_size, linear_def_max_size,
                                     me->para_rule);
                         else
                             lpdi = EffectiveFunctions<S, FL>::greens_function(
@@ -3513,7 +3590,9 @@ template <typename S, typename FL, typename FLS> struct Linear {
                     lpdi = EffectiveFunctions<S, FL>::greens_function_squared(
                         l_eff, lme->mpo->const_e, gf_omega, gf_eta, real_bra,
                         cg_n_harmonic_projection, iprint >= 3, linear_conv_thrd,
-                        linear_max_iter, linear_soft_max_iter, me->para_rule);
+                        linear_max_iter, linear_soft_max_iter,
+                        linear_def_min_size, linear_def_max_size,
+                        me->para_rule);
                 else
                     lpdi = EffectiveFunctions<S, FL>::greens_function(
                         l_eff, lme->mpo->const_e, solver_type, gf_omega, gf_eta,
@@ -3964,6 +4043,7 @@ template <typename S, typename FL, typename FLS> struct Linear {
         sweep_cumulative_nflop = 0;
         sweep_max_pket_size = 0;
         sweep_max_eff_ham_size = 0;
+        sweep_max_eff_wfn_size = 0;
         frame_<FPS>()->reset_peak_used_memory();
         vector<int> sweep_range;
         if (forward)
@@ -4255,6 +4335,20 @@ template <typename S, typename FL, typename FLS> struct Linear {
                              << " | Twait = " << tt[2];
                         cout << endl;
                     }
+                    // Report the largest effective Hamiltonian / wavefunction
+                    // size over all processors, not just the local one.
+                    size_t sweep_max_eff_ham_size_pm = sweep_max_eff_ham_size;
+                    size_t sweep_max_eff_wfn_size_pm = sweep_max_eff_wfn_size;
+                    if (lme != nullptr && lme->para_rule != nullptr) {
+                        uint64_t tt[2] = {(uint64_t)sweep_max_eff_ham_size_pm,
+                                          (uint64_t)sweep_max_eff_wfn_size_pm};
+                        lme->para_rule->comm->reduce_max_optional(
+                            &tt[0], 2, lme->para_rule->comm->root);
+                        // The reduction can only return its result through
+                        // tt; copy it back so the reduced values are printed.
+                        sweep_max_eff_ham_size_pm = (size_t)tt[0];
+                        sweep_max_eff_wfn_size_pm = (size_t)tt[1];
+                    }
                     size_t dmain = frame_<FPS>()->peak_used_memory[0];
                     size_t dseco = frame_<FPS>()->peak_used_memory[1];
                     size_t imain = frame_<FPS>()->peak_used_memory[2];
@@ -4266,7 +4354,10 @@ template <typename S, typename FL, typename FLS> struct Linear {
                          << Parsing::to_size_string(imain + iseco) << " ("
                          << (imain * 100 / (imain + iseco)) << "%)";
                     cout << " | Hmem = "
-                         << Parsing::to_size_string(sweep_max_eff_ham_size *
+                         << Parsing::to_size_string(sweep_max_eff_ham_size_pm *
+                                                    sizeof(FL));
+                    cout << " | Wmem = "
+                         << Parsing::to_size_string(sweep_max_eff_wfn_size_pm *
                                                     sizeof(FL));
                     cout << " | Pmem = "
                          << Parsing::to_size_string(sweep_max_pket_size *
@@ -4384,6 +4475,7 @@ struct Expect {
     vector<FPS> wfn_spectra;
     size_t sweep_cumulative_nflop = 0;
     size_t sweep_max_eff_ham_size = 0;
+    size_t sweep_max_eff_wfn_size = 0;
     pair<size_t, size_t> max_move_env_mem;
     double tex = 0, teff = 0, tmve = 0, tblk = 0;
     Timer _t, _t2;
@@ -4566,6 +4658,8 @@ struct Expect {
                 teff += _t.get_time();
                 sweep_max_eff_ham_size =
                     max(sweep_max_eff_ham_size, k_eff->op->get_total_memory());
+                sweep_max_eff_wfn_size =
+                    max(sweep_max_eff_wfn_size, k_eff->ket->total_memory);
                 pdi = k_eff->expect(me->mpo->const_e, algo_type, ex_type,
                                     me->para_rule);
                 tex += _t.get_time();
@@ -4590,6 +4684,8 @@ struct Expect {
                 teff += _t.get_time();
                 sweep_max_eff_ham_size =
                     max(sweep_max_eff_ham_size, k_eff->op->get_total_memory());
+                sweep_max_eff_wfn_size =
+                    max(sweep_max_eff_wfn_size, k_eff->ket->total_memory);
                 pdi = k_eff->expect(me->mpo->const_e, algo_type, ex_type,
                                     me->para_rule);
                 tex += _t.get_time();
@@ -4737,6 +4833,8 @@ struct Expect {
         teff += _t.get_time();
         sweep_max_eff_ham_size =
             max(sweep_max_eff_ham_size, h_eff->op->get_total_memory());
+        sweep_max_eff_wfn_size =
+            max(sweep_max_eff_wfn_size, h_eff->ket->total_memory);
         auto pdi = h_eff->expect(me->mpo->const_e, algo_type, ex_type,
                                  me->para_rule, fuse_left);
         tex += _t.get_time();
@@ -4905,6 +5003,8 @@ struct Expect {
         teff += _t.get_time();
         sweep_max_eff_ham_size =
             max(sweep_max_eff_ham_size, h_eff->op->get_total_memory());
+        sweep_max_eff_wfn_size =
+            max(sweep_max_eff_wfn_size, h_eff->ket->total_memory);
         auto pdi =
             h_eff->expect(me->mpo->const_e, algo_type, ex_type, me->para_rule);
         tex += _t.get_time();
@@ -5049,6 +5149,12 @@ struct Expect {
         teff += _t.get_time();
         sweep_max_eff_ham_size =
             max(sweep_max_eff_ham_size, h_eff->op->get_total_memory());
+        sweep_max_eff_wfn_size = max(
+            sweep_max_eff_wfn_size,
+            accumulate(h_eff->ket.begin(), h_eff->ket.end(), (size_t)0,
+                       [](size_t x, shared_ptr<SparseMatrixGroup<S, FL>> y) {
+                           return x + y->total_memory;
+                       }));
         auto pdi =
             h_eff->expect(me->mpo->const_e, algo_type, ex_type, me->para_rule);
         tex += _t.get_time();
@@ -5284,6 +5390,12 @@ struct Expect {
         teff += _t.get_time();
         sweep_max_eff_ham_size =
             max(sweep_max_eff_ham_size, h_eff->op->get_total_memory());
+        sweep_max_eff_wfn_size = max(
+            sweep_max_eff_wfn_size,
+            accumulate(h_eff->ket.begin(), h_eff->ket.end(), (size_t)0,
+                       [](size_t x, shared_ptr<SparseMatrixGroup<S, FL>> y) {
+                           return x + y->total_memory;
+                       }));
         auto pdi =
             h_eff->expect(me->mpo->const_e, algo_type, ex_type, me->para_rule);
         tex += _t.get_time();
@@ -5466,6 +5578,7 @@ struct Expect {
         me->prepare();
         sweep_cumulative_nflop = 0;
         sweep_max_eff_ham_size = 0;
+        sweep_max_eff_wfn_size = 0;
         frame_<FPS>()->reset_peak_used_memory();
         vector<int> sweep_range;
         if (forward)
@@ -5545,6 +5658,21 @@ struct Expect {
                          << " | Twait = " << tt[2];
                     cout << endl;
                 }
+                // Report the largest effective Hamiltonian / wavefunction
+                // size over all processors, not just the local one.
+                size_t sweep_max_eff_ham_size_pm = sweep_max_eff_ham_size;
+                size_t sweep_max_eff_wfn_size_pm = sweep_max_eff_wfn_size;
+                if (me->para_rule != nullptr) {
+                    shared_ptr<ParallelCommunicator<S>> comm =
+                        me->para_rule->comm;
+                    uint64_t tt[2] = {(uint64_t)sweep_max_eff_ham_size_pm,
+                                      (uint64_t)sweep_max_eff_wfn_size_pm};
+                    comm->reduce_max_optional(&tt[0], 2, comm->root);
+                    // The reduction can only return its result through tt;
+                    // copy it back so the reduced values are printed.
+                    sweep_max_eff_ham_size_pm = (size_t)tt[0];
+                    sweep_max_eff_wfn_size_pm = (size_t)tt[1];
+                }
                 size_t dmain = frame_<FPS>()->peak_used_memory[0];
                 size_t dseco = frame_<FPS>()->peak_used_memory[1];
                 size_t imain = frame_<FPS>()->peak_used_memory[2];
@@ -5554,7 +5676,10 @@ struct Expect {
                 cout << " | Imem = " << Parsing::to_size_string(imain + iseco)
                      << " (" << (imain * 100 / (imain + iseco)) << "%)";
                 cout << " | Hmem = "
-                     << Parsing::to_size_string(sweep_max_eff_ham_size *
+                     << Parsing::to_size_string(sweep_max_eff_ham_size_pm *
+                                                sizeof(FL));
+                cout << " | Wmem = "
+                     << Parsing::to_size_string(sweep_max_eff_wfn_size_pm *
                                                 sizeof(FL));
                 cout << " | MaxBmem = "
                      << Parsing::to_size_string(max_move_env_mem.first *
diff --git a/src/pybind/pybind_dmrg.hpp b/src/pybind/pybind_dmrg.hpp
index 6e7a3456..09b4706e 100644
--- a/src/pybind/pybind_dmrg.hpp
+++ b/src/pybind/pybind_dmrg.hpp
@@ -1196,6 +1196,10 @@ void bind_fl_dmrg(py::module &m) {
                        &DMRG<S, FL, FLS>::davidson_max_iter)
         .def_readwrite("davidson_soft_max_iter",
                        &DMRG<S, FL, FLS>::davidson_soft_max_iter)
+        .def_readwrite("davidson_def_min_size",
+                       &DMRG<S, FL, FLS>::davidson_def_min_size)
+        .def_readwrite("davidson_def_max_size",
+                       &DMRG<S, FL, FLS>::davidson_def_max_size)
         .def_readwrite("davidson_shift", &DMRG<S, FL, FLS>::davidson_shift)
         .def_readwrite("davidson_type", &DMRG<S, FL, FLS>::davidson_type)
         .def_readwrite("conn_adjust_step", &DMRG<S, FL, FLS>::conn_adjust_step)
@@ -1490,6 +1494,10 @@ void bind_fl_linear(py::module &m) {
                        &Linear<S, FL, FLS>::linear_soft_max_iter)
         .def_readwrite("conv_required_sweeps",
                        &Linear<S, FL, FLS>::conv_required_sweeps)
+        .def_readwrite("linear_def_min_size",
+                       &Linear<S, FL, FLS>::linear_def_min_size)
+        .def_readwrite("linear_def_max_size",
+                       &Linear<S, FL, FLS>::linear_def_max_size)
         .def_readwrite("gf_omega", &Linear<S, FL, FLS>::gf_omega)
         .def_readwrite("gf_eta", &Linear<S, FL, FLS>::gf_eta)
         .def_readwrite("gf_extra_omegas", &Linear<S, FL, FLS>::gf_extra_omegas)
@@ -2465,13 +2473,17 @@ extern template auto bind_fl_trans_mps_spin_specific<SU2, SZ, complex<double>>(
     -> decltype(typename SU2::is_su2_t(typename SZ::is_sz_t()));
 
 extern template void
-bind_fl_trans_mps<SU2, double, complex<double>>(py::module &m, const string &aux_name);
+bind_fl_trans_mps<SU2, double, complex<double>>(py::module &m,
+                                                const string &aux_name);
 extern template void
-bind_fl_trans_mps<SU2, complex<double>, double>(py::module &m, const string &aux_name);
+bind_fl_trans_mps<SU2, complex<double>, double>(py::module &m,
+                                                const string &aux_name);
 extern template void
-bind_fl_trans_mps<SZ, double, complex<double>>(py::module &m, const string &aux_name);
+bind_fl_trans_mps<SZ, double, complex<double>>(py::module &m,
+                                               const string &aux_name);
 extern template void
-bind_fl_trans_mps<SZ, complex<double>, double>(py::module &m, const string &aux_name);
+bind_fl_trans_mps<SZ, complex<double>, double>(py::module &m,
+                                               const string &aux_name);
 #endif
 
 #endif
@@ -2726,13 +2738,17 @@ extern template auto bind_fl_trans_mps_spin_specific<SZ, SGF, complex<double>>(
 #endif
 
 extern template void
-bind_fl_trans_mps<SGF, double, complex<double>>(py::module &m, const string &aux_name);
+bind_fl_trans_mps<SGF, double, complex<double>>(py::module &m,
+                                                const string &aux_name);
 extern template void
-bind_fl_trans_mps<SGF, complex<double>, double>(py::module &m, const string &aux_name);
+bind_fl_trans_mps<SGF, complex<double>, double>(py::module &m,
+                                                const string &aux_name);
 extern template void
-bind_fl_trans_mps<SGB, double, complex<double>>(py::module &m, const string &aux_name);
+bind_fl_trans_mps<SGB, double, complex<double>>(py::module &m,
+                                                const string &aux_name);
 extern template void
-bind_fl_trans_mps<SGB, complex<double>, double>(py::module &m, const string &aux_name);
+bind_fl_trans_mps<SGB, complex<double>, double>(py::module &m,
+                                                const string &aux_name);
 #endif
 
 #endif
@@ -2929,13 +2945,17 @@ bind_fl_trans_mps<SZ, complex<double>, complex<float>>(py::module &m,
                                                        const string &aux_name);
 
 extern template void
-bind_fl_trans_mps<SU2, float, complex<float>>(py::module &m, const string &aux_name);
+bind_fl_trans_mps<SU2, float, complex<float>>(py::module &m,
+                                              const string &aux_name);
 extern template void
-bind_fl_trans_mps<SU2, complex<float>, float>(py::module &m, const string &aux_name);
+bind_fl_trans_mps<SU2, complex<float>, float>(py::module &m,
+                                              const string &aux_name);
 extern template void
-bind_fl_trans_mps<SZ, float, complex<float>>(py::module &m, const string &aux_name);
+bind_fl_trans_mps<SZ, float, complex<float>>(py::module &m,
+                                             const string &aux_name);
 extern template void
-bind_fl_trans_mps<SZ, complex<float>, float>(py::module &m, const string &aux_name);
+bind_fl_trans_mps<SZ, complex<float>, float>(py::module &m,
+                                             const string &aux_name);
 
 #endif
 
@@ -3074,13 +3094,17 @@ bind_fl_trans_mps<SGB, complex<double>, complex<float>>(py::module &m,
                                                         const string &aux_name);
 
 extern template void
-bind_fl_trans_mps<SGF, float, complex<float>>(py::module &m, const string &aux_name);
+bind_fl_trans_mps<SGF, float, complex<float>>(py::module &m,
+                                              const string &aux_name);
 extern template void
-bind_fl_trans_mps<SGF, complex<float>, float>(py::module &m, const string &aux_name);
+bind_fl_trans_mps<SGF, complex<float>, float>(py::module &m,
+                                              const string &aux_name);
 extern template void
-bind_fl_trans_mps<SGB, float, complex<float>>(py::module &m, const string &aux_name);
+bind_fl_trans_mps<SGB, float, complex<float>>(py::module &m,
+                                              const string &aux_name);
 extern template void
-bind_fl_trans_mps<SGB, complex<float>, float>(py::module &m, const string &aux_name);
+bind_fl_trans_mps<SGB, complex<float>, float>(py::module &m,
+                                              const string &aux_name);
 #endif
 
 #endif